U
    ,-e                     @   sd   d Z ddlZddlZddlmZmZ ddlmZ ddlm	Z	 ddl
mZ e	eZG d	d
 d
ZdS )zTokenization classes for RAG.    N)ListOptional   )BatchEncoding)logging   )	RagConfigc                
   @   s   e Zd Zdd Zdd Zedd Zdd Zd	d
 Zdd Z	dd Z
dd Zdee eee  ee ee eeeedddZdS )RagTokenizerc                 C   s   || _ || _| j | _d S N)question_encoder	generatorcurrent_tokenizer)selfr   r    r   i/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/transformers/models/rag/tokenization_rag.py__init__   s    zRagTokenizer.__init__c                 C   sb   t j|rtd| dt j|dd t j|d}t j|d}| j| | j| d S )NzProvided path (z#) should be a directory, not a fileT)exist_okquestion_encoder_tokenizergenerator_tokenizer)	ospathisfile
ValueErrormakedirsjoinr   save_pretrainedr   )r   Zsave_directoryZquestion_encoder_pathZgenerator_pathr   r   r   r   "   s    zRagTokenizer.save_pretrainedc                 K   sZ   ddl m} |dd }|d kr*t|}|j||jdd}|j||jdd}| ||dS )N   )AutoTokenizerconfigr   )r   Z	subfolderr   )r   r   )Zauto.tokenization_autor   popr   from_pretrainedr   r   )clsZpretrained_model_name_or_pathkwargsr   r   r   r   r   r   r   r    +   s    
    zRagTokenizer.from_pretrainedc                 O   s   | j ||S r
   )r   r   argsr"   r   r   r   __call__=   s    zRagTokenizer.__call__c                 O   s   | j j||S r
   )r   batch_decoder#   r   r   r   r&   @   s    zRagTokenizer.batch_decodec                 O   s   | j j||S r
   )r   decoder#   r   r   r   r'   C   s    zRagTokenizer.decodec                 C   s   | j | _d S r
   )r   r   r   r   r   r   _switch_to_input_modeF   s    z"RagTokenizer._switch_to_input_modec                 C   s   | j | _d S r
   )r   r   r(   r   r   r   _switch_to_target_modeI   s    z#RagTokenizer._switch_to_target_modeNlongestT)	src_texts	tgt_texts
max_lengthmax_target_lengthpaddingreturn_tensors
truncationreturnc              	   K   s   t dt |d kr| jj}| |fd||||d|}	|d krD|	S |d krT| jj}| f |d||||d|}
|
d |	d< |	S )Nu4  `prepare_seq2seq_batch` is deprecated and will be removed in version 5 of 🤗 Transformers. Use the regular `__call__` method to prepare your inputs and the tokenizer under the `with_target_tokenizer` context manager to prepare your targets. See the documentation of your specific tokenizer for more detailsT)add_special_tokensr1   r.   r0   r2   )Ztext_targetr4   r1   r0   r.   r2   Z	input_idslabels)warningswarnFutureWarningr   Zmodel_max_length)r   r,   r-   r.   r/   r0   r1   r2   r"   Zmodel_inputsr5   r   r   r   prepare_seq2seq_batchL   sB    		z"RagTokenizer.prepare_seq2seq_batch)NNNr+   NT)__name__
__module____qualname__r   r   classmethodr    r%   r&   r'   r)   r*   r   strr   intboolr   r9   r   r   r   r   r	      s2   	
      
r	   )__doc__r   r6   typingr   r   Ztokenization_utils_baser   utilsr   Zconfiguration_ragr   Z
get_loggerr:   loggerr	   r   r   r   r   <module>   s   
