U
    9%e                     @   sP   d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	 G d	d
 d
eZ
dS )z%
Speech processor class for Wav2Vec2
    N)contextmanager   )ProcessorMixin   )Wav2Vec2FeatureExtractor)Wav2Vec2CTCTokenizerc                       sd   e Zd ZdZdZdZ fddZe fddZdd	 Z	d
d Z
dd Zdd Zedd Z  ZS )Wav2Vec2Processora  
    Constructs a Wav2Vec2 processor which wraps a Wav2Vec2 feature extractor and a Wav2Vec2 CTC tokenizer into a single
    processor.

    [`Wav2Vec2Processor`] offers all the functionalities of [`Wav2Vec2FeatureExtractor`] and [`PreTrainedTokenizer`].
    See the docstring of [`~Wav2Vec2Processor.__call__`] and [`~Wav2Vec2Processor.decode`] for more information.

    Args:
        feature_extractor (`Wav2Vec2FeatureExtractor`):
            An instance of [`Wav2Vec2FeatureExtractor`]. The feature extractor is a required input.
        tokenizer ([`PreTrainedTokenizer`]):
            An instance of [`PreTrainedTokenizer`]. The tokenizer is a required input.
    r   ZAutoTokenizerc                    s    t  || | j| _d| _d S )NF)super__init__feature_extractorcurrent_processor_in_target_context_manager)selfr   	tokenizer	__class__ o/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/transformers/models/wav2vec2/processing_wav2vec2.pyr
   +   s    zWav2Vec2Processor.__init__c                    sj   zt  j|f|W S  tk
rd   td| j dt tj|f|}tj|f|}| ||d Y S X d S )NzLoading a tokenizer inside a   from a config that does not include a `tokenizer_class` attribute is deprecated and will be removed in v5. Please add `'tokenizer_class': 'Wav2Vec2CTCTokenizer'` attribute to either your `config.json` or `tokenizer_config.json` file to suppress this warning: )r   r   )	r	   from_pretrainedOSErrorwarningswarn__name__FutureWarningr   r   )clsZpretrained_model_name_or_pathkwargsr   r   r   r   r   r   0   s    	z!Wav2Vec2Processor.from_pretrainedc                 O   s   | j r| j||S d|kr0td |d}n|dd}|dd}|dd}t|dkrt|d }|dd }|dkr|dkrtd	|dk	r| j|f|d|i|}|dk	r| j|f|}|dkr|S |dkr|S |d
 |d< |S dS )a  
        When used in normal mode, this method forwards all its arguments to Wav2Vec2FeatureExtractor's
        [`~Wav2Vec2FeatureExtractor.__call__`] and returns its output. If used in the context
        [`~Wav2Vec2Processor.as_target_processor`] this method forwards all its arguments to PreTrainedTokenizer's
        [`~PreTrainedTokenizer.__call__`]. Please refer to the docstring of the above two methods for more information.
        Z
raw_speechzLUsing `raw_speech` as a keyword argument is deprecated. Use `audio` instead.audioNsampling_ratetextr   r   zAYou need to specify either an `audio` or `text` input to process.	input_idslabels)	r   r   r   r   poplen
ValueErrorr   r   )r   argsr   r   r   r   inputs	encodingsr   r   r   __call__C   s.    
zWav2Vec2Processor.__call__c                 O   s   | j r| jj||S |dd}|dd}t|dkrL|d }|dd }|dk	rh| jj|f||}|dk	r| jj|f|}|dkr|S |dkr|S |d |d< |S dS )a  
        When used in normal mode, this method forwards all its arguments to Wav2Vec2FeatureExtractor's
        [`~Wav2Vec2FeatureExtractor.pad`] and returns its output. If used in the context
        [`~Wav2Vec2Processor.as_target_processor`] this method forwards all its arguments to PreTrainedTokenizer's
        [`~PreTrainedTokenizer.pad`]. Please refer to the docstring of the above two methods for more information.
        input_featuresNr    r   r   r   )r   r   padr!   r"   r   r   )r   r$   r   r(   r    r   r   r   r)   i   s"    zWav2Vec2Processor.padc                 O   s   | j j||S )z
        This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        )r   batch_decoder   r$   r   r   r   r   r*      s    zWav2Vec2Processor.batch_decodec                 O   s   | j j||S )z
        This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer
        to the docstring of this method for more information.
        )r   decoder+   r   r   r   r,      s    zWav2Vec2Processor.decodec                 c   s0   t d d| _| j| _dV  | j| _d| _dS )z
        Temporarily sets the tokenizer for processing the input. Useful for encoding the labels when fine-tuning
        Wav2Vec2.
        z`as_target_processor` is deprecated and will be removed in v5 of Transformers. You can process your labels by using the argument `text` of the regular `__call__` method (either in the same call as your audio inputs, or in a separate call.TNF)r   r   r   r   r   r   )r   r   r   r   as_target_processor   s    z%Wav2Vec2Processor.as_target_processor)r   
__module____qualname____doc__Zfeature_extractor_classZtokenizer_classr
   classmethodr   r'   r)   r*   r,   r   r-   __classcell__r   r   r   r   r      s   &r   )r0   r   
contextlibr   Zprocessing_utilsr   Zfeature_extraction_wav2vec2r   Ztokenization_wav2vec2r   r   r   r   r   r   <module>   s   