U
    ,-eq                     @   s.  d Z ddlZddlZddlZddlZddlmZ ddlmZm	Z	m
Z
mZmZmZ ddlZddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$ ere  rddl%Z%e#&e'Z(ed Z)G d	d
 d
eZ*G dd deZ+ee+j,e+_,e+j,j dk	r*e+j,j j-dddde+j,_ dS )zI
 Feature extraction saving/loading class for common feature extractors.
    N)UserDict)TYPE_CHECKINGAnyDictOptionalTupleUnion   )custom_object_save)FEATURE_EXTRACTOR_NAMEPushToHubMixin
TensorTypeadd_model_info_to_auto_mapcached_file	copy_funcdownload_urlis_flax_availableis_jax_tensoris_numpy_arrayis_offline_modeis_remote_urlis_tf_availableis_torch_availableis_torch_deviceis_torch_dtypeloggingrequires_backendsZSequenceFeatureExtractorc                       s   e Zd ZdZdeeeef  edee	f d fddZ
eee dddZed	d
dZdd Zdd Zdd Zdd Zdd Zdeeee	f  dddZd dddZ  ZS )BatchFeatureaI  
    Holds the output of the [`~SequenceFeatureExtractor.pad`] and feature extractor specific `__call__` methods.

    This class is derived from a python dictionary and can be used as a dictionary.

    Args:
        data (`dict`):
            Dictionary of lists/arrays/tensors returned by the __call__/pad methods ('input_values', 'attention_mask',
            etc.).
        tensor_type (`Union[None, str, TensorType]`, *optional*):
            You can give a tensor_type here to convert the lists of integers in PyTorch/TensorFlow/Numpy Tensors at
            initialization.
    N)datatensor_typec                    s   t  | | j|d d S )Nr   )super__init__convert_to_tensors)selfr   r   	__class__ f/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/transformers/feature_extraction_utils.pyr"   L   s    zBatchFeature.__init__)itemreturnc                 C   s    t |tr| j| S tddS )z
        If the key is a string, returns the value of the dict associated to `key` ('input_values', 'attention_mask',
        etc.).
        zRIndexing with integers is not available when using Python based feature extractorsN)
isinstancestrr   KeyErrorr$   r)   r'   r'   r(   __getitem__P   s    

zBatchFeature.__getitem__)r)   c                 C   s*   z| j | W S  tk
r$   tY nX d S N)r   r-   AttributeErrorr.   r'   r'   r(   __getattr__Z   s    zBatchFeature.__getattr__c                 C   s
   d| j iS Nr   r   r$   r'   r'   r(   __getstate__`   s    zBatchFeature.__getstate__c                 C   s   d|kr|d | _ d S r3   r4   )r$   stater'   r'   r(   __setstate__c   s    zBatchFeature.__setstate__c                 C   s
   | j  S r0   )r   keysr5   r'   r'   r(   r9   h   s    zBatchFeature.keysc                 C   s
   | j  S r0   )r   valuesr5   r'   r'   r(   r:   l   s    zBatchFeature.valuesc                 C   s
   | j  S r0   )r   itemsr5   r'   r'   r(   r;   p   s    zBatchFeature.itemsr    c                    s  |dkr| S t |tst|}|tjkrLt s6tdddl}|j |j}nv|tjkrt	 sdtdddl
fdd j}nB|tjkrt stdddlm} |j t}nd fdd	 t}|  D ]L\}}z||s |}|| |< W q   |d	krtd
tdY qX q| S )a5  
        Convert the inner content to tensors.

        Args:
            tensor_type (`str` or [`~utils.TensorType`], *optional*):
                The type of tensors to use. If `str`, should be one of the values of the enum [`~utils.TensorType`]. If
                `None`, no modification is done.
        NzSUnable to convert output to TensorFlow tensors format, TensorFlow is not installed.r   zMUnable to convert output to PyTorch tensors format, PyTorch is not installed.c                    s>   t | ttfr4t| dkr4t | d tjr4t| }  | S )Nr   )r+   listtuplelennpndarrayarraytensor)value)torchr'   r(   	as_tensor   s    *
z2BatchFeature.convert_to_tensors.<locals>.as_tensorzEUnable to convert output to JAX tensors format, JAX is not installed.c                    sn   t | ttfr`t | d tttjfr`dd | D }tt|dkr`|d kr` dd | D td} tj| |dS )Nr   c                 S   s   g | ]}t |qS r'   )r>   .0valr'   r'   r(   
<listcomp>   s     zFBatchFeature.convert_to_tensors.<locals>.as_tensor.<locals>.<listcomp>r	   c                 S   s   g | ]}t |qS r'   )r?   asarrayrF   r'   r'   r(   rI      s     )dtype)	r+   r<   r=   r?   r@   r>   setobjectrJ   )rC   rK   Z
value_lens)rE   r'   r(   rE      s
    $Zoverflowing_valueszKUnable to create tensor returning overflowing values of different lengths. zUnable to create tensor, you should probably activate padding with 'padding=True' to have batched tensors with the same length.)N)r+   r   Z
TENSORFLOWr   ImportErrorZ
tensorflowZconstant	is_tensorZPYTORCHr   rD   ZJAXr   Z	jax.numpynumpyrA   r   r   r;   
ValueError)r$   r   tfrO   ZjnpkeyrC   rB   r'   )rE   rD   r(   r#   s   sL    	




zBatchFeature.convert_to_tensorsr*   c           	      O   s   t | dg ddl}i }|d}|dkr~t|dkr~|d }t|rHn6t|tsdt|sdt|trj|}nt	dt| d| 
 D ]F\}}||r|j||||< q|dk	r|j|d||< q|||< q|| _| S )a  
        Send all values to device by calling `v.to(*args, **kwargs)` (PyTorch only). This should support casting in
        different `dtypes` and sending the `BatchFeature` to a different `device`.

        Args:
            args (`Tuple`):
                Will be passed to the `to(...)` function of the tensors.
            kwargs (`Dict`, *optional*):
                Will be passed to the `to(...)` function of the tensors.

        Returns:
            [`BatchFeature`]: The same instance after modification.
        rD   r   Ndevicez*Attempting to cast a BatchFeature to type z. This is not supported.)rU   )r   rD   getr>   r   r+   r,   r   intrQ   r;   Zis_floating_pointtor   )	r$   argskwargsrD   Znew_datarU   argkvr'   r'   r(   rX      s&    


zBatchFeature.to)NN)N)__name__
__module____qualname____doc__r   r   r,   r   r   r   r"   r/   r2   r6   r8   r9   r:   r;   r#   rX   __classcell__r'   r'   r%   r(   r   =   s   ,
Ir   c                
   @   sJ  e Zd ZdZdZdd ZedddZed&e	ee
jf ee	ee
jf  eeee	eef  ed
ddZd'e	ee
jf edddZee	ee
jf eeeef eeef f dddZeeeef edddZeeef dddZee	ee
jf edddZedddZe	ee
jf ddd Zd!d" Zed(d$d%ZdS ))FeatureExtractionMixinz
    This is a feature extraction mixin used to provide saving/loading functionality for sequential and image feature
    extractors.
    Nc                 K   sx   | dd| _| D ]\\}}zt| || W q tk
rp } z$td| d| d|   |W 5 d}~X Y qX qdS )z'Set elements of `kwargs` as attributes.processor_classNz
Can't set z with value z for )pop_processor_classr;   setattrr1   loggererror)r$   rZ   rS   rC   errr'   r'   r(   r"      s    zFeatureExtractionMixin.__init__)rd   c                 C   s
   || _ dS )z%Sets processor class as an attribute.N)rf   )r$   rd   r'   r'   r(   _set_processor_class   s    z+FeatureExtractionMixin._set_processor_classFmain)pretrained_model_name_or_path	cache_dirforce_downloadlocal_files_onlytokenrevisionc           
      K   s   ||d< ||d< ||d< ||d< | dd}|dk	rTtdt |dk	rPtd|}|dk	rd||d	< | j|f|\}	}| j|	f|S )
az  
        Instantiate a type of [`~feature_extraction_utils.FeatureExtractionMixin`] from a feature extractor, *e.g.* a
        derived class of [`SequenceFeatureExtractor`].

        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:

                - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
                  huggingface.co. Valid model ids can be located at the root-level, like `bert-base-uncased`, or
                  namespaced under a user or organization name, like `dbmdz/bert-base-german-cased`.
                - a path to a *directory* containing a feature extractor file saved using the
                  [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] method, e.g.,
                  `./my_model_directory/`.
                - a path or url to a saved feature extractor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model feature extractor should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force to (re-)download the feature extractor files and override the cached versions
                if they exist.
            resume_download (`bool`, *optional*, defaults to `False`):
                Whether or not to delete incompletely received file. Attempts to resume the download if such a file
                exists.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
            token (`str` or `bool`, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, or not specified, will use
                the token generated when running `huggingface-cli login` (stored in `~/.huggingface`).
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.


                <Tip>

                To test a pull request you made on the Hub, you can pass `revision="refs/pr/<pr_number>".

                </Tip>

            return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                If `False`, then this function returns just the final feature extractor object. If `True`, then this
                functions returns a `Tuple(feature_extractor, unused_kwargs)` where *unused_kwargs* is a dictionary
                consisting of the key/value pairs whose keys are not feature extractor attributes: i.e., the part of
                `kwargs` which has not been used to update `feature_extractor` and is otherwise ignored.
            kwargs (`Dict[str, Any]`, *optional*):
                The values in kwargs of any keys which are feature extractor attributes will be used to override the
                loaded values. Behavior concerning key/value pairs whose keys are *not* feature extractor attributes is
                controlled by the `return_unused_kwargs` keyword parameter.

        Returns:
            A feature extractor of type [`~feature_extraction_utils.FeatureExtractionMixin`].

        Examples:

        ```python
        # We can't instantiate directly the base class *FeatureExtractionMixin* nor *SequenceFeatureExtractor* so let's show the examples on a
        # derived class: *Wav2Vec2FeatureExtractor*
        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(
            "facebook/wav2vec2-base-960h"
        )  # Download feature_extraction_config from huggingface.co and cache.
        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(
            "./test/saved_model/"
        )  # E.g. feature_extractor (or model) was saved using *save_pretrained('./test/saved_model/')*
        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("./test/saved_model/preprocessor_config.json")
        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(
            "facebook/wav2vec2-base-960h", return_attention_mask=False, foo=False
        )
        assert feature_extractor.return_attention_mask is False
        feature_extractor, unused_kwargs = Wav2Vec2FeatureExtractor.from_pretrained(
            "facebook/wav2vec2-base-960h", return_attention_mask=False, foo=False, return_unused_kwargs=True
        )
        assert feature_extractor.return_attention_mask is False
        assert unused_kwargs == {"foo": False}
        ```rn   ro   rp   rr   use_auth_tokenNVThe `use_auth_token` argument is deprecated and will be removed in v5 of Transformers.V`token` and `use_auth_token` are both specified. Please set only the argument `token`.rq   )re   warningswarnFutureWarningrQ   get_feature_extractor_dict	from_dict)
clsrm   rn   ro   rp   rq   rr   rZ   rs   feature_extractor_dictr'   r'   r(   from_pretrained  s&    Y z&FeatureExtractionMixin.from_pretrained)save_directorypush_to_hubc           	      K   s  | dd}|dk	r@tdt |dddk	r8td||d< tj|r\t	d| dtj
|dd	 |r| d
d}| d|tjjd }| j|f|}| |}| jdk	rt| || d tj|t}| | td|  |r| j|||||dd |gS )az  
        Save a feature_extractor object to the directory `save_directory`, so that it can be re-loaded using the
        [`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`] class method.

        Args:
            save_directory (`str` or `os.PathLike`):
                Directory where the feature extractor JSON file will be saved (will be created if it does not exist).
            push_to_hub (`bool`, *optional*, defaults to `False`):
                Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
                repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
                namespace).
            kwargs (`Dict[str, Any]`, *optional*):
                Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
        rs   Nrt   rq   ru   zProvided path (z#) should be a directory, not a fileT)exist_okcommit_messagerepo_id)configzFeature extractor saved in )r   rq   )re   rv   rw   rx   rV   rQ   ospathisfileAssertionErrormakedirssplitsepZ_create_repoZ_get_files_timestamps_auto_classr
   joinr   to_json_filerh   infoZ_upload_modified_files)	r$   r~   r   rZ   rs   r   r   Zfiles_timestampsZoutput_feature_extractor_filer'   r'   r(   save_pretrainedq  sB     


z&FeatureExtractionMixin.save_pretrained)rm   r*   c                 K   sR  | dd}| dd}| dd}| dd}| dd}| dd}| d	d}	| d
d}
|dk	rtdt |dk	rtd|}| dd}| dd}d|d}|dk	r||d< t r|	std d}	t|}t	j
|}t	j
|rt	j
|t}t	j
|r|}d}nt|r2|}t|}nnt}z t|||||||	|||
d
}W nH tk
rn    Y n2 tk
r   td| d| dt dY nX z0t|ddd}| }W 5 Q R X t|}W n( tjk
r   td| dY nX |rtd|  ntd| d|  d |krJ|sJt|d  ||d < ||fS )!a6  
        From a `pretrained_model_name_or_path`, resolve to a dictionary of parameters, to be used for instantiating a
        feature extractor of type [`~feature_extraction_utils.FeatureExtractionMixin`] using `from_dict`.

        Parameters:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                The identifier of the pre-trained checkpoint from which we want the dictionary of parameters.

        Returns:
            `Tuple[Dict, Dict]`: The dictionary(ies) that will be used to instantiate the feature extractor object.
        rn   Nro   Fresume_downloadproxiesrq   rs   rp   rr   rt   ru   Z_from_pipelineZ
_from_autofeature extractor)	file_typefrom_auto_classZusing_pipelinez+Offline mode: forcing local_files_only=TrueT)rn   ro   r   r   rp   rq   
user_agentrr   z"Can't load feature extractor for 'z'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'z2' is the correct path to a directory containing a z filerutf-8encodingz"It looks like the config file at 'z' is not a valid JSON file.zloading configuration file z from cache at Zauto_map)re   rv   rw   rx   rQ   r   rh   r   r,   r   r   isdirr   r   r   r   r   r   EnvironmentError	ExceptionopenreadjsonloadsJSONDecodeErrorr   )r{   rm   rZ   rn   ro   r   r   rq   rs   rp   rr   Zfrom_pipeliner   r   is_localZfeature_extractor_fileZresolved_feature_extractor_filereadertextr|   r'   r'   r(   ry     s     








 z1FeatureExtractionMixin.get_feature_extractor_dict)r|   r*   c                 K   s   | dd}| f |}g }| D ](\}}t||r"t||| || q"|D ]}| |d qPtd|  |r~||fS |S dS )a	  
        Instantiates a type of [`~feature_extraction_utils.FeatureExtractionMixin`] from a Python dictionary of
        parameters.

        Args:
            feature_extractor_dict (`Dict[str, Any]`):
                Dictionary that will be used to instantiate the feature extractor object. Such a dictionary can be
                retrieved from a pretrained checkpoint by leveraging the
                [`~feature_extraction_utils.FeatureExtractionMixin.to_dict`] method.
            kwargs (`Dict[str, Any]`):
                Additional parameters from which to initialize the feature extractor object.

        Returns:
            [`~feature_extraction_utils.FeatureExtractionMixin`]: The feature extractor object instantiated from those
            parameters.
        return_unused_kwargsFNzFeature extractor )re   r;   hasattrrg   appendrh   r   )r{   r|   rZ   r   Zfeature_extractorZ	to_removerS   rC   r'   r'   r(   rz     s    

z FeatureExtractionMixin.from_dictrT   c                 C   s   t | j}| jj|d< |S )z
        Serializes this instance to a Python dictionary.

        Returns:
            `Dict[str, Any]`: Dictionary of all the attributes that make up this feature extractor instance.
        Zfeature_extractor_type)copydeepcopy__dict__r&   r^   )r$   outputr'   r'   r(   to_dict?  s    zFeatureExtractionMixin.to_dict)	json_filer*   c              	   C   s6   t |ddd}| }W 5 Q R X t|}| f |S )a  
        Instantiates a feature extractor of type [`~feature_extraction_utils.FeatureExtractionMixin`] from the path to
        a JSON file of parameters.

        Args:
            json_file (`str` or `os.PathLike`):
                Path to the JSON file containing the parameters.

        Returns:
            A feature extractor of type [`~feature_extraction_utils.FeatureExtractionMixin`]: The feature_extractor
            object instantiated from that JSON file.
        r   r   r   )r   r   r   r   )r{   r   r   r   r|   r'   r'   r(   from_json_fileK  s    
z%FeatureExtractionMixin.from_json_filec                 C   sb   |   }| D ] \}}t|tjr| ||< q|dd}|dk	rN||d< tj|dddd S )z
        Serializes this instance to a JSON string.

        Returns:
            `str`: String containing all the attributes that make up this feature_extractor instance in JSON format.
        rf   Nrd      T)indent	sort_keys
)	r   r;   r+   r?   r@   tolistre   r   dumps)r$   
dictionaryrS   rC   rf   r'   r'   r(   to_json_string^  s    z%FeatureExtractionMixin.to_json_string)json_file_pathc              	   C   s,   t |ddd}||   W 5 Q R X dS )z
        Save this instance to a JSON file.

        Args:
            json_file_path (`str` or `os.PathLike`):
                Path to the JSON file in which this feature_extractor instance's parameters will be saved.
        wr   r   N)r   writer   )r$   r   writerr'   r'   r(   r   s  s    z#FeatureExtractionMixin.to_json_filec                 C   s   | j j d|   S )N )r&   r^   r   r5   r'   r'   r(   __repr__~  s    zFeatureExtractionMixin.__repr__AutoFeatureExtractorc                 C   sD   t |ts|j}ddlm  m} t||s:t| d|| _dS )a  
        Register this class with a given auto class. This should only be used for custom feature extractors as the ones
        in the library are already mapped with `AutoFeatureExtractor`.

        <Tip warning={true}>

        This API is experimental and may have some slight breaking changes in the next releases.

        </Tip>

        Args:
            auto_class (`str` or `type`, *optional*, defaults to `"AutoFeatureExtractor"`):
                The auto class to register this new feature extractor with.
        r   Nz is not a valid auto class.)	r+   r,   r^   Ztransformers.models.automodelsautor   rQ   r   )r{   Z
auto_classZauto_moduler'   r'   r(   register_for_auto_class  s    

z.FeatureExtractionMixin.register_for_auto_class)NFFNrl   )F)r   )r^   r_   r`   ra   r   r"   r,   rk   classmethodr   r   PathLiker   boolr}   r   r   r   r   ry   PreTrainedFeatureExtractorrz   r   r   r   r   r   r   r'   r'   r'   r(   rc      sB        o<l$rc   r   r   zfeature extractor file)rM   Zobject_classZobject_files).ra   r   r   r   rv   collectionsr   typingr   r   r   r   r   r   rP   r?   Zdynamic_module_utilsr
   utilsr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rD   Z
get_loggerr^   rh   r   r   rc   r   formatr'   r'   r'   r(   <module>   s6    P
 -   6  