U
    9%eM"                     @   s   d dl Z d dlmZ d dlZd dlZddlmZmZm	Z	m
Z
 ddlmZmZ e r^ddlmZ e
eZeeejdd	d
ZeeG dd deZdS )    N)Union   )add_end_docstringsis_torch_availableis_torchaudio_availablelogging   )PIPELINE_INIT_ARGSPipeline),MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES)bpayloadsampling_ratereturnc           
      C   s   | }d}d}dddd|d|d|d	d
ddg}zt j|t jt jd}W n tk
r`   tdY nX || }|d }t|tj}	|	j	d dkrtd|	S )z?
    Helper function to read an audio file through ffmpeg.
    1Zf32leZffmpegz-izpipe:0z-acz-arz-fz-hide_bannerz	-loglevelquietzpipe:1)stdinstdoutzFffmpeg was not found but is required to load audio files from filenamer   zMalformed soundfile)

subprocessPopenPIPEFileNotFoundError
ValueErrorcommunicatenpZ
frombufferZfloat32shape)
r   r   aracZformat_for_conversionZffmpeg_commandZffmpeg_processZoutput_streamZ	out_bytesZaudio r   j/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/transformers/pipelines/audio_classification.pyffmpeg_read   s6    
r   c                       sb   e Zd ZdZ fddZeejee	f d fddZ
ddd	Zd
d Zdd ZdddZ  ZS )AudioClassificationPipelinea  
    Audio classification pipeline using any `AutoModelForAudioClassification`. This pipeline predicts the class of a
    raw waveform or an audio file. In case of an audio file, ffmpeg should be installed to support multiple audio
    formats.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> classifier = pipeline(model="superb/wav2vec2-base-superb-ks")
    >>> classifier("https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/1.flac")
    [{'score': 0.997, 'label': '_unknown_'}, {'score': 0.002, 'label': 'left'}, {'score': 0.0, 'label': 'yes'}, {'score': 0.0, 'label': 'down'}, {'score': 0.0, 'label': 'stop'}]
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)


    This pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"audio-classification"`.

    See the list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=audio-classification).
    c                    s@   d|d< t  j|| | jdkr2td| j d| t d S )N   top_kptzThe z is only available in PyTorch.)super__init__Z	frameworkr   	__class__Zcheck_model_typer   )selfargskwargsr&   r   r   r%   ]   s
    
z$AudioClassificationPipeline.__init__)inputsc                    s   t  j|f|S )a  
        Classify the sequence(s) given as inputs. See the [`AutomaticSpeechRecognitionPipeline`] documentation for more
        information.

        Args:
            inputs (`np.ndarray` or `bytes` or `str` or `dict`):
                The inputs is either :
                    - `str` that is the filename of the audio file, the file will be read at the correct sampling rate
                      to get the waveform using *ffmpeg*. This requires *ffmpeg* to be installed on the system.
                    - `bytes` it is supposed to be the content of an audio file and is interpreted by *ffmpeg* in the
                      same way.
                    - (`np.ndarray` of shape (n, ) of type `np.float32` or `np.float64`)
                        Raw audio at the correct sampling rate (no further check will be done)
                    - `dict` form can be used to pass raw audio sampled at arbitrary `sampling_rate` and let this
                      pipeline do the resampling. The dict must be either be in the format `{"sampling_rate": int,
                      "raw": np.array}`, or `{"sampling_rate": int, "array": np.array}`, where the key `"raw"` or
                      `"array"` is used to denote the raw audio waveform.
            top_k (`int`, *optional*, defaults to None):
                The number of top labels that will be returned by the pipeline. If the provided number is `None` or
                higher than the number of labels available in the model configuration, it will default to the number of
                labels.

        Return:
            A list of `dict` with the following keys:

            - **label** (`str`) -- The label predicted.
            - **score** (`float`) -- The corresponding probability.
        )r$   __call__)r'   r+   r)   r*   r   r   r,   g   s    !z$AudioClassificationPipeline.__call__Nc                 K   s6   i }|d k	r,|| j jjkr$| j jj}||d< i i |fS )Nr"   )modelconfigZ
num_labels)r'   r"   r)   Zpostprocess_paramsr   r   r   _sanitize_parameters   s    
z0AudioClassificationPipeline._sanitize_parametersc              	   C   s\  t |trJ|ds|dr,t|j}nt|d}| }W 5 Q R X t |trbt	|| j
j}t |trd|krd|ksd|kstd|dd }|d kr|dd  |dd }|d}|}|| j
jkrd	d l}t rd	d
lm} ntd||||| j
j }t |tjs,tdt|jdkrDtd| j
|| j
jdd}|S )Nzhttp://zhttps://rbr   rawarrayzWhen passing a dictionary to AudioClassificationPipeline, the dict needs to contain a "raw" key containing the numpy array representing the audio and a "sampling_rate" key, containing the sampling_rate associated with that arraypathr   )
functionalztorchaudio is required to resample audio samples in AudioClassificationPipeline. The torchaudio package can be installed through: `pip install torchaudio`.z"We expect a numpy ndarray as inputr   zFWe expect a single channel audio input for AudioClassificationPipeliner#   )r   Zreturn_tensors)
isinstancestr
startswithrequestsgetcontentopenreadbytesr   Zfeature_extractorr   dictr   poptorchr   Z
torchaudior4   ImportErrorZresampleZ
from_numpynumpyr   ndarraylenr   )r'   r+   fZ_inputsZin_sampling_rater@   F	processedr   r   r   
preprocess   sP    


    z&AudioClassificationPipeline.preprocessc                 C   s   | j f |}|S )N)r-   )r'   Zmodel_inputsmodel_outputsr   r   r   _forward   s    z$AudioClassificationPipeline._forwardr!   c                    sJ   |j d d}||\}}| }| } fddt||D }|S )Nr   c                    s$   g | ]\}}| j jj| d qS ))scorelabel)r-   r.   Zid2label).0rL   Z_idr'   r   r   
<listcomp>   s     z;AudioClassificationPipeline.postprocess.<locals>.<listcomp>)ZlogitsZsoftmaxZtopktolistzip)r'   rI   r"   ZprobsZscoresZidslabelsr   rO   r   postprocess   s    z'AudioClassificationPipeline.postprocess)N)r!   )__name__
__module____qualname____doc__r%   r   r   rC   r=   r6   r,   r/   rH   rJ   rT   __classcell__r   r   r*   r   r    B   s   #
	7r    )r   typingr   rB   r   r8   utilsr   r   r   r   baser	   r
   Zmodels.auto.modeling_autor   Z
get_loggerrU   loggerr=   intr2   r   r    r   r   r   r   <module>   s   
$