from typing import List, Union

from ..utils import is_torch_available
from .base import Pipeline


if is_torch_available():
    from ..models.auto.modeling_auto import MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING
    from ..models.speecht5.modeling_speecht5 import SpeechT5HifiGan

DEFAULT_VOCODER_ID = "microsoft/speecht5_hifigan"


class TextToAudioPipeline(Pipeline):
    """
    Text-to-audio generation pipeline using any `AutoModelForTextToWaveform` or `AutoModelForTextToSpectrogram`. This
    pipeline generates an audio file from an input text and optional other conditional inputs.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> pipe = pipeline(model="suno/bark-small")
    >>> output = pipe("Hey it's HuggingFace on the phone!")

    >>> audio = output["audio"]
    >>> sampling_rate = output["sampling_rate"]
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)


    This pipeline can currently be loaded from [`pipeline`] using the following task identifiers: `"text-to-speech"` or
    `"text-to-audio"`.

    See the list of available models on [huggingface.co/models](https://huggingface.co/models?filter=text-to-speech).
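
    A list of texts can also be passed to generate several audio outputs at once (an illustrative
    sketch reusing the checkpoint from the example above; the return value is then a list of dicts,
    as described in [`TextToAudioPipeline.__call__`]):

    ```python
    >>> from transformers import pipeline

    >>> pipe = pipeline(model="suno/bark-small")
    >>> outputs = pipe(["Hey it's HuggingFace on the phone!", "Please leave a message after the beep."])
    >>> waveforms = [out["audio"] for out in outputs]
    ```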
    N)vocodersampling_ratec                   s   t  j|| | jdkr tdd | _| jjt krX|d krRt	
t| jjn|| _|| _| jd k	rt| jjj| _| jd kr| jj}| jjdd }|d k	r||  dD ]}t||d }|d k	r|| _qd S )Ntfz5The TextToAudioPipeline is only available in PyTorch.generation_config)Zsample_rater   )super__init__Z	framework
ValueErrorr   model	__class__r   valuesr	   Zfrom_pretrainedDEFAULT_VOCODER_IDtodevicer   config__dict__getupdateto_dictgetattr)selfr   r   argskwargsr   Z
gen_configZsampling_rate_namer    c/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/transformers/pipelines/text_to_audio.pyr   5   s*    


zTextToAudioPipeline.__init__c                 K   sd   t |tr|g}| jjjdkrJ| jjjddddddd}|| |}| j	|f|dd	i}|S )
NZbarkZmax_input_semantic_length   FT
max_length)r%   Zadd_special_tokensZreturn_attention_maskZreturn_token_type_idspaddingZreturn_tensorspt)

isinstancestrr   r   Z
model_typer   Zsemantic_configr   r   Z	tokenizer)r   textr    Z
new_kwargsoutputr"   r"   r#   
preprocessT   s    
	
zTextToAudioPipeline.preprocessc                 K   sZ   | j || jd}| j r.| jjf ||}n| jf ||d }| jd k	rV| |}|S )N)r   r   )Z_ensure_tensor_on_devicer   r   Zcan_generategenerater   )r   Zmodel_inputsr    r+   r"   r"   r#   _forwardk   s    


zTextToAudioPipeline._forward)text_inputsc                    s   t  j|f|S )a  
        Generates speech/audio from the inputs. See the [`TextToAudioPipeline`] documentation for more information.

        Args:
            text_inputs (`str` or `List[str]`):
                The text(s) to generate.
            forward_params (*optional*):
                Parameters passed to the model generation/forward method.

        Return:
            A `dict` or a list of `dict`: The dictionaries have two keys:

            - **audio** (`np.ndarray` of shape `(nb_channels, audio_length)`) -- The generated audio waveform.
            - **sampling_rate** (`int`) -- The sampling rate of the generated audio waveform.
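
        Example of passing generation parameters through `forward_params` (an illustrative sketch;
        "facebook/musicgen-small" and the specific keyword arguments are assumptions, and the keys
        accepted depend on the underlying model's `generate`/`forward` signature):

        ```python
        >>> from transformers import pipeline

        >>> music_generator = pipeline(task="text-to-audio", model="facebook/musicgen-small")
        >>> output = music_generator(
        ...     "Techno music with a high melodic riff", forward_params={"do_sample": True, "max_new_tokens": 250}
        ... )
        ```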
        """
        return super().__call__(text_inputs, **forward_params)

    def _sanitize_parameters(self, preprocess_params=None, forward_params=None):
        if preprocess_params is None:
            preprocess_params = {}
        if forward_params is None:
            forward_params = {}
        postprocess_params = {}

        return preprocess_params, forward_params, postprocess_params

    def postprocess(self, waveform):
        output_dict = {}

        # move the waveform back to CPU and convert it to a NumPy array for downstream use
        output_dict["audio"] = waveform.cpu().float().numpy()
        output_dict["sampling_rate"] = self.sampling_rate

        return output_dict