U
    -e
                     @   s   d dl mZmZ d dlmZmZ d dlZd dlmZ	 d dlm
Z
 d dlmZ G dd deZG dd	 d	eZde
ee ed
ddZG dd deZdd Zdd ZG dd deZdS )    )ABCabstractmethod)DictListN)Tensor)	TokenSpanc                   @   s,   e Zd Zeee eee  dddZdS )
ITokenizer
transcriptreturnc                 C   s   dS )a  Tokenize the given transcript (list of word)

        .. note::

           The toranscript must be normalized.

        Args:
            transcript (list of str): Transcript (list of word).

        Returns:
            (list of int): List of token sequences
        N selfr
   r   r   g/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/torchaudio/pipelines/_wav2vec2/aligner.py__call__   s    zITokenizer.__call__N)__name__
__module____qualname__r   r   strr   r   r   r   r   r   
   s   r   c                   @   s>   e Zd Zeeef dddZee eee  dddZdS )	Tokenizer
dictionaryc                 C   s
   || _ d S Nr   )r   r   r   r   r   __init__   s    zTokenizer.__init__r	   c                    s    fdd|D S )Nc                    s   g | ]} fd d|D qS )c                    s   g | ]} j | qS r   r   ).0cr   r   r   
<listcomp>    s     z1Tokenizer.__call__.<locals>.<listcomp>.<listcomp>r   )r   wordr   r   r   r       s     z&Tokenizer.__call__.<locals>.<listcomp>r   r   r   r   r   r      s    zTokenizer.__call__N)	r   r   r   r   r   intr   r   r   r   r   r   r   r      s   r   )emissiontokensblankc                 C   sZ   | j }| d} tj|gtj|d}tj| ||d\}}| }|d |d  }}||fS )Nr   )Zdtypedevicer"   )r#   Z	unsqueezetorchZtensorZint32FZforced_alignexp)r    r!   r"   r#   targetsaligned_tokensscoresr   r   r   _align_emission_and_tokens#   s    
r+   c                   @   s2   e Zd Zeeeee  eee  dddZdS )IAlignerr    r!   r   c                 C   s   dS )a  Generate list of time-stamped token sequences

        Args:
            emission (Tensor): Sequence of token probability distributions in log-domain.
                Shape: `(time, tokens)`.
            tokens (list of integer sequence): Tokenized transcript.
                Output from :py:class:`Wav2Vec2FABundle.Tokenizer`.

        Returns:
            (list of TokenSpan sequence): Tokens with time stamps and scores.
        Nr   )r   r    r!   r   r   r   r   0   s    zIAligner.__call__N)	r   r   r   r   r   r   r   r   r   r   r   r   r   r,   /   s   r,   c                 C   sH   t | t|kstd}g }|D ]"}|| |||   ||7 }q |S )Nr   )lensumAssertionErrorappend)list_lengthsiretlr   r   r   
_unflatten?   s    
r7   c                 C   s   dd | D S )Nc                 S   s   g | ]}|D ]}|qqS r   r   )r   r2   itemr   r   r   r   J   s       z_flatten.<locals>.<listcomp>r   )Znested_listr   r   r   _flattenI   s    r9   c                   @   s6   e Zd Zdd Zeeee  eee  dddZdS )Alignerc                 C   s
   || _ d S r   r$   )r   r"   r   r   r   r   N   s    zAligner.__init__r-   c                 C   sP   |j dkrtd|j t|t|| j\}}t||}t|dd |D S )N   z&The input emission must be 2D. Found: c                 S   s   g | ]}t |qS r   )r.   )r   tsr   r   r   r   W   s     z$Aligner.__call__.<locals>.<listcomp>)	ndim
ValueErrorshaper+   r9   r"   r&   Zmerge_tokensr7   )r   r    r!   r)   r*   spansr   r   r   r   Q   s
    
zAligner.__call__N)	r   r   r   r   r   r   r   r   r   r   r   r   r   r:   M   s   r:   )r   )abcr   r   typingr   r   r%   Ztorchaudio.functionalZ
functionalr&   r   r   r   r   r   r+   r,   r7   r9   r:   r   r   r   r   <module>   s   
