U
    ,:%e'2                     @   sn  d dl mZmZmZmZmZ d dlZd dlmZ ddgZ	eee
 ejeeej  ef Zde_eee
 ddd	Zeejdd
dZeeeej  dddZeedddZeedddZee eeej  dddZeeej  e
ejeeej  dddZeedddZee eje
eejejejf dddZeee ddddZG d d dejjZdS )!    )CallableDictListOptionalTupleN)RNNT
HypothesisRNNTBeamSearchzHypothesis generated by RNN-T beam search decoder,
    represented as tuple of (tokens, prediction network output, prediction network state, score).
    )hyporeturnc                 C   s   | d S Nr    r
   r   r   ]/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/torchaudio/models/rnnt_decoder.py_get_hypo_tokens   s    r   c                 C   s   | d S N   r   r   r   r   r   _get_hypo_predictor_out   s    r   c                 C   s   | d S )N   r   r   r   r   r   _get_hypo_state   s    r   c                 C   s   | d S )N   r   r   r   r   r   _get_hypo_score   s    r   c                 C   s   t | d S r   )strr   r   r   r   _get_hypo_key    s    r   )hyposr   c              	      sn   g }t tt| d D ]P g }t tt| d   D ]$|t fdd| D  q8|| q|S )Nr   c                    s   g | ]}t |   qS r   )r   .0r
   ijr   r   
<listcomp>)   s     z _batch_state.<locals>.<listcomp>)rangelenr   appendtorchcat)r   statesZbatched_state_componentsr   r   r   _batch_state$   s    "r'   )r&   idxdevicer   c                    s"   t j|g|d  fdd| D S )Nr)   c                    s   g | ]} fd d|D qS )c                    s   g | ]}| d  qS )r   )Zindex_select)r   stateZ
idx_tensorr   r   r    0   s     z+_slice_state.<locals>.<listcomp>.<listcomp>r   )r   Zstate_tupler,   r   r   r    0   s     z _slice_state.<locals>.<listcomp>)r$   tensor)r&   r(   r)   r   r,   r   _slice_state.   s    r.   c                 C   s   t | tt| d  S r   )r   r"   r   r   r   r   r   _default_hypo_sort_key3   s    r/   )r   next_token_probs
beam_widthr   c           	      C   sr   t dd | D d}||d d d df  }|d|\}}|j|jd dd}||jd  }|||fS )Nc                 S   s   g | ]}t |qS r   r   r   hr   r   r   r    <   s     z+_compute_updated_scores.<locals>.<listcomp>r   trunc)Zrounding_mode)r$   r-   	unsqueezeZreshapetopkdivshape)	r   r0   r1   Zhypo_scoresZnonblank_scoresnonblank_nbest_scoresZnonblank_nbest_idxnonblank_nbest_hypo_idxnonblank_nbest_tokenr   r   r   _compute_updated_scores7   s    r>   )r
   	hypo_listr   c                 C   s0   t |D ]"\}}t| t|kr||=  q,qd S N)	enumerater   )r
   r?   r   elemr   r   r   _remove_hypoD   s    rC   c                       s  e Zd ZdZd eeeeee	gef  edd fddZ
ejee	 dd	d
Zejee	 ejejdddZee	 ee	 ejeee	f ee	 dddZee	 ee	 ejeeejee	 dddZee	 ee ee eejee	 dddZejeee	  eee	 dddZejejeee	 dddZejjd!ejejeeeeej   eee	  eee	 eeej  f dddZ  ZS )"r	   a)  Beam search decoder for RNN-T model.

    See Also:
        * :class:`torchaudio.pipelines.RNNTBundle`: ASR pipeline with pretrained model.

    Args:
        model (RNNT): RNN-T model to use.
        blank (int): index of blank token in vocabulary.
        temperature (float, optional): temperature to apply to joint network output.
            Larger values yield more uniform samples. (Default: 1.0)
        hypo_sort_key (Callable[[Hypothesis], float] or None, optional): callable that computes a score
            for a given hypothesis to rank hypotheses by. If ``None``, defaults to callable that returns
            hypothesis score normalized by token sequence length. (Default: None)
        step_max_tokens (int, optional): maximum number of tokens to emit per input time step. (Default: 100)
          ?Nd   )modelblanktemperaturehypo_sort_keystep_max_tokensr   c                    s<   t    || _|| _|| _|d kr,t| _n|| _|| _d S r@   )super__init__rF   rG   rH   r/   rI   rJ   )selfrF   rG   rH   rI   rJ   	__class__r   r   rL   \   s    
zRNNTBeamSearch.__init__)r)   r   c           	      C   sZ   | j }d }tjdg|d}| jtj|gg|d||\}}}|g|d  |df}|gS )Nr   r*   r   g        )rG   r$   r-   rF   predictdetach)	rM   r)   tokenr+   
one_tensorpred_out_Z
pred_stateZ	init_hypor   r   r   _init_b_hyposp   s    $
zRNNTBeamSearch._init_b_hypos)enc_outr   r)   r   c              	   C   s~   t jdg|d}t jdd |D dd}| j|||t jdgt| |d\}}}t jjj|| j	 dd}|d d ddf S )Nr   r*   c                 S   s   g | ]}t |qS r   )r   r3   r   r   r   r       s     z8RNNTBeamSearch._gen_next_token_probs.<locals>.<listcomp>r   )dimr   )
r$   r-   stackrF   joinr"   nnZ
functionalZlog_softmaxrH   )rM   rW   r   r)   rS   Zpredictor_outZ
joined_outrU   r   r   r   _gen_next_token_probs~   s    
z$RNNTBeamSearch._gen_next_token_probs)b_hyposa_hyposr0   key_to_b_hypor   c                    s   t t|D ]}|| }t|||df  }t||krh|t| }t|  ttt||}	nt|}	t	|t
|t||	f} | ||t|< qtdd  D  \}
} fdd|D S )Nr5   c                 S   s   g | ]}t |qS r   r2   r   r   r   r   r       s     z/RNNTBeamSearch._gen_b_hypos.<locals>.<listcomp>c                    s   g | ]} | qS r   r   r   r(   r]   r   r   r       s     )r!   r"   r   r   rC   floatr$   r-   Z	logaddexpr   r   r   r#   sort)rM   r]   r^   r0   r_   r   h_aZappend_blank_scoreZh_bscorerU   
sorted_idxr   ra   r   _gen_b_hypos   s"    

zRNNTBeamSearch._gen_b_hypos)r^   r]   r0   tr1   r)   r   c                 C   s   t |||\}}}	t||k r*td }
nt||  }
g }g }g }t|D ]N}t|| }||
krLt|| }|||  |t|	|  || qL|r| |||||}ng }|S )Ninf)r>   r"   rb   r   r!   intr#   _gen_new_hypos)rM   r^   r]   r0   rh   r1   r)   r;   r<   r=   Zb_nbest_score
base_hypos
new_tokensZ
new_scoresr   re   Z
a_hypo_idx	new_hyposr   r   r   _gen_a_hypos   s,    
zRNNTBeamSearch._gen_a_hypos)rl   tokensscoresrh   r)   r   c              	   C   s   t jdd |D |d}t|}| j|t jdgt| |d|\}}	}
g }t|D ]@\}}t||| g }|||| 	 t
|
|||| f qV|S )Nc                 S   s   g | ]
}|gqS r   r   )r   rR   r   r   r   r       s     z1RNNTBeamSearch._gen_new_hypos.<locals>.<listcomp>r*   r   )r$   r-   r'   rF   rP   r"   rA   r   r#   rQ   r.   )rM   rl   rp   rq   rh   r)   Z
tgt_tokensr&   rT   rU   Zpred_statesrn   r   rd   rm   r   r   r   rk      s    
(zRNNTBeamSearch._gen_new_hypos)rW   r
   r1   r   c              	      s   |j d }|j}g }|d kr&|n| t|D ]} }tjtt g  i }d}	|r	|d d ||d f ||}
|

 }
 ||
| |	jkrq| |
|||}|rT|	d7 }	qTtfdd D |\}} fdd|D  q2 S )Nr   r   c                    s   g | ]}  |qS r   )rI   )r   Zhyp)rM   r   r   r      s     z*RNNTBeamSearch._search.<locals>.<listcomp>c                    s   g | ]} | qS r   r   r`   ra   r   r   r      s     )r:   r)   rV   r!   r$   jitZannotater   r   r\   cpurg   rJ   ro   r-   r8   )rM   rW   r
   r1   Zn_time_stepsr)   r^   rh   r_   Zsymbols_current_tr0   rU   rf   r   )r]   rM   r   _search   s8    
"

"zRNNTBeamSearch._search)inputlengthr1   r   c                 C   s   |  dkr.|  dkr&|jd dks.td|  dkrD|d}|jdkr`|jdkr`td|  dkrv|d}| j||\}}| |d	|S )
a  Performs beam search for the given input sequence.

        T: number of frames;
        D: feature dimension of each frame.

        Args:
            input (torch.Tensor): sequence of input frames, with shape (T, D) or (1, T, D).
            length (torch.Tensor): number of valid frames in input
                sequence, with shape () or (1,).
            beam_width (int): beam size to use during search.

        Returns:
            List[Hypothesis]: top-``beam_width`` hypotheses found by beam search.
        r   r   r   r   *input must be of shape (T, D) or (1, T, D)r   r   "length must be of shape () or (1,)N)rX   r:   
ValueErrorr7   rF   Z
transcribert   )rM   ru   rv   r1   rW   rU   r   r   r   forward  s    &

zRNNTBeamSearch.forward)ru   rv   r1   r+   
hypothesisr   c                 C   s   |  dkr.|  dkr&|jd dks.td|  dkrD|d}|jdkr`|jdkr`td|  dkrv|d}| j|||\}}}| ||||fS )	a  Performs beam search for the given input sequence in streaming mode.

        T: number of frames;
        D: feature dimension of each frame.

        Args:
            input (torch.Tensor): sequence of input frames, with shape (T, D) or (1, T, D).
            length (torch.Tensor): number of valid frames in input
                sequence, with shape () or (1,).
            beam_width (int): beam size to use during search.
            state (List[List[torch.Tensor]] or None, optional): list of lists of tensors
                representing transcription network internal state generated in preceding
                invocation. (Default: ``None``)
            hypothesis (List[Hypothesis] or None): hypotheses from preceding invocation to seed
                search with. (Default: ``None``)

        Returns:
            (List[Hypothesis], List[List[torch.Tensor]]):
                List[Hypothesis]
                    top-``beam_width`` hypotheses found by beam search.
                List[List[torch.Tensor]]
                    list of lists of tensors representing transcription network
                    internal state generated in current invocation.
        r   r   r   r   rw   r   rx   ry   )rX   r:   rz   r7   rF   Ztranscribe_streamingrt   )rM   ru   rv   r1   r+   r|   rW   rU   r   r   r   infer'  s    !&

zRNNTBeamSearch.infer)rD   NrE   )NN)__name__
__module____qualname____doc__r   rj   rb   r   r   r   rL   r$   r)   r   rV   Tensorr\   r   r   rg   ro   rk   rt   r{   rr   Zexportr   r}   __classcell__r   r   rN   r   r	   K   sp        
(
)  
)typingr   r   r   r   r   r$   Ztorchaudio.modelsr   __all__rj   r   rb   r   r   r   r   r   r   r   r   r'   r)   r.   r/   r>   rC   r[   Moduler	   r   r   r   r   <module>   s(    
*