U
    -en4                     @   s  d Z ddlmZ ddlmZmZmZ ddlmZ ddl	Z	ddl
mZ ddlmZ ddlmZmZ d	d
dddgZeG dd	 d	ZeG dd
 d
Zeefeeee f eeee f eejejf eejejf edddZeG dd dZeefeeee f eeee f eejejf eejejf edddZeeee f eejejf edddZeedddZeee  eee  dddZdS )z
The core algorithm(s) for processing a one or more reference and hypothesis sentences
so that measures can be computed and an alignment can be visualized.
    )	dataclass)AnyListUnion)chainN)Opcodes)
transforms)wer_defaultcer_defaultAlignmentChunk
WordOutputCharacterOutputprocess_wordsprocess_charactersc                   @   sB   e Zd ZU dZeed< eed< eed< eed< eed< dd Zd	S )
r   a  
    Define an alignment between two subsequence of the reference and hypothesis.

    Attributes:
        type: one of `equal`, `substitute`, `insert`, or `delete`
        ref_start_idx: the start index of the reference subsequence
        ref_end_idx: the end index of the reference subsequence
        hyp_start_idx: the start index of the hypothesis subsequence
        hyp_end_idx: the end index of the hypothesis subsequence
    typeref_start_idxref_end_idxhyp_start_idxhyp_end_idxc                 C   sn   | j dkrtd| j dkr"d| _ | j| jkrFtd| j d| j | j| jkrjtd| j d| j d S )	N)replaceinsertdeleteequal
substitute r   r   zref_start_idx=z is larger than ref_end_idx=zhyp_start_idx=z is larger than hyp_end_idx=)r   
ValueErrorr   r   r   r   )self r   N/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/jiwer/process.py__post_init__C   s    

zAlignmentChunk.__post_init__N)__name__
__module____qualname____doc__str__annotations__intr   r   r   r   r   r   .   s   
c                   @   s   e Zd ZU dZeee  ed< eee  ed< eee  ed< eed< eed< eed< eed< e	ed	< e	ed
< e	ed< e	ed< dS )r   a  
    The output of calculating the word-level levenshtein distance between one or more
    reference and hypothesis sentence(s).

    Attributes:
        references: The reference sentences
        hypotheses: The hypothesis sentences
        alignments: The alignment between reference and hypothesis sentences
        wer: The word error rate
        mer: The match error rate
        wil: The word information lost measure
        wip: The word information preserved measure
        hits: The number of correct words between reference and hypothesis sentences
        substitutions: The number of substitutions required to transform hypothesis
                       sentences to reference sentences
        insertions: The number of insertions required to transform hypothesis
                       sentences to reference sentences
        deletions: The number of deletions required to transform hypothesis
                       sentences to reference sentences

    
references
hypotheses
alignmentswermerwilwiphitssubstitutions
insertions	deletionsN
r    r!   r"   r#   r   r$   r%   r   floatr&   r   r   r   r   r   Y   s   
)	reference
hypothesisreference_transformhypothesis_transformreturnc                 C   s
  t | tr| g} t |tr |g}tdd | D r:tdt| |dd}t||dd}t|t|krtdt| dt| d	t||\}}d
\}}	}
}d\}}g }t||D ]\}}tj	j
||}tdd |D }tdd |D }tdd |D }t|||  }||7 }|	|7 }	|
|7 }
||7 }|t|7 }|t|7 }|dd t|D  q|	|
||f\}}}}t|| | t|| |  }t|| | t|| | |  }|dkrt|| t||  nd}d| }t|||||||||	||
dS )aD  
    Compute the word-level levenshtein distance and alignment between one or more
    reference and hypothesis sentences. Based on the result, multiple measures
    can be computed, such as the word error rate.

    Args:
        reference: The reference sentence(s)
        hypothesis: The hypothesis sentence(s)
        reference_transform: The transformation(s) to apply to the reference string(s)
        hypothesis_transform: The transformation(s) to apply to the hypothesis string(s)

    Returns:
        (WordOutput): The processed reference and hypothesis sentences
    c                 s   s   | ]}t |d kV  qdS )r   N)len).0tr   r   r   	<genexpr>   s     z process_words.<locals>.<genexpr>z(one or more references are empty stringsT)is_referenceFzoAfter applying the transforms on the reference and hypothesis sentences, their lengths must match. Instead got z reference and z hypothesis sentences.)r   r   r   r   )r   r   c                 s   s    | ]}|j d krdndV  qdS )r      r   Ntagr:   opr   r   r   r<      s     c                 s   s    | ]}|j d krdndV  qdS )r   r>   r   Nr?   rA   r   r   r   r<      s     c                 s   s    | ]}|j d krdndV  qdS )r   r>   r   Nr?   rA   r   r   r   r<      s     c              	   S   s(   g | ] }t |j|j|j|j|jd qS ))r   r   r   r   r   )r   r@   Z	src_startZsrc_endZ
dest_startZdest_endrA   r   r   r   
<listcomp>   s   z!process_words.<locals>.<listcomp>r>   r   )r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   )
isinstancer$   anyr   _apply_transformr9   
_word2charzip	rapidfuzzZdistanceZLevenshteinZeditopssumappendr   Zfrom_editopsr3   r   )r4   r5   r6   r7   Zref_transformedZhyp_transformedZref_as_charsZhyp_as_charsZnum_hitsZnum_substitutionsZnum_deletionsZnum_insertionsZnum_rf_wordsZnum_hp_wordsr)   Zreference_sentenceZhypothesis_sentenceZedit_opsr/   r1   r0   r.   SDIHr*   r+   r-   r,   r   r   r   r      s    

    	  $c                   @   sj   e Zd ZU dZeee  ed< eee  ed< eee  ed< eed< e	ed< e	ed< e	ed< e	ed	< d
S )r   ae  
    The output of calculating the character-level levenshtein distance between one or
    more reference and hypothesis sentence(s).

    Attributes:
        references: The reference sentences
        hypotheses: The hypothesis sentences
        alignments: The alignment between reference and hypothesis sentences
        cer: The character error rate
        hits: The number of correct characters between reference and hypothesis
              sentences
        substitutions: The number of substitutions required to transform hypothesis
                       sentences to reference sentences
        insertions: The number of insertions required to transform hypothesis
                       sentences to reference sentences
        deletions: The number of deletions required to transform hypothesis
                       sentences to reference sentences
    r'   r(   r)   cerr.   r/   r0   r1   Nr2   r   r   r   r   r      s   
c              
   C   s6   t | |||}t|j|j|j|j|j|j|j|j	dS )a  
    Compute the character-level levenshtein distance and alignment between one or more
    reference and hypothesis sentences. Based on the result, the character error rate
    can be computed.

    Note that the by default this method includes space (` `) as a
    character over which the error rate is computed. If this is not desired, the
    reference and hypothesis transform need to be modified.

    Args:
        reference: The reference sentence(s)
        hypothesis: The hypothesis sentence(s)
        reference_transform: The transformation(s) to apply to the reference string(s)
        hypothesis_transform: The transformation(s) to apply to the hypothesis string(s)

    Returns:
        (CharacterOutput): The processed reference and hypothesis sentences.

    )r'   r(   r)   rP   r.   r/   r0   r1   )
r   r   r'   r(   r)   r*   r.   r/   r0   r1   )r4   r5   r6   r7   resultr   r   r   r   %  s        )sentence	transformr=   c                 C   s:   || }|r"t |dds6tdnt |dds6td|S )NT)require_non_empty_listsz~After applying the transformation, each reference should be a non-empty list of strings, with each string being a single word.FzuAfter applying the transformation, each hypothesis should be a list of strings, with each string being a single word.)_is_list_of_list_of_stringsr   )rR   rS   r=   Ztransformed_sentencer   r   r   rF   U  s"      rF   )xrT   c                 C   sZ   t | tsdS | D ]B}t |ts& dS |r<t|dkr< dS tdd |D s dS qdS )NFr   c                 S   s   g | ]}t |tqS r   )rD   r$   )r:   sr   r   r   rC   ~  s     z/_is_list_of_list_of_strings.<locals>.<listcomp>T)rD   listr9   all)rV   rT   er   r   r   rU   s  s    

rU   )r4   r5   c                    sb   t t| | }d|kr tdtt|tt|  fdd| D } fdd|D }||fS )Nr   z]Empty strings cannot be a word. Please ensure that the given transform removes empty strings.c                    s$   g | ]}d   fdd|D qS )r   c                    s   g | ]}t  | qS r   chrr:   wZ	word2charr   r   rC     s     )_word2char.<locals>.<listcomp>.<listcomp>joinr:   rR   r_   r   r   rC     s    z_word2char.<locals>.<listcomp>c                    s$   g | ]}d   fdd|D qS )r   c                    s   g | ]}t  | qS r   r[   r]   r_   r   r   rC     s     r`   ra   rc   r_   r   r   rC     s    )setr   r   dictrH   ranger9   )r4   r5   Z
vocabularyZreference_charsZhypothesis_charsr   r_   r   rG     s    

rG   )r#   dataclassesr   typingr   r   r   	itertoolsr   rI   Zrapidfuzz.distancer   Zjiwerr   trZjiwer.transformationsr	   r
   __all__r   r   r$   ZComposeZAbstractTransformr   r   r   boolrF   rU   rG   r   r   r   r   <module>   sV   	*.z(1