U
    -e0                     @  s*  d dl mZ d dlmZmZmZ d dlmZmZ d dl	m
Z
 d dlmZmZmZ dddddd	d
ddddZd'dddd
ddddZdddddd	d
ddddZdddddd	dddddZdddddd	dddddZddddddZddddd	d d!d"d#Zddddd	d$d!d%d&ZdS )(    )annotations)CallableHashableSequence)common_affixconv_sequences)is_none)EditopEditopsOpcodesN)	processorscore_cutoffzSequence[Hashable]z(Callable[..., Sequence[Hashable]] | Nonez
int | Noneint)s1s2r   r   returnc                C  s   |dk	r|| } ||}| s dS t | |\} }dt| > d }i }|j}d}| D ]}||d|B ||< |dK }qP|D ]&}	||	d}
||
@ }|| || B }qtt|t|  d d}|dks||kr|S dS )a  
    Calculates the length of the longest common subsequence

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : int, optional
        Maximum distance between s1 and s2, that is
        considered as a result. If the similarity is smaller than score_cutoff,
        0 is returned instead. Default is None, which deactivates
        this behaviour.

    Returns
    -------
    similarity : int
        similarity between s1 and s2
    Nr      0)r   lengetbincount)r   r   r   r   Sblock	block_getxch1ch2Matchesures r!   ]/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/rapidfuzz/distance/LCSseq_py.py
similarity   s&    

r#   zdict[Hashable, int])r   r   r   r   r   c           
      C  s~   |sdS dt |> d }| j}|D ]&}||d}||@ }|| || B }q"t|t | d  d}	|d ksv|	|krz|	S dS Nr   r   r   )r   r   r   r   )
r   r   r   r   r   r   r   r   r   r    r!   r!   r"   _block_similarityE   s    
r%   c                C  sf   |dk	r|| } ||}t | |\} }tt| t|}t| |}|| }|dksZ||kr^|S |d S )a  
    Calculates the LCS distance in the range [0, max].

    This is calculated as ``max(len1, len2) - similarity``.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : int, optional
        Maximum distance between s1 and s2, that is
        considered as a result. If the distance is bigger than score_cutoff,
        score_cutoff + 1 is returned instead. Default is None, which deactivates
        this behaviour.

    Returns
    -------
    distance : int
        distance between s1 and s2

    Examples
    --------
    Find the LCS distance between two strings:

    >>> from rapidfuzz.distance import LCSseq
    >>> LCSseq.distance("lewenstein", "levenshtein")
    2

    Setting a maximum distance allows the implementation to select
    a more efficient implementation:

    >>> LCSseq.distance("lewenstein", "levenshtein", score_cutoff=1)
    2

    Nr   )r   maxr   r#   )r   r   r   r   maximumsimdistr!   r!   r"   distance[   s    /
r*   zfloat | Nonefloatc                C  s~   t | st |rdS |dk	r,|| } ||}| r4|s8dS t| |\} }tt| t|}t| || }|dksv||krz|S dS )a2  
    Calculates a normalized LCS similarity in the range [1, 0].

    This is calculated as ``distance / max(len1, len2)``.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 1.0.
        For norm_dist > score_cutoff 1.0 is returned instead. Default is 1.0,
        which deactivates this behaviour.

    Returns
    -------
    norm_dist : float
        normalized distance between s1 and s2 as a float between 0 and 1.0
          ?Nr   r   )r   r   r&   r   r*   )r   r   r   r   r'   norm_simr!   r!   r"   normalized_distance   s    r.   c                C  sR   t | st |rdS |dk	r,|| } ||}dt| | }|dksJ||krN|S dS )a  
    Calculates a normalized LCS similarity in the range [0, 1].

    This is calculated as ``1 - normalized_distance``

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 1.0.
        For norm_sim < score_cutoff 0 is returned instead. Default is 0,
        which deactivates this behaviour.

    Returns
    -------
    norm_sim : float
        normalized similarity between s1 and s2 as a float between 0 and 1.0

    Examples
    --------
    Find the normalized LCS similarity between two strings:

    >>> from rapidfuzz.distance import LCSseq
    >>> LCSseq.normalized_similarity("lewenstein", "levenshtein")
    0.8181818181818181

    Setting a score_cutoff allows the implementation to select
    a more efficient implementation:

    >>> LCSseq.normalized_similarity("lewenstein", "levenshtein", score_cutoff=0.9)
    0.0

    When a different processor is used s1 and s2 do not have to be strings

    >>> LCSseq.normalized_similarity(["lewenstein"], ["levenshtein"], processor=lambda s: s[0])
    0.81818181818181
    g        Nr,   r   )r   r.   )r   r   r   r   r-   r!   r!   r"   normalized_similarity   s    2r/   z([int], int))r   r   r   c                 C  s   | sdg fS dt | > d }i }|j}d}| D ]}||d|B ||< |dK }q.g }|D ]0}||d}	||	@ }
||
 ||
 B }|| qVt|t |  d  d}||fS r$   )r   r   appendr   r   )r   r   r   r   r   r   r   matrixr   r   r   r(   r!   r!   r"   _matrix  s"    

r2   r   r
   )r   r   r   r   c                C  s  |dk	r|| } ||}t | |\} }t| |\}}| |t| |  } ||t||  }t| |\}}tg dd}t| | | |_t|| | |_t| t| d|  }|dkr|S dg| }	t| }
t|}|dkr|
dkr||d  d|
d > @ r0|d8 }|
d8 }
td|
| || |	|< q|d8 }|rz||d  d|
d > @ sz|d8 }td|
| || |	|< q|
d8 }
q|
dkr|d8 }|
d8 }
td|
| || |	|< q|dkr|d8 }|d8 }td|
| || |	|< q|	|_|S )uc  
    Return Editops describing how to turn s1 into s2.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.

    Returns
    -------
    editops : Editops
        edit operations required to turn s1 into s2

    Notes
    -----
    The alignment is calculated using an algorithm of Heikki Hyyrö, which is
    described [6]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

    References
    ----------
    .. [6] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
           Stringology (2004).

    Examples
    --------
    >>> from rapidfuzz.distance import LCSseq
    >>> for tag, src_pos, dest_pos in LCSseq.editops("qabxcd", "abycdf"):
    ...    print(("%7s s1[%d] s2[%d]" % (tag, src_pos, dest_pos)))
     delete s1[0] s2[0]
     delete s1[3] s2[2]
     insert s1[4] s2[2]
     insert s1[6] s2[5]
    Nr      r   deleteinsert)	r   r   r   r2   r
   Z_src_lenZ	_dest_lenr	   Z_editops)r   r   r   
prefix_lenZ
suffix_lenr(   r1   editopsr)   Zeditop_listcolrowr!   r!   r"   r8     sJ    ,
 


r8   r   c                C  s   t | ||d S )u  
    Return Opcodes describing how to turn s1 into s2.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.

    Returns
    -------
    opcodes : Opcodes
        edit operations required to turn s1 into s2

    Notes
    -----
    The alignment is calculated using an algorithm of Heikki Hyyrö, which is
    described [7]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

    References
    ----------
    .. [7] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
           Stringology (2004).

    Examples
    --------
    >>> from rapidfuzz.distance import LCSseq

    >>> a = "qabxcd"
    >>> b = "abycdf"
    >>> for tag, i1, i2, j1, j2 in LCSseq.opcodes(a, b):
    ...    print(("%7s a[%d:%d] (%s) b[%d:%d] (%s)" %
    ...           (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2])))
     delete a[0:1] (q) b[0:0] ()
      equal a[1:3] (ab) b[0:2] (ab)
     delete a[3:4] (x) b[2:2] ()
     insert a[4:4] () b[2:3] (y)
      equal a[4:6] (cd) b[3:5] (cd)
     insert a[6:6] () b[5:6] (f)
    r3   )r8   Z
as_opcodes)r   r   r   r!   r!   r"   opcodesy  s    2r;   )N)
__future__r   typingr   r   r   Zrapidfuzz._common_pyr   r   Zrapidfuzz._utilsr   Z!rapidfuzz.distance._initialize_pyr	   r
   r   r#   r%   r*   r.   r/   r2   r8   r;   r!   r!   r!   r"   <module>   s.   < >3=d