U
    -e)G                  	   @  s  d dl mZ d dlmZmZmZ d dlmZmZ d dl	m
Z
 d dlmZ d dlmZmZmZ dddd	d
ddZdddd	d
ddZddd	dddZdddddddddddd	dddZdddddddddddd	dddZddddddddddddddd Zdddddddddddddd!d"Zddd	dd#d$Zddd%ddddd&d'd(d)Zddd%ddddd*d'd+d,ZdS )-    )annotations)CallableHashableSequence)common_affixconv_sequences)is_none)Indel_py)EditopEditopsOpcodeszSequence[Hashable]ztuple[int, int, int]int)s1s2weightsreturnc           	      C  sl   t | }t |}|\}}}|| ||  }||krNt||| || |  }nt||| || |  }|S )N)lenmin)	r   r   r   len1Zlen2insertdeletereplaceZmax_dist r   b/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/rapidfuzz/distance/Levenshtein_py.py_levenshtein_maximum   s    
r   c                 C  s   t | }|\}}}ttd|d | |}|D ]v}|d }	|d  |7  < t|D ]P}
|	}| |
 |krt||
 | ||
d  | |	| }||
d  }	|||
d < qRq.|d S )Nr      )r   listranger   )r   r   r   r   r   r   r   cachech2tempixr   r   r   _uniform_generic   s    
$r$   )r   r   r   c                 C  s   | st |S dt | > d }d}t | }dt | d > }i }|j}d}| D ]}	||	d|B ||	< |dK }qJ|D ]}
||
d}|}||@ | |A |B |B }|||B  B }||@ }|||@ dk7 }|||@ dk8 }|d> dB }|d> }|||B  B }||@ }qn|S Nr   r   )r   get)r   r   VPVNcurrDistmaskblock	block_getr#   ch1r    PM_jXD0HPHNr   r   r   _uniform_distance/   s2    


r3   r   r   r   N)r   	processorscore_cutoff
score_hintztuple[int, int, int] | Nonez(Callable[..., Sequence[Hashable]] | Nonez
int | None)r   r   r   r5   r6   r7   r   c                C  s   |}|dk	r|| } ||}t | |\} }|dks:|dkrFt| |}n"|dkr\t| |}nt| ||}|dksx||kr||S |d S )a  
    Calculates the minimum number of insertions, deletions, and substitutions
    required to change one sequence into the other according to Levenshtein with custom
    costs for insertion, deletion and substitution

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    weights : Tuple[int, int, int] or None, optional
        The weights for the three operations in the form
        (insertion, deletion, substitution). Default is (1, 1, 1),
        which gives all three operations a weight of 1.
    processor : callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : int, optional
        Maximum distance between s1 and s2, that is
        considered as a result. If the distance is bigger than score_cutoff,
        score_cutoff + 1 is returned instead. Default is None, which deactivates
        this behaviour.
    score_hint : int, optional
        Expected distance between s1 and s2. This is used to select a
        faster implementation. Default is None, which deactivates this behaviour.

    Returns
    -------
    distance : int
        distance between s1 and s2

    Raises
    ------
    ValueError
        If unsupported weights are provided a ValueError is thrown

    Examples
    --------
    Find the Levenshtein distance between two strings:

    >>> from rapidfuzz.distance import Levenshtein
    >>> Levenshtein.distance("lewenstein", "levenshtein")
    2

    Setting a maximum distance allows the implementation to select
    a more efficient implementation:

    >>> Levenshtein.distance("lewenstein", "levenshtein", score_cutoff=1)
    2

    It is possible to select different weights by passing a `weight`
    tuple.

    >>> Levenshtein.distance("lewenstein", "levenshtein", weights=(1,1,2))
    3
    Nr4   )r   r      r   )r   r3   Indeldistancer$   )r   r   r   r5   r6   r7   _distr   r   r   r:   S   s    Br:   c          
      C  sl   |}|dk	r|| } ||}t | |\} }|p0d}t| ||}t| ||d}|| }	|dksd|	|krh|	S dS )a  
    Calculates the levenshtein similarity in the range [max, 0] using custom
    costs for insertion, deletion and substitution.

    This is calculated as ``max - distance``, where max is the maximal possible
    Levenshtein distance given the lengths of the sequences s1/s2 and the weights.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    weights : Tuple[int, int, int] or None, optional
        The weights for the three operations in the form
        (insertion, deletion, substitution). Default is (1, 1, 1),
        which gives all three operations a weight of 1.
    processor : callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : int, optional
        Maximum distance between s1 and s2, that is
        considered as a result. If the similarity is smaller than score_cutoff,
        0 is returned instead. Default is None, which deactivates
        this behaviour.
    score_hint : int, optional
        Expected similarity between s1 and s2. This is used to select a
        faster implementation. Default is None, which deactivates this behaviour.

    Returns
    -------
    similarity : int
        similarity between s1 and s2

    Raises
    ------
    ValueError
        If unsupported weights are provided a ValueError is thrown
    Nr4   r   r   )r   r   r:   )
r   r   r   r5   r6   r7   r;   maximumr<   simr   r   r   
similarity   s    0r@   zfloat | Nonefloatc          
      C  s   |}t | st |rdS |dk	r0|| } ||}t| |\} }|pDd}t| ||}t| ||d}|rl|| nd}	|dks|	|kr|	S dS )a  
    Calculates a normalized levenshtein distance in the range [1, 0] using custom
    costs for insertion, deletion and substitution.

    This is calculated as ``distance / max``, where max is the maximal possible
    Levenshtein distance given the lengths of the sequences s1/s2 and the weights.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    weights : Tuple[int, int, int] or None, optional
        The weights for the three operations in the form
        (insertion, deletion, substitution). Default is (1, 1, 1),
        which gives all three operations a weight of 1.
    processor : callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 1.0.
        For norm_dist > score_cutoff 1.0 is returned instead. Default is None,
        which deactivates this behaviour.
    score_hint : float, optional
        Expected normalized distance between s1 and s2. This is used to select a
        faster implementation. Default is None, which deactivates this behaviour.

    Returns
    -------
    norm_dist : float
        normalized distance between s1 and s2 as a float between 1.0 and 0.0

    Raises
    ------
    ValueError
        If unsupported weights are provided a ValueError is thrown
          ?Nr4   r=   r   r   )r   r   r   r:   )
r   r   r   r5   r6   r7   r;   r>   r<   	norm_distr   r   r   normalized_distance   s    /rD   c          	      C  st   |}t | st |rdS |dk	r0|| } ||}t| |\} }|pDd}t| ||d}d| }|dksl||krp|S dS )a  
    Calculates a normalized levenshtein similarity in the range [0, 1] using custom
    costs for insertion, deletion and substitution.

    This is calculated as ``1 - normalized_distance``

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    weights : Tuple[int, int, int] or None, optional
        The weights for the three operations in the form
        (insertion, deletion, substitution). Default is (1, 1, 1),
        which gives all three operations a weight of 1.
    processor : callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 1.0.
        For norm_sim < score_cutoff 0 is returned instead. Default is None,
        which deactivates this behaviour.
    score_hint : int, optional
        Expected normalized similarity between s1 and s2. This is used to select a
        faster implementation. Default is None, which deactivates this behaviour.

    Returns
    -------
    norm_sim : float
        normalized similarity between s1 and s2 as a float between 0 and 1.0

    Raises
    ------
    ValueError
        If unsupported weights are provided a ValueError is thrown

    Examples
    --------
    Find the normalized Levenshtein similarity between two strings:

    >>> from rapidfuzz.distance import Levenshtein
    >>> Levenshtein.normalized_similarity("lewenstein", "levenshtein")
    0.81818181818181

    Setting a score_cutoff allows the implementation to select
    a more efficient implementation:

    >>> Levenshtein.normalized_similarity("lewenstein", "levenshtein", score_cutoff=0.85)
    0.0

    It is possible to select different weights by passing a `weight`
    tuple.

    >>> Levenshtein.normalized_similarity("lewenstein", "levenshtein", weights=(1,1,2))
    0.85714285714285

    When a different processor is used s1 and s2 do not have to be strings

    >>> Levenshtein.normalized_similarity(["lewenstein"], ["levenshtein"], processor=lambda s: s[0])
    0.81818181818181
    g        Nr4   r=   rB   r   )r   r   rD   )	r   r   r   r5   r6   r7   r;   rC   Znorm_simr   r   r   normalized_similarity!  s    GrE   c                 C  s&  | st |g g fS dt | > d }d}t | }dt | d > }i }|j}d}| D ]}	||	d|B ||	< |dK }qPg }
g }|D ]}||d}|}||@ | |A |B |B }|||B  B }||@ }|||@ dk7 }|||@ dk8 }|d> dB }|d> }|||B  B }||@ }|
| || q|||
|fS r%   )r   r&   append)r   r   r'   r(   r)   r*   r+   r,   r#   r-   Z	matrix_VPZ	matrix_VNr    r.   r/   r0   r1   r2   r   r   r   _matrixw  s:    


rG   r5   r7   r   )r   r   r5   r7   r   c                C  s  |}|dk	r|| } ||}t | |\} }t| |\}}| |t| |  } ||t||  }t| |\}}}	tg dd}
t| | | |
_t|| | |
_|dkr|
S dg| }t| }t|}|dkr|dkr||d  d|d > @ r|d8 }|d8 }td|| || ||< q|d8 }|rh|	|d  d|d > @ rh|d8 }td|| || ||< q|d8 }| | || kr|d8 }td|| || ||< q|dkr|d8 }|d8 }td|| || ||< q|dkr|d8 }|d8 }td|| || ||< q||
_|
S )u  
    Return Editops describing how to turn s1 into s2.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor : callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_hint : int, optional
        Expected distance between s1 and s2. This is used to select a
        faster implementation. Default is None, which deactivates this behaviour.

    Returns
    -------
    editops : Editops
        edit operations required to turn s1 into s2

    Notes
    -----
    The alignment is calculated using an algorithm of Heikki Hyyrö, which is
    described [8]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

    References
    ----------
    .. [8] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
           Stringology (2004).

    Examples
    --------
    >>> from rapidfuzz.distance import Levenshtein
    >>> for tag, src_pos, dest_pos in Levenshtein.editops("qabxcd", "abycdf"):
    ...    print(("%7s s1[%d] s2[%d]" % (tag, src_pos, dest_pos)))
     delete s1[1] s2[0]
    replace s1[3] s2[2]
     insert s1[6] s2[5]
    Nr   r   r   r   r   )	r   r   r   rG   r   Z_src_lenZ	_dest_lenr
   Z_editops)r   r   r5   r7   r;   
prefix_lenZ
suffix_lenr<   r'   r(   editopsZeditop_listcolrowr   r   r   rJ     sP    /
 

rJ   r   c                C  s   t | |||d S )u  
    Return Opcodes describing how to turn s1 into s2.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor : callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_hint : int, optional
        Expected distance between s1 and s2. This is used to select a
        faster implementation. Default is None, which deactivates this behaviour.

    Returns
    -------
    opcodes : Opcodes
        edit operations required to turn s1 into s2

    Notes
    -----
    The alignment is calculated using an algorithm of Heikki Hyyrö, which is
    described [9]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

    References
    ----------
    .. [9] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
           Stringology (2004).

    Examples
    --------
    >>> from rapidfuzz.distance import Levenshtein

    >>> a = "qabxcd"
    >>> b = "abycdf"
    >>> for tag, i1, i2, j1, j2 in Levenshtein.opcodes("qabxcd", "abycdf"):
    ...    print(("%7s a[%d:%d] (%s) b[%d:%d] (%s)" %
    ...           (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2])))
     delete a[0:1] (q) b[0:0] ()
      equal a[1:3] (ab) b[0:2] (ab)
    replace a[3:4] (x) b[2:3] (y)
      equal a[4:6] (cd) b[3:5] (cd)
     insert a[6:6] () b[5:6] (f)
    rH   )rJ   Z
as_opcodes)r   r   r5   r7   r   r   r   opcodes  s    5rM   )
__future__r   typingr   r   r   Zrapidfuzz._common_pyr   r   Zrapidfuzz._utilsr   Zrapidfuzz.distancer	   r9   Z!rapidfuzz.distance._initialize_pyr
   r   r   r   r$   r3   r:   r@   rD   rE   rG   rJ   rM   r   r   r   r   <module>   sF   (VACV-k