U
    &%e9!  ã                   @   s<   d dl mZmZ ddlmZ ddlmZ G dd„ deƒZdS )é    )ÚListÚUnioné   )ÚCharSetProber)ÚProbingStatec                       s  e Zd ZdZdZdZddœ‡ fdd„Zddœ‡ fdd	„Zee	dœd
d„ƒZ
ee	dœdd„ƒZedœdd„Zedœdd„Zedœdd„Zedœdd„Zedœdd„Zedœdd„Zee ddœdd„Zee ddœdd„Zeeef ed œd!d"„Zeedœd#d$„ƒZedœd%d&„Z‡  ZS )'ÚUTF1632Proberad  
    This class simply looks for occurrences of zero bytes, and infers
    whether the file is UTF16 or UTF32 (low-endian or big-endian)
    For instance, files looking like (       [nonzero] )+
    have a good probability to be UTF32BE.  Files looking like (   [nonzero] )+
    may be guessed to be UTF16BE, and inversely for little-endian varieties.
    é   g®Gázî?N)Úreturnc                    sn   t ƒ  ¡  d| _dgd | _dgd | _tj| _ddddg| _d| _	d| _
d| _d| _d| _d| _|  ¡  d S ©Nr   é   F)ÚsuperÚ__init__ÚpositionÚzeros_at_modÚnonzeros_at_modr   Ú	DETECTINGÚ_stateÚquadÚinvalid_utf16beÚinvalid_utf16leÚinvalid_utf32beÚinvalid_utf32leÚ'first_half_surrogate_pair_detected_16beÚ'first_half_surrogate_pair_detected_16leÚreset©Úself©Ú	__class__© ú`/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/pip/_vendor/chardet/utf1632prober.pyr   )   s    
zUTF1632Prober.__init__c                    sf   t ƒ  ¡  d| _dgd | _dgd | _tj| _d| _d| _	d| _
d| _d| _d| _ddddg| _d S r
   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r   8   s    
zUTF1632Prober.resetc                 C   s4   |   ¡ rdS |  ¡ rdS |  ¡ r$dS |  ¡ r0dS dS )Nzutf-32bezutf-32lezutf-16bezutf-16lezutf-16)Úis_likely_utf32beÚis_likely_utf32leÚis_likely_utf16beÚis_likely_utf16ler   r   r   r    Úcharset_nameF   s    zUTF1632Prober.charset_namec                 C   s   dS )NÚ r   r   r   r   r    ÚlanguageS   s    zUTF1632Prober.languagec                 C   s   t d| jd ƒS )Nç      ð?g      @©Úmaxr   r   r   r   r    Úapprox_32bit_charsW   s    z UTF1632Prober.approx_32bit_charsc                 C   s   t d| jd ƒS )Nr(   g       @r)   r   r   r   r    Úapprox_16bit_charsZ   s    z UTF1632Prober.approx_16bit_charsc                 C   sj   |   ¡ }|| jkoh| jd | | jkoh| jd | | jkoh| jd | | jkoh| jd | | jkoh| j S ©Nr   r   é   é   )r+   ÚMIN_CHARS_FOR_DETECTIONr   ÚEXPECTED_RATIOr   r   ©r   Úapprox_charsr   r   r    r!   ]   s    
ÿþýûzUTF1632Prober.is_likely_utf32bec                 C   sj   |   ¡ }|| jkoh| jd | | jkoh| jd | | jkoh| jd | | jkoh| jd | | jkoh| j S r-   )r+   r0   r   r1   r   r   r2   r   r   r    r"   g   s    
ÿþýûzUTF1632Prober.is_likely_utf32lec                 C   sV   |   ¡ }|| jkoT| jd | jd  | | jkoT| jd | jd  | | jkoT| j S )Nr   r/   r   r.   )r,   r0   r   r1   r   r   r2   r   r   r    r#   q   s    
ÿÿþûzUTF1632Prober.is_likely_utf16bec                 C   sV   |   ¡ }|| jkoT| jd | jd  | | jkoT| jd | jd  | | jkoT| j S )Nr   r.   r   r/   )r,   r0   r   r1   r   r   r2   r   r   r    r$   {   s    
ÿÿþûzUTF1632Prober.is_likely_utf16le)r   r	   c                 C   s¨   |d dksL|d dksL|d dkrR|d dkrRd|d   krHdkrRn nd| _ |d dksž|d dksž|d dkr¤|d dkr¤d|d   kršdkr¤n nd| _d	S )
zÖ
        Validate if the quad of bytes is valid UTF-32.

        UTF-32 is valid in the range 0x00000000 - 0x0010FFFF
        excluding 0x0000D800 - 0x0000DFFF

        https://en.wikipedia.org/wiki/UTF-32
        r   r   é   éØ   r.   éß   Tr/   N)r   r   )r   r   r   r   r    Úvalidate_utf32_characters…   s8    

ÿ
þ
ý
ý ýý

ÿ
þ
ý
ý ýý
z'UTF1632Prober.validate_utf32_characters)Úpairr	   c                 C   sô   | j sNd|d   krdkr*n nd| _ qxd|d   krBdkrxn qxd| _n*d|d   krfdkrrn nd| _ nd| _| jsÆd|d   kr–dkr¢n nd| _qðd|d   krºdkrðn qðd| _n*d|d   krÞdkrên nd| _nd| _d	S )
a9  
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        r5   r   éÛ   TéÜ   r6   Fr   N)r   r   r   r   )r   r8   r   r   r    Úvalidate_utf16_characters›   s     
z'UTF1632Prober.validate_utf16_characters)Úbyte_strr	   c                 C   sœ   |D ]}| j d }|| j|< |dkrX|  | j¡ |  | jdd… ¡ |  | jdd… ¡ |dkrt| j|  d7  < n| j|  d7  < |  j d7  _ q| jS )Nr   r/   r   r.   r   )r   r   r7   r;   r   r   Ústate)r   r<   ÚcÚmod4r   r   r    Úfeed»   s    

zUTF1632Prober.feedc                 C   sF   | j tjtjhkr| j S |  ¡ dkr.tj| _ n| jdkr@tj| _ | j S )Ngš™™™™™é?i   )r   r   ÚNOT_MEÚFOUND_ITÚget_confidencer   r   r   r   r    r=   Ê   s    

zUTF1632Prober.statec                 C   s(   |   ¡ s |  ¡ s |  ¡ s |  ¡ r$dS dS )Ng333333ë?g        )r$   r#   r"   r!   r   r   r   r    rC   ×   s    þýüûøzUTF1632Prober.get_confidence) Ú__name__Ú
__module__Ú__qualname__Ú__doc__r0   r1   r   r   ÚpropertyÚstrr%   r'   Úfloatr+   r,   Úboolr!   r"   r#   r$   r   Úintr7   r;   r   ÚbytesÚ	bytearrayr   r@   r=   rC   Ú__classcell__r   r   r   r    r      s*   	



 r   N)Útypingr   r   Úcharsetproberr   Úenumsr   r   r   r   r   r    Ú<module>   s   