U
    -e                     @   sd   d Z ddlZddlmZ eeeef edddZG dd deZ	G d	d
 d
eZ
G dd dZdS )a  
Hashing function for dataset keys using `hashlib.md5`

Requirements for the hash function:

- Provides a uniformly distributed hash from random space
- Adequately fast speed
- Working with multiple input types (in this case, `str`, `int` or `bytes`)
- Should be platform independent (generates same hash on different OS and systems)

The hashing function provides a unique 128-bit integer hash of the key provided.

The split name is being used here as the hash salt to avoid having same hashes
in different splits due to same keys
    N)Union)	hash_datareturnc                 C   sL   t | tr| S t | tr&| dd} nt | tr:t| } nt| | dS )z|
    Returns the input hash_data in its bytes form

    Args:
    hash_data: the hash salt/key to be converted to bytes
    \/zutf-8)
isinstancebytesstrreplaceintInvalidKeyErrorencode)r    r   Q/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/datasets/keyhash.py	_as_bytes%   s    



r   c                       s    e Zd ZdZ fddZ  ZS )r   z6Raises an error when given key is of invalid datatype.c                    sD   d| _ d| dt| | _d| _t | j  | j | j  d S )Nz7
FAILURE TO GENERATE DATASET: Invalid key type detectedz
Found Key z	 of type z-
Keys should be either str, int or bytes type)prefixtypeerr_msgsuffixsuper__init__)selfr   	__class__r   r   r   ?   s    zInvalidKeyError.__init____name__
__module____qualname____doc__r   __classcell__r   r   r   r   r   <   s   r   c                       s"   e Zd ZdZd fdd	Z  ZS )DuplicatedKeysErrorz(Raise an error when duplicate key found. c                    s   || _ || _|| _d| _t|dkr>dd| d| | _n.dd|d d  dt|d  d| | _|rxd| nd	| _t 	| j | j | j  d S )
Nz3Found multiple examples generated with the same key   z
The examples at index z, z have the key z... (z more) have the key 
r!   )
keyduplicate_key_indicesfix_msgr   lenjoinr   r   r   r   )r   r$   r%   r&   r   r   r   r   I   s    .zDuplicatedKeysError.__init__)r!   r   r   r   r   r   r    F   s   r    c                   @   s8   e Zd ZdZedddZeeeef edddZ	dS )		KeyHasherz,KeyHasher class for providing hash using md5)	hash_saltc                 C   s   t t|| _d S )N)hashlibmd5r   
_split_md5)r   r*   r   r   r   r   Y   s    zKeyHasher.__init__)r$   r   c                 C   s*   | j  }t|}|| t| dS )zReturns 128-bits unique hash of input key

        Args:
        key: the input key to be hashed (should be str, int or bytes)

        Returns: 128-bit int hash key   )r-   copyr   updater   	hexdigest)r   r$   r,   Zbyte_keyr   r   r   hash\   s    

zKeyHasher.hashN)
r   r   r   r   r	   r   r   r   r   r2   r   r   r   r   r)   V   s   r)   )r   r+   typingr   r	   r   r   r   	Exceptionr   r    r)   r   r   r   r   <module>   s   
