U
    -ee                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZmZmZ ddlmZmZ ddlZddlZddl Z dd	lm!Z! dd
l"m#Z# ddl$m%Z%m&Z& ddl'm(Z( ddl$m)Z) ddl*m+Z+ ddl,m-Z- e).e/Z0dZ1ede2eZ3dgeeee2f  e2dddZ4e2e5dddZ6e2e5dddZ7e2e5dddZ8e3e3ddd Z9dhe2e2e2d#d$d%Z:die2e2ee j;e<f d#d&d'Z=dje2e2ee2 e2d(d)d*Z>e2e2e2d+d,d-Z?e2e2d.d/d0Z@dkd1d2ZAdle2d3d4d5ZBdmeee2eCf  e2d6d7d8ZDdne2eee2e5f  eee2e5f  eCd:d;d<ZEG d=d> d>eFZGdoee2 d?d@dAZHdde jIjJdddBdfeeK eeC ee jIjJ eeeL  eLeMeMdCdDdEZNdpe2e2eLeMeMeMe j;dGdHdIZOdqdJdKZPG dLdM dMejQjRZRdrdNdOZSdsdPdQZTdtdRdSZUdudUdVZVdve j;d3dWdXZWdwe2eee2e5f  eee2e5f  ee2 d:dYdZZXdxe2d3d\d]ZYd^d_ ZZd`da Z[dbdc Z\ej]dddedfZ^dS )yz
Utilities for working with the local dataset cache.
This file is adapted from the AllenNLP library at https://github.com/allenai/allennlp
Copyright by the AllenNLP authors.
    N)closingcontextmanager)partial)sha256)Path)ListOptionalTypeTypeVarUnion)urljoinurlparse)HfFolder)version   )__version__config)DownloadConfig   )logging)ExtractManager)FileLock.incompleteT)hf_modules_cachereturnc              	   C   sx   | dk	r| nt j} t| } | tjkrttj|  tj| dd tjtj	| dstt
tj	| dd W 5 Q R X | S )z
    Add hf_modules_cache to the python path.
    By default hf_modules_cache='~/.cache/huggingface/modules'.
    It can also be set with the environment variable HF_MODULES_CACHE.
    This is used to add modules such as `datasets_modules`
    NTexist_okz__init__.pyw)r   ZHF_MODULES_CACHEstrsyspathappendosmakedirsexistsjoinopen)r    r(   Z/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/datasets/utils/file_utils.pyinit_hf_modules-   s    

r*   )url_or_filenamer   c                 C   s&   t | jdko$tjt | jd  S N z:/r   schemer#   r!   ismountr+   r(   r(   r)   is_remote_url@   s    r2   c                 C   s$   t | jdkp"tjt | jd S r,   r.   r1   r(   r(   r)   is_local_pathD   s    r3   c                 C   s   t | jdkotj|  S )Nr-   )r   r/   r#   r!   isabsr1   r(   r(   r)   is_relative_pathK   s    r5   )r!   r   c                 C   s6   t jt jt jt| }t| tr2t|S |S )z'Convert relative path to absolute path.)r#   r!   abspath
expanduser
expandvarsr   
isinstancer   )r!   Zabs_path_strr(   r(   r)   relative_to_absolute_pathO   s     r:   FT)
identifierfilenamer   c                 C   s6   |r|rt jnt j}n|r t jnt j}d|| |fS N/)r   Z"CLOUDFRONT_DATASETS_DISTRIB_PREFIXZS3_DATASETS_BUCKET_PREFIXZ!CLOUDFRONT_METRICS_DISTRIB_PREFIXZS3_METRICS_BUCKET_PREFIXr&   )r;   r<   use_cdndatasetZendpointr(   r(   r)   hf_bucket_urlU   s    rA   c                 C   s   t t| |||d|dS )N)r;   r<   r?   r@   )max_retries)	http_headrA   )r;   r<   r?   r@   rB   r(   r(   r)   
head_hf_s3]   s    rD   )r!   namerevisionr   c                 C   sH   t tjrdnt}|p|}|r2tjj|| |dS tjj|| |dS d S )Nmain)rF   r!   rE   )r   parser   is_devreleaser   ZREPO_DATASETS_URLformatZREPO_METRICS_URL)r!   rE   r@   rF   Zdefault_revisionr(   r(   r)   hf_github_urlf   s
    rK   )	base_name	pathnamesr   c                 G   s8   t | r"tj| fdd |D  S t| f|  S d S )Nc                 s   s&   | ]}t |tjd d V  qdS r>   N)r   replacer#   seplstrip).0pathnamer(   r(   r)   	<genexpr>q   s     z#url_or_path_join.<locals>.<genexpr>)r2   	posixpathr&   r   as_posix)rL   rM   r(   r(   r)   url_or_path_joino   s    rW   )url_or_pathr   c                 C   s*   t | r| d | d S tj| S d S r=   )r2   rindexr#   r!   dirname)rX   r(   r(   r)   url_or_path_parentv   s    r[   c                 C   sV   |  d}t|}| }|r@| d}t|}|d|  7 }| drR|d7 }|S )a  
    Convert `url` into a hashed filename in a repeatable way.
    If `etag` is specified, append its hash to the url's, delimited
    by a period.
    If the url ends with .h5 (Keras HDF5 weights) adds '.h5' to the name
    so that TF 2.0 can identify it as a HDF5 file
    (see https://github.com/tensorflow/tensorflow/blob/00fad90125b18b80fe054de1055770cfb8fe4ba3/tensorflow/python/keras/engine/network.py#L1380)
    utf-8.z.py)encoder   	hexdigestendswith)urletagZ	url_bytesZurl_hashr<   Z
etag_bytesZ	etag_hashr(   r(   r)   hash_url_to_filename}   s    	


rc   )r   c                 K   s   |dkrt f |}|jptj}t|tr0t|}t| trBt| } t| rt| ||j	|j
|j|j|j|j|j|j|j|j|jd}n<tj| r| }n*t| rtd|  dntd|  d|dkr|S |jrt|jdj||jd}|S )	a  
    Given something that might be a URL (or might be a local path),
    determine which. If it's a URL, download the file and cache it, and
    return the path to the cached file. If it's already a local path,
    make sure the file exists and then return the path.

    Return:
        Local path (string)

    Raises:
        FileNotFoundError: in case of non-recoverable file
            (non-existent or no cache on disk)
        ConnectionError: in case of unreachable url
            and no cache on disk
        ValueError: if it couldn't parse the url or filename correctly
        requests.exceptions.ConnectionError: in case of internet connection issue
    N)	cache_dirforce_downloadproxiesresume_download
user_agentlocal_files_onlyuse_etagrB   tokenignore_url_paramsstorage_optionsdownload_desczLocal file z doesn't existzunable to parse z as a URL or as a local path)rd   )force_extract)r   rd   r   ZDOWNLOADED_DATASETS_PATHr9   r   r   r2   get_from_cachere   rf   rg   rh   ri   rj   rB   rk   rl   rm   rn   r#   r!   r%   r3   FileNotFoundError
ValueErrorZextract_compressed_filer   extractro   )r+   Zdownload_configZdownload_kwargsrd   Zoutput_pathr(   r(   r)   cached_path   sH    


 rt   )rh   r   c                 C   s   dt  }|dtj 7 }|dtj  7 }|dtj 7 }tjrP|dtj 7 }tjrf|dtj 7 }tj	r||dtj
 7 }tjr|dtj 7 }t| tr|d	d	d
d |  D  7 }nt| tr|d	|  7 }|S )Nz	datasets/z	; python/z; huggingface_hub/z
; pyarrow/z; torch/z; tensorflow/z; jax/z; apache_beam/z; c                 s   s    | ]\}}| d | V  qdS rN   r(   )rR   kvr(   r(   r)   rT      s     z*get_datasets_user_agent.<locals>.<genexpr>)r   r   Z
PY_VERSIONhuggingface_hubZPYARROW_VERSIONZTORCH_AVAILABLEZTORCH_VERSIONZTF_AVAILABLEZ
TF_VERSIONZJAX_AVAILABLEZJAX_VERSIONZBEAM_AVAILABLEZBEAM_VERSIONr9   dictr&   itemsr   )rh   Zuar(   r(   r)   get_datasets_user_agent   s"    

$
rz   
deprecated)ra   rk   use_auth_tokenr   c                 C   sl   |dkr t d| dt |}i }| tjrh|dkr>d}nt|trN|}nt	 }|rhd| |d< |S )zHandle the HF authenticationr{   'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.
You can remove this warning by passing 'token=
' instead.FNzBearer authorization)
warningswarnFutureWarning
startswithr   HF_ENDPOINTr9   r   r   	get_token)ra   rk   r|   headersr(   r(   r)   "get_authentication_headers_for_url   s     

r   c                   @   s   e Zd ZdS )OfflineModeIsEnabledN)__name__
__module____qualname__r(   r(   r(   r)   r     s   r   msgc                 C   s&   t jr"t| dkrdn
dt|  dS )zaRaise an OfflineModeIsEnabled error (subclass of ConnectionError) if HF_DATASETS_OFFLINE is True.NzOffline mode is enabled.zOffline mode is enabled. )r   ZHF_DATASETS_OFFLINEr   r   r   r(   r(   r)   !_raise_if_offline_mode_is_enabled  s    r         ?)	func_argsfunc_kwargs
exceptionsstatus_codesrB   base_wait_timemax_wait_timec                 C   s   |pd}|pi }d}z| ||W S  |k
r }	 zf||ksJ|rP|	j j|krP|	nDt||d|  }
t|  d|
 d||  d t|
 |d7 }W 5 d }	~	X Y qX qd S )Nr(   r   r   z timed out, retrying in zs... []r   )responsestatus_codeminloggerinfotimesleep)funcr   r   r   r   rB   r   r   retryerr
sleep_timer(   r(   r)   _retry  s    
 
r         $@)methodra   rB   r   r   timeoutr   c                 K   s   t d|  d\}}|s|d7 }z$tjf |  ||d|}	d}W q tjjtjjfk
r }
 zR||krr|
n@t|  d| d||  d t	||d	|d   }t
| W 5 d
}
~
X Y qX q|	S )a  Wrapper around requests to retry in case it fails with a ConnectTimeout, with exponential backoff.

    Note that if the environment variable HF_DATASETS_OFFLINE is set to 1, then a OfflineModeIsEnabled error is raised.

    Args:
        method (str): HTTP method, such as 'GET' or 'HEAD'.
        url (str): The URL of the resource to fetch.
        max_retries (int): Maximum number of retries, defaults to 0 (no retries).
        base_wait_time (float): Duration (in seconds) to wait before retrying the first time. Wait time between
            retries then grows exponentially, capped by max_wait_time.
        max_wait_time (float): Maximum amount of time between two retries, in seconds.
        **params (additional keyword arguments): Params to pass to :obj:`requests.request`.
    Tried to reach )r   Fr   )r   ra   r   Tz request to z timed out, retrying... [r   r   N)r   requestsrequestupperr   ConnectTimeoutConnectionErrorr   r   r   r   r   )r   ra   rB   r   r   r   paramstriessuccessr   r   r   r(   r(   r)   _request_with_retry-  s     r   c                 C   sJ   t d|   tj| |d\}}}t|dkr<td| ||d S )Nr   rm   r   z=HEAD can be called with at most one path but was called with r   )r   fsspecget_fs_token_pathslenrr   r   )ra   rm   fs_pathsr(   r(   r)   fsspec_headT  s
    r   c                       s   e Zd Zd fdd	Z  ZS )TqdmCallbackNc                    s   t  j|f|| t| _d S N)super__init__r   Z_tqdm)selftqdm_kwargsargskwargs	__class__r(   r)   r   ]  s    zTqdmCallback.__init__)N)r   r   r   r   __classcell__r(   r(   r   r)   r   \  s   r   c                 C   st   t d|   tj| |d\}}}t|dkr<td| t|pDdt  dddd	}|j|d
 |j	|d d S )Nr   r   r   z<GET can be called with at most one path but was called with DownloadingBT)descdisableunit
unit_scale)r   r   )callback)
r   r   r   r   rr   r   r   is_progress_bar_enabledZget_filerE   )ra   	temp_filerm   r   r   r   r   r   r(   r(   r)   
fsspec_getb  s    r   c              	   C   sX   t d|   z.ttjj| |d}|d W 5 Q R X W n tk
rR   Y dS X dS )Nr   r   r   FT)r   r   urllibr   urlopenread	Exception)ra   r   rr(   r(   r)   ftp_headr  s    r   c              
   C   s   t d|   zHtd|  d|j  ttjj| |d}t	|| W 5 Q R X W n0 tj
jk
r } zt|d W 5 d }~X Y nX d S )Nr   zGetting through FTP z into r   )r   r   r   rE   r   r   r   r   shutilcopyfileobjerrorURLErrorr   )ra   r   r   r   er(   r(   r)   ftp_get|  s    r         Y@c	              
   C   s   t |pi }t|dd|d< |dkr<d|dd|d< td| d	|||||d
}	|	jdkrbd S |	jd}
|
d k	r|t|
 nd }tj	dd	|||pdt
  d0}|	jddD ]}|t| || qW 5 Q R X d S )N
user-agentrh   r   zbytes=d-RangeGETT)r   ra   streamrf   r   cookiesrB   r   i  zContent-Lengthr   r   )r   r   totalinitialr   r   i   )
chunk_size)copydeepcopyrz   getr   r   r   intr   Ztqdmr   iter_contentupdater   write)ra   r   rf   resume_sizer   r   r   rB   r   r   content_lengthr   progresschunkr(   r(   r)   http_get  s<    

r   c              
   C   s>   t |pi }t|dd|d< td| ||||||d}|S )Nr   r   HEAD)r   ra   rf   r   r   allow_redirectsr   rB   )r   r   rz   r   r   )ra   rf   r   r   r   r   rB   r   r(   r(   r)   rC     s    
rC   c                 C   sn   |dkr t d| dt |}t| jdkr2d S t| |d}t| |dd}|  |jrf|j	
dnd }|S )	Nr{   r}   r~   httphttpsrk      )r   rB   ETag)r   r   r   r   r/   r   rC   raise_for_statusokr   r   )ra   rk   r|   r   r   rb   r(   r(   r)   request_etag  s    
r   d   c           $         s4  |dkr t d| dt |}
|dkr.tj}t|tr@t|}tj	|dd |rdt
| t| j}n| }d}d}d}d}d}d}t|dd}tj||}tj|r|s|s|S t| |
d	}|dk	r||d
< |st| j}|dkrt| }n>|dkr.t| |d}|r&|ddp(|ddnd}d}zNt| d|||	|d}|jdkr|rd|jdnd}|j D ]2\}}|drrd| krr| d| 7 } |j}qrd}d| kr|d| kr|| d7 } n|jdkrd| ks4|jdkrd| ks4|jdkrtd| s4td|js4|jdkrJd| krJd}td|   n2|jdkr|tj | kr||
dkr|t!d |  d!W n0 t"t#j$j%fk
r } z|}W 5 d}~X Y nX |sptj|r|s|S |rt&d"| d#n$|dk	r
|jd$kr
t&d%|  t'd&|   |dk	r>t!d'|  d(t(| d)n2|dk	rbt!d'|  d*|j d)nt!d'|  t||}tj||}tj|r|s|S |d+ }t)|x tj|r|s|W  5 Q R  S |d,  t*d? fd.d/	}d0}|r"t+|d1d2}tj r"t, j-}| f}t|  d3|j.  |dkrVt/| | n4|dkrrt0| |||d4 nt1| ||||||	|d5 W 5 Q R X td6|  d7|  t23|j.| t4d8} t4|  t5|d8|  @  td9|  | |d:}!|d; }"t6|"d<d=d>}#t78|!|# W 5 Q R X W 5 Q R X |S )@a  
    Given a URL, look for the corresponding file in the local cache.
    If it's not there, download it. Then return the path to the cached file.

    Return:
        Local path (string)

    Raises:
        FileNotFoundError: in case of non-recoverable file
            (non-existent or no cache on disk)
        ConnectionError: in case of unreachable url
            and no cache on disk
    r{   r}   r~   NTr   F)rb   r   r   ftpr   r   r   rb   )r   rf   r   rB   r      Zdownload_warningzdrive.google.comz	&confirm=zconfirm=z
&confirm=ti  zfirebasestorage.googleapis.comi  i  z7^https?://github.com/.*?/.*?/releases/download/.*?/.*?$z#^https://.*?s3.*?amazonaws.com/.*?$zndownloader.figstatic.comz"Couldn't get ETag version for url i  zUnauthorized for URL zU. Please use the parameter `token=True` after logging in with `huggingface-cli login`z6Cannot find the requested files in the cached path at zi and outgoing traffic has been disabled. To enable file online look-ups, set 'local_files_only' to False.i  zCouldn't find file at r   zCouldn't reach z ()z (error z.lockr   w+bc              	   3   s    t  | }|V  W 5 Q R X d S r   )r'   )modefZincomplete_pathr(   r)   temp_file_managern  s    z)get_from_cache.<locals>.temp_file_managerr   za+b)r  zB not found in cache or force_download set to True, downloading to )rm   r   )rf   r   r   r   rB   r   zstoring z in cache at i  zcreating metadata file for )ra   rb   z.jsonr   r\   )encoding)r  )9r   r   r   r   ZHF_DATASETS_CACHEr9   r   r   r#   r$   r   r   r!   rc   r&   r%   r   r/   r   r   r   rC   r   r   r   ry   r   rematchra   r   r   r   r   OSErrorr   r   Timeoutrq   r   reprr   r   r   statst_sizerE   r   r   r   r   moveumaskchmodr'   jsondump)$ra   rd   re   rf   Zetag_timeoutrg   rh   ri   rj   rB   rk   r|   rl   rm   rn   Z
cached_url	connectedr   r   rb   Z
head_errorr/   r<   
cache_pathr   ru   rv   r   Z	lock_pathr  r   r   r  meta	meta_pathZ	meta_filer(   r  r)   rp     s   




$




"









 rp   c                     s    fdd}|S )Nc                    s(   d  d | jd k	r| jnd | _| S Nr-   z

)r&   __doc__fndocstrr(   r)   docstring_decorator  s    $z1add_start_docstrings.<locals>.docstring_decoratorr(   r  r  r(   r  r)   add_start_docstrings  s    r  c                     s    fdd}|S )Nc                    s(   | j d k	r| j ndd d  | _ | S r  )r  r&   r  r  r(   r)   r    s    $z/add_end_docstrings.<locals>.docstring_decoratorr(   r  r(   r  r)   add_end_docstrings  s    r   c                 C   s   t dd | D S )Nc                 s   s   | ]}|  jV  qd S r   )r  r  )rR   r!   r(   r(   r)   rT     s     z(estimate_dataset_size.<locals>.<genexpr>)sum)r   r(   r(   r)   estimate_dataset_size  s    r"  )r  c                 C   s4   t  }| d}|sq,||7 }|drq,qt|S )Nr      
)	bytearrayr   r`   bytes)r  resbr(   r(   r)   readline  s    

r(  )N)FT)FTr   )TN)N)N)N)Nr{   )N)r   r   r   r   )N)NN)r   )r   )Nr   NNr   r   N)NNNTr   r   )Nr{   )NFNr   FNFTr   Nr{   FNN)_r  r   ior  r#   rU   r  r   r    r   r   r   
contextlibr   r   	functoolsr   hashlibr   pathlibr   typingr   r   r	   r
   r   urllib.parser   r   r   rw   r   r   	packagingr   r-   r   r   Zdownload.download_configr   r   rs   r   Zfilelockr   Z
get_loggerr   r   ZINCOMPLETE_SUFFIXr   r   r*   boolr2   r3   r5   r:   rA   Responser   rD   rK   rW   r[   rc   rt   rx   rz   r   r   r   r   r   RequestExceptiontupler   floatr   r   r   	callbacksr   r   r   r   r   rC   r   rp   r  r   r"  	RawIOBaser(  r(   r(   r(   r)   <module>   s   
	      		
 E     


    '




             
#                               R