U
    +-e                    @   s
  U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZ d dlmZm Z  d dl!Z!d d	l"m#Z# d d
l$m%Z%m&Z& d dl'm(Z( ddl)m*Z* ddl(m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6 ddl7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZCmDZDmEZEmFZFmGZGmHZHmIZImJZJmKZKmLZLmMZMmNZNmOZOmPZPmQZQ ddlRmSZS ddlTmUZU ddlVmWZW eOXeYZZe[dZ\i Z]ee^e_f e`d< dlee^edf e_dddZaeb ZceZde[dZeeddG dd dZfeQddddde^e^ee^ ee^ ee^ ee^ e^ddd Zgdme^ee^ e^d!d"d#Zhdnee^ e_ee^e^f d%d&d'Zidddd(ee^ ee^ eee^df e^d)d*d+ZjG d,d- d-ekZldoee^ d.d/d0Zmd d1d2d3d$d4eWe^eneoeoeeo e_e!jpd5d6d7Zqe!jpd8d9d:Zrdd dd3d dd;e^eeoeee^e^f  eeo eneen d<d=d>ZseQddddd$ddd?d$dd$d$d@e^ee^ ee^ ee^edf eee^df e_ee^ ee eoe_ee_e^df e_e_e^dAdBdCZtee^ ee^ dDdEdFZudpe^e^e_ddGdHdIZvdqe^e^e_ddGdJdKZwe^e^e^ddLdMdNZxeQe^e^e^dOdPdQZyenee^ef ddRdSdTZzeQdddddddddUdd$ddd?d$dd$d$dVe^e^ee^ ee^ ee^ ee^ ee^ ee^ ee^edf ee^edf ee_edU f eee^df e_ee^ ee eoe_ee_e^df e_e_e^dWdXdYZ{eQdre^e^ee^edf ee^ ee^ ee^eddf dZd[d\Z|eQdse^ee_e^df ee eeo efd]d^d_Z}ee^ een d`dadbZ~e^e^ddcdddeZe^e^e^e^dfdgdhZe^e^e^ee_edU f e^didjdkZdS )t    N)contextmanager)	dataclass)partial)sha256)Path)AnyBinaryIODict	GeneratorLiteralOptionalTupleUnion)quoteurlparse)FileLock)
ProxyErrorTimeout)	constants   )__version__)DEFAULT_REVISIONENDPOINTHF_HUB_DISABLE_SYMLINKS_WARNINGHF_HUB_ENABLE_HF_TRANSFERHUGGINGFACE_CO_URL_TEMPLATE HUGGINGFACE_HEADER_X_LINKED_ETAG HUGGINGFACE_HEADER_X_LINKED_SIZE HUGGINGFACE_HEADER_X_REPO_COMMITHUGGINGFACE_HUB_CACHEREPO_ID_SEPARATOR
REPO_TYPESREPO_TYPES_URL_PREFIXES)EntryNotFoundErrorGatedRepoErrorLocalEntryNotFoundErrorRepositoryNotFoundErrorRevisionNotFoundErrorSoftTemporaryDirectorybuild_hf_headersget_fastai_versionget_fastcore_versionget_graphviz_versionget_jinja_versionget_pydot_versionget_tf_versionget_torch_versionhf_raise_for_statushttp_backoffis_fastai_availableis_fastcore_availableis_graphviz_availableis_jinja_availableis_pydot_availableis_tf_availableis_torch_availableloggingtqdmvalidate_hf_hub_args_http_user_agent)_PY_VERSION)HTTP_METHOD_Tzfilename="(?P<filename>.*?)";_are_symlinks_supported_in_dir)	cache_dirreturnc              	   C   s   | dkrt } tt|   } | tkrdt| < tj| dd t| d}t|d }|	  t|d }tj
j|tj
|d}zt|| W nH tk
r   dt| < tsd	|  d
}tjdkr|d7 }t| Y nX W 5 Q R X t|  S )a  Return whether the symlinks are supported on the machine.

    Since symlinks support can change depending on the mounted disk, we need to check
    on the precise cache folder. By default, the default HF cache directory is checked.

    Args:
        cache_dir (`str`, `Path`, *optional*):
            Path to the folder where cached files are stored.

    Returns: [bool] Whether symlinks are supported in the directory.
    NTexist_ok)dirZdummy_file_srcZdummy_file_dst)startFz`huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in a%  . Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.nta	  
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development)r   strr   
expanduserresolverA   osmakedirsr(   touchpathrelpathdirnamesymlinkOSErrorr   namewarningswarn)rB   ZtmpdirZsrc_pathZdst_pathrelative_srcmessage rY   ^/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/huggingface_hub/file_download.pyare_symlinks_supportedS   s0    


r[   z^[0-9a-f]{40}$T)frozenc                   @   s>   e Zd ZU dZee ed< ee ed< eed< ee ed< dS )HfFileMetadataa8  Data structure containing information about a file versioned on the Hub.

    Returned by [`get_hf_file_metadata`] based on a URL.

    Args:
        commit_hash (`str`, *optional*):
            The commit_hash related to the file.
        etag (`str`, *optional*):
            Etag of the file on the server.
        location (`str`):
            Location where to download the file. Can be a Hub url or not (CDN).
        size (`size`):
            Size of the file. In case of an LFS file, contains the size of the actual
            LFS file, not the pointer.
    commit_hashetaglocationsizeN)__name__
__module____qualname____doc__r   rI   __annotations__intrY   rY   rY   rZ   r]      s
   
r]   	subfolder	repo_typerevisionendpoint)repo_idfilenameri   rj   rk   rl   rC   c                C   s   |dkrd}|dk	r"| d| }|t kr2td|tkrFt| |  } |dkrRt}tj| t|ddt|d}|dk	r|tr||t	td  }|S )a=	  Construct the URL of a file from the given information.

    The resolved address can either be a huggingface.co-hosted url, or a link to
    Cloudfront (a Content Delivery Network, or CDN) for large files which are
    more than a few MBs.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) name and a repo name separated
            by a `/`.
        filename (`str`):
            The name of the file in the repo.
        subfolder (`str`, *optional*):
            An optional value corresponding to a folder inside the repo.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if downloading from a dataset or space,
            `None` or `"model"` if downloading from a model. Default is `None`.
        revision (`str`, *optional*):
            An optional Git revision id which can be a branch name, a tag, or a
            commit hash.
        endpoint (`str`, *optional*):
            Hugging Face Hub base url. Will default to https://huggingface.co/. Otherwise, one can set the `HF_ENDPOINT`
            environment variable.

    Example:

    ```python
    >>> from huggingface_hub import hf_hub_url

    >>> hf_hub_url(
    ...     repo_id="julien-c/EsperBERTo-small", filename="pytorch_model.bin"
    ... )
    'https://huggingface.co/julien-c/EsperBERTo-small/resolve/main/pytorch_model.bin'
    ```

    <Tip>

    Notes:

        Cloudfront is replicated over the globe so downloads are way faster for
        the end user (and it also lowers our bandwidth costs).

        Cloudfront aggressively caches files by default (default TTL is 24
        hours), however this is not an issue here because we implement a
        git-based versioning system on huggingface.co, which means that we store
        the files on S3/Cloudfront in a content-addressable way (i.e., the file
        name is its hash). Using content-addressable filenames means cache can't
        ever be stale.

        In terms of client-side caching from this library, we base our caching
        on the objects' entity tag (`ETag`), which is an identifier of a
        specific version of a resource [1]_. An object's ETag is: its git-sha1
        if stored in git, or its sha256 if stored in git-lfs.

    </Tip>

    References:

    -  [1] https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag
     N/zInvalid repo type)safe)rm   rk   rn   )
r!   
ValueErrorr"   r   r   formatr   
startswithr   len)rm   rn   ri   rj   rk   rl   urlrY   rY   rZ   
hf_hub_url   s$    F 
 rw   )rv   r_   rC   c                 C   sN   |  d}t| }|r8| d}|dt|  7 }| drJ|d7 }|S )a  Generate a local filename from a url.

    Convert `url` into a hashed filename in a reproducible way. If `etag` is
    specified, append its hash to the url's, delimited by a period. If the url
    ends with .h5 (Keras HDF5 weights) adds '.h5' to the name so that TF 2.0 can
    identify it as a HDF5 file (see
    https://github.com/tensorflow/tensorflow/blob/00fad90125b18b80fe054de1055770cfb8fe4ba3/tensorflow/python/keras/engine/network.py#L1380)

    Args:
        url (`str`):
            The address to the file.
        etag (`str`, *optional*):
            The ETag of the file.

    Returns:
        The generated filename.
    utf-8.z.h5)encoder   	hexdigestendswith)rv   r_   Z	url_bytesrn   Z
etag_bytesrY   rY   rZ   url_to_filename  s    


r}   F)rB   legacy_cache_layoutrC   c           	   	   C   s   |st dt |dkrt}t|tr.t|}tj	|| }tj
|sXtd| d|d }tj
|s|td| dt|dd}t|}W 5 Q R X |d }|d	 }||fS )
aW  
    Return the url and etag (which may be `None`) stored for `filename`. Raise
    `EnvironmentError` if `filename` or its stored metadata do not exist.

    Args:
        filename (`str`):
            The name of the file
        cache_dir (`str`, *optional*):
            The cache directory to use instead of the default one.
        legacy_cache_layout (`bool`, *optional*, defaults to `False`):
            If `True`, uses the legacy file cache layout i.e. just call `hf_hub_url`
            then `cached_download`. This is deprecated as the new cache layout is
            more powerful.
    z7`filename_to_url` uses the legacy way cache file layoutNzfile z
 not found.jsonrx   )encodingrv   r_   )rU   rV   FutureWarningr   
isinstancer   rI   rL   rO   joinexistsEnvironmentErroropenjsonload)	rn   rB   r~   
cache_path	meta_path	meta_filemetadatarv   r_   rY   rY   rZ   filename_to_url'  s(    
r   library_namelibrary_version
user_agent)r   r   r   rC   c                 C   s   t | ||dS )z,Deprecated in favor of [`build_hf_headers`].r   r=   r   rY   rY   rZ   http_user_agentU  s
    r   c                   @   s   e Zd ZdS )OfflineModeIsEnabledN)rb   rc   rd   rY   rY   rY   rZ   r   c  s   r   msgc                 C   s&   t jr"t| dkrdn
dt|  dS )z_Raise a OfflineModeIsEnabled error (subclass of ConnectionError) if
    HF_HUB_OFFLINE is True.NzOffline mode is enabled.zOffline mode is enabled. )r   ZHF_HUB_OFFLINEr   rI   r   rY   rY   rZ   !_raise_if_offline_mode_is_enabledg  s    r   g      ?         $@)max_retriesbase_wait_timemax_wait_timetimeoutfollow_relative_redirects)methodrv   r   r   r   r   r   rC   c          
      K   s   t d|  |rtf | |||||dd|}d|j  krFdkrn nHt|jd }	|	jdkrtf | t|j|	jd ||||d	d|S |S t	f | ||||t
tfd
|d|S )a  Wrapper around requests methods to add several features.

    What it does:
    1. Ensure offline mode is disabled (env variable `HF_HUB_OFFLINE` not set to 1).
       If enabled, a `OfflineModeIsEnabled` exception is raised.
    2. Follow relative redirections if `follow_relative_redirects=True` even when
       `allow_redirection` kwarg is set to False.
    3. Retry in case request fails with a `Timeout` or `ProxyError`, with exponential backoff.

    Args:
        method (`str`):
            HTTP method, such as 'GET' or 'HEAD'.
        url (`str`):
            The URL of the resource to fetch.
        max_retries (`int`, *optional*, defaults to `0`):
            Maximum number of retries, defaults to 0 (no retries).
        base_wait_time (`float`, *optional*, defaults to `0.5`):
            Duration (in seconds) to wait before retrying the first time.
            Wait time between retries then grows exponentially, capped by
            `max_wait_time`.
        max_wait_time (`float`, *optional*, defaults to `2`):
            Maximum amount of time between two retries, in seconds.
        timeout (`float`, *optional*, defaults to `10`):
            How many seconds to wait for the server to send data before
            giving up which is passed to `requests.request`.
        follow_relative_redirects (`bool`, *optional*, defaults to `False`)
            If True, relative redirection (redirection to the same site) will be
            resolved even when `allow_redirection` kwarg is set to False. Useful when we
            want to follow a redirection to a renamed repository without following
            redirection to a CDN.
        **params (`dict`, *optional*):
            Params to pass to `requests.request`.
    zTried to reach F)r   rv   r   r   r   r   r   ,    Locationro   )rO   TrY   )r   rv   r   r   r   Zretry_on_exceptionsZretry_on_status_codesr   )r   _request_wrapperstatus_coder   headersnetloc_replacerO   geturlr2   r   r   )
r   rv   r   r   r   r   r   paramsresponseZparsed_targetrY   rY   rZ   r   p  sP    -

	r   rC   c                  O   s
   t | |S )zDeprecated method. Please use `_request_wrapper` instead.

    Alias to keep backward compatibility (used in Transformers).
    )r   )argskwargsrY   rY   rZ   _request_with_retry  s    r   )proxiesresume_sizer   r   r   expected_size)rv   	temp_filer   r   r   r   r   c             
   C   s  |st rz@ddlm} td|  d d}	d}
|| |j|	|
|d W dS  tk
rd   td	Y n, tk
r } zt	d
|W 5 d}~X Y nX t
|pi }|dkrd|f |d< td| d||||d}t| |jd}|dk	r|t| nd}| }|jd}|dk	r0t|}|dk	r0| d }t|dkrPd|dd  }tdd||d| tt tjkd}|jddD ]$}|r|t| || q|dk	r|| krtd| d|  d| d|  dS )zs
    Download a remote file. Do not gobble up errors, and will return errors tailored to the Hugging Face Hub.
    r   )downloadz	Download z using HF_TRANSFER.d   i   )r   NzFast download using 'hf_transfer' is enabled (HF_HUB_ENABLE_HF_TRANSFER=1) but 'hf_transfer' package is not available in your environment. Try `pip install hf_transfer`.zAn error occurred while downloading using `hf_transfer`. Consider disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling.z	bytes=%d-RangeGETT)r   rv   streamr   r   r   r   Content-LengthzContent-Dispositionrn      u   (…)iBzDownloading )unitZ
unit_scaletotalinitialdescdisable)
chunk_sizez1Consistency check failed: file should be of size z but has size z (z).
We are sorry for the inconvenience. Please retry download and pass `force_download=True, resume_download=False` as argument.
If the issue persists, please let us know by opening an issue on https://github.com/huggingface/huggingface_hub.) r   Zhf_transferr   loggerdebugrT   ImportErrorrr   	ExceptionRuntimeErrorcopydeepcopyr   r1   r   getrg   HEADER_FILENAME_PATTERNsearch	groupdictru   r;   boolgetEffectiveLevelr:   NOTSETiter_contentupdatewritetellr   close)rv   r   r   r   r   r   r   r   r   Z	max_filesr   ercontent_lengthr   Zdisplayed_namecontent_dispositionmatchprogresschunkrY   rY   rZ   http_get  sv    	


r   
   r   r   rB   r   force_downloadforce_filenamer   etag_timeoutresume_downloadtokenlocal_files_onlyr~   )rv   r   r   rB   r   r   r   r   r   r   r   r   r~   rC   c             
      s  |st dt |dkrt}t|tr.t|}tj|dd t	|
|||d}| }d}d}|sHzd|d< t
d| |d	d||d
}|dd t| |jtp|jd}|dkrtdt|jd}d|j  krdkrn n|jd }|dd d}W nF tjjtjjfk
r$    Y n$ tjjtjjtfk
rF   Y nX |dk	rV|nt| |}tj||}|dkrtj|r|s|S dd tt ||!dd d D }t"|dkr|s|dkrtj||d S |rt#dnt#dtj|r|s|S |d }tj$dkrTt"tj%|dkrTdtj%| }tj$dkrt"tj%|dkrdtj%| }t&|0 tj|r|s|W  5 Q R  S |	r|d  t't(t)j*ddf d fd d!}|}tj rt+ j,}nd}nt-t.j/d"|d	d#}d}| *}t01d$| |j$ t2||||||d% W 5 Q R X t01d&| | t3|j$| |dkrt01d'| | |d(}|d) }t4|d*}t56|| W 5 Q R X W 5 Q R X |S )+a  
    Download from a given URL and cache it if it's not already present in the
    local cache.

    Given a URL, this function looks for the corresponding file in the local
    cache. If it's not there, download it. Then return the path to the cached
    file.

    Will raise errors tailored to the Hugging Face Hub.

    Args:
        url (`str`):
            The path to the file to be downloaded.
        library_name (`str`, *optional*):
            The name of the library to which the object corresponds.
        library_version (`str`, *optional*):
            The version of the library.
        cache_dir (`str`, `Path`, *optional*):
            Path to the folder where cached files are stored.
        user_agent (`dict`, `str`, *optional*):
            The user-agent info in the form of a dictionary or a string.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether the file should be downloaded even if it already exists in
            the local cache.
        force_filename (`str`, *optional*):
            Use this name instead of a generated file name.
        proxies (`dict`, *optional*):
            Dictionary mapping protocol to the URL of the proxy passed to
            `requests.request`.
        etag_timeout (`float`, *optional* defaults to `10`):
            When fetching ETag, how many seconds to wait for the server to send
            data before giving up which is passed to `requests.request`.
        resume_download (`bool`, *optional*, defaults to `False`):
            If `True`, resume a previously interrupted download.
        token (`bool`, `str`, *optional*):
            A token to be used for the download.
                - If `True`, the token is read from the HuggingFace config
                  folder.
                - If a string, it's used as the authentication token.
        local_files_only (`bool`, *optional*, defaults to `False`):
            If `True`, avoid downloading the file and return the path to the
            local cached file if it exists.
        legacy_cache_layout (`bool`, *optional*, defaults to `False`):
            Set this parameter to `True` to mention that you'd like to continue
            the old cache layout. Putting this to `True` manually will not raise
            any warning when using `cached_download`. We recommend using
            `hf_hub_download` to take advantage of the new cache.

    Returns:
        Local path (string) of file or if networking is off, last version of
        file cached on disk.

    <Tip>

    Raises the following errors:

        - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
          if `token=True` and the token cannot be found.
        - [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError)
          if ETag cannot be determined.
        - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
          if some parameter value is invalid
        - [`~utils.RepositoryNotFoundError`]
          If the repository to download from cannot be found. This may be because it doesn't exist,
          or because it is set to `private` and you do not have access.
        - [`~utils.RevisionNotFoundError`]
          If the revision to download from cannot be found.
        - [`~utils.EntryNotFoundError`]
          If the file to download cannot be found.
        - [`~utils.LocalEntryNotFoundError`]
          If network is disabled or unavailable and file is not found in cache.

    </Tip>
    zu'cached_download' is the legacy way to download files from the HF hub, please consider upgrading to 'hf_hub_download'NTrD   r   r   r   r   identityAccept-EncodingHEADFr   rv   r   allow_redirectsr   r   r   ETag\Distant resource does not have an ETag, we won't be able to reliably ensure reproducibility.r   r   r   r   authorizationc                 S   s$   g | ]}| d s| ds|qS )r   .lock)r|   ).0filerY   rY   rZ   
<listcomp>  s   
 
z#cached_download.<locals>.<listcomp>ry   r   z.*zCannot find the requested files in the cached path and outgoing traffic has been disabled. To enable model look-ups and downloads online, set 'local_files_only' to False.zConnection error, and we cannot find the requested files in the cached path. Please try again or make sure your Internet connection is on.r   rH      \\?\.incompleter   c               	   3   s    t  d} | V  W 5 Q R X d S Nabr   fZincomplete_pathrY   rZ   _resumable_file_manager  s    z0cached_download.<locals>._resumable_file_managerwbmoderF   deletedownloading %s to %sr   r   r   r   zstoring %s in cache at %szcreating metadata file for %s)rv   r_   r   w)7rU   rV   r   r   r   r   rI   rL   rM   r)   r   popr1   r   r   r   rS   _int_or_noner   requests
exceptionsSSLErrorr   ConnectionErrorr   r   r}   rO   r   r   fnmatchfilterlistdirsplitru   r%   rT   abspathr   r   r
   ioBufferedWriterstatst_sizer   tempfileNamedTemporaryFiler   infor   _chmod_and_replacer   r   dump)rv   r   r   rB   r   r   r   r   r   r   r   r   r~   r   url_to_downloadr_   r   r   rn   r   Zmatching_files	lock_pathr   temp_file_managerr   r   metar   r   rY   r   rZ   cached_download7  s    [
	

""    	

 r  )r_   rC   c                 C   s   | dkrdS |  ddS )aO  Normalize ETag HTTP header, so it can be used to create nice filepaths.

    The HTTP spec allows two forms of ETag:
      ETag: W/"<etag_value>"
      ETag: "<etag_value>"

    For now, we only expect the second form from the server, but we want to be future-proof so we support both. For
    more context, see `TestNormalizeEtag` tests and https://github.com/huggingface/huggingface_hub/pull/1428.

    Args:
        etag (`str`, *optional*): HTTP header

    Returns:
        `str` or `None`: string that can be used as a nice directory name.
        Returns `None` if input is None.
    NzW/")lstripstrip)r_   rY   rY   rZ   _normalize_etag>  s    r  )srcdstnew_blobrC   c                 C   s   t | ||dS )z6Alias method used in `transformers` conversion script.r  r   r!  )_create_symlinkr"  rY   rY   rZ   _create_relative_symlinkT  s    r$  c           	      C   s  zt | W n tk
r"   Y nX t jt j| }t jt j|}zt j|t j|}W n tk
r~   d}Y nX zHz$t j	||g}t
t j|}W n tk
r   t jdk}Y nX W n$ tk
r   t
t j|}Y nX |rh|p|}td| d|  zt || W nB tk
rd   t j|r^t j|t j|kr^n Y nX nL|rtd| d|  t| | n"td| d|  t| | dS )u  Create a symbolic link named dst pointing to src.

    By default, it will try to create a symlink using a relative path. Relative paths have 2 advantages:
    - If the cache_folder is moved (example: back-up on a shared drive), relative paths within the cache folder will
      not brake.
    - Relative paths seems to be better handled on Windows. Issue was reported 3 times in less than a week when
      changing from relative to absolute paths. See https://github.com/huggingface/huggingface_hub/issues/1398,
      https://github.com/huggingface/diffusers/issues/2729 and https://github.com/huggingface/transformers/pull/22228.
      NOTE: The issue with absolute paths doesn't happen on admin mode.
    When creating a symlink from the cache to a local folder, it is possible that a relative path cannot be created.
    This happens when paths are not on the same volume. In that case, we use absolute paths.


    The result layout looks something like
        └── [ 128]  snapshots
            ├── [ 128]  2439f60ef33a0d46d85da5001d52aeda5b00ce9f
            │   ├── [  52]  README.md -> ../../../blobs/d7edf6bd2a681fb0175f7735299831ee1b22b812
            │   └── [  76]  pytorch_model.bin -> ../../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd

    If symlinks cannot be created on this platform (most likely to be Windows), the workaround is to avoid symlinks by
    having the actual file in `dst`. If it is a new file (`new_blob=True`), we move it to `dst`. If it is not a new file
    (`new_blob=False`), we don't know if the blob file is already referenced elsewhere. To avoid breaking existing
    cache, the file is duplicated on the disk.

    In case symlinks are not supported, a warning message is displayed to the user once when loading `huggingface_hub`.
    The warning message can be disable with the `DISABLE_SYMLINKS_WARNING` environment variable.
    NrH   zCreating pointer from z to z(Symlink not supported. Moving file from z)Symlink not supported. Copying file from )rL   removerS   rO   r  rJ   rP   rQ   rr   
commonpathr[   rT   PermissionErrorr   r  rR   FileExistsErrorislinkrealpathshutilmovecopyfile)	r  r   r!  Zabs_srcZabs_dstrW   r&  Z_support_symlinksZsrc_rel_or_absrY   rY   rZ   r#  Y  s@    
(
r#  )storage_folderrk   r^   rC   c                 C   sJ   ||krFt | d | }|jjddd | r<|| krF|| dS )zCache reference between a revision (tag, branch or truncated commit hash) and the corresponding commit hash.

    Does nothing if `revision` is already a proper `commit_hash` or reference is already cached.
    refsTparentsrE   N)r   parentmkdirr   	read_text
write_text)r.  rk   r^   ref_pathrY   rY   rZ   (_cache_commit_hash_for_specific_revision  s
    r7  )rm   rj   rC   c                 C   s    | df|  d}t|S )zReturn a serialized version of a hf.co repo name and type, safe for disk storage
    as a single non-nested folder.

    Example: models--julien-c--EsperBERTo-small
    srp   )r  r    r   )rm   rj   partsrY   rY   rZ   repo_folder_name  s    r:  )r   
target_dirrC   c              
   C   s~   t |}|gt|j D ]`}zFt|j}|| k r\td| d dd| d|d dd W  dS  tk
rv   Y qX qdS )a$  Check disk usage and log a warning if there is not enough disk space to download the file.

    Args:
        expected_size (`int`):
            The expected size of the file in bytes.
        target_dir (`str`):
            The directory where the file will be stored after downloading.
    zLNot enough free disk space to download the file. The expected file size is: g    .Az.2fz MB. The target location z
 only has z MB free disk space.N)	r   listr1  r+  
disk_usagefreerU   rV   rS   )r   r;  rO   Ztarget_dir_freerY   rY   rZ   _check_disk_space  s    
"r?  auto)ri   rj   rk   rl   r   r   rB   	local_dirlocal_dir_use_symlinksr   r   r   r   r   r   r   r   r~   )rm   rn   ri   rj   rk   rl   r   r   rB   rA  rB  r   r   r   r   r   r   r   r   r~   rC   c          ,         s
  |dk	rt dt d}|rRt| |||||d}t|||||||||||||dS |dkr^t}|dkrjt}t|tr|t	|}t|	trt	|	}	|dkrd}|dk	r| d| }|dkrd}|t
krtd	| d
t	t
 tj|t| |d}tj|dd tjj|d }tjdkrD|ds4d|krDtd| dt|rt|||}tj|r|	dk	rt||	||
dS |S t| ||||d}t||||d}|}d}d}d}d}|sJzzt||||d}W n| tk
rV } z\|jjt}|dk	rD|sDt|d | | } | j j!ddd | "  t#|||  W 5 d}~X Y nX |j$}|dkrpt%d|j&}|dkrt%d|j'}|j(|kr|j(}|)dd W n t*j+j,t*j+j-fk
r    Y nx t*j+j.t*j+j/t0fk
r }! z|!}W 5 d}!~!X Y nD t1tfk
r     Y n* t*j2k
rH }! z|!}W 5 d}!~!X Y nX |dkr,|rbtdd}t|rx|}n:tj|d|}"tj3|"rt4|"}#|#5 }W 5 Q R X |dk	rt|||}tj|r|	dk	rt||	||
dS |S |rt6dn(t|t7st|t8r"|n
t6d||dk	s>t9d|dk	sPt9d tj|d!|}$t|||}tjtj:|$dd tjtj:|dd t#||| tj|r|s|	dk	rt||	||
dS |S tj|$r|s|	dk	rt|$|	||
dS t;|$|d"d# |S |$d$ }%tjdkrPt<tj=|%d%krPd&tj=|% }%tjdkrt<tj=|$d%krd&tj=|$ }$t>|%N tj|r|s|W  5 Q R  S |r|$d'  t?t@tAjBddf d( fd)d*}&|&}'tj rtC jD}(nd+}(ntEtFjGd,|d"d-}'d+}(|' n})tHId.||)j |dk	rztJ|tj:|)j tJ|tj:|$ |	dk	rztJ||	 tK||)||(||d/ W 5 Q R X |	dkrtHId0| d1|$  tL|)j|$ t;|$|dd# n tj|	|}*tjtj:|*dd tC|)jjDtMjNk}+|
dks(|
d2krd|+rdtHId0| d1|$  tL|)j|$ tHId3 t;|$|*d"d# nn|
d2kr|+stHId0| d1|$  tL|)j|$ tHId4 tOP|$|* n$tHId0| d5|* d6 tL|)j|* |*}W 5 Q R X ztQ|% W n t%k
r   Y nX |S )7u  Download a given file if it's not already present in the local cache.

    The new cache file layout looks like this:
    - The cache directory contains one subfolder per repo_id (namespaced by repo type)
    - inside each repo folder:
        - refs is a list of the latest known revision => commit_hash pairs
        - blobs contains the actual file blobs (identified by their git-sha or sha256, depending on
          whether they're LFS files or not)
        - snapshots contains one subfolder per commit, each "commit" contains the subset of the files
          that have been resolved at that particular commit. Each filename is a symlink to the blob
          at that particular commit.

    If `local_dir` is provided, the file structure from the repo will be replicated in this location. You can configure
    how you want to move those files:
      - If `local_dir_use_symlinks="auto"` (default), files are downloaded and stored in the cache directory as blob
        files. Small files (<5MB) are duplicated in `local_dir` while a symlink is created for bigger files. The goal
        is to be able to manually edit and save small files without corrupting the cache while saving disk space for
        binary files. The 5MB threshold can be configured with the `HF_HUB_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD`
        environment variable.
      - If `local_dir_use_symlinks=True`, files are downloaded, stored in the cache directory and symlinked in `local_dir`.
        This is optimal in term of disk usage but files must not be manually edited.
      - If `local_dir_use_symlinks=False` and the blob files exist in the cache directory, they are duplicated in the
        local dir. This means disk usage is not optimized.
      - Finally, if `local_dir_use_symlinks=False` and the blob files do not exist in the cache directory, then the
        files are downloaded and directly placed under `local_dir`. This means if you need to download them again later,
        they will be re-downloaded entirely.

    ```
    [  96]  .
    └── [ 160]  models--julien-c--EsperBERTo-small
        ├── [ 160]  blobs
        │   ├── [321M]  403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd
        │   ├── [ 398]  7cb18dc9bafbfcf74629a4b760af1b160957a83e
        │   └── [1.4K]  d7edf6bd2a681fb0175f7735299831ee1b22b812
        ├── [  96]  refs
        │   └── [  40]  main
        └── [ 128]  snapshots
            ├── [ 128]  2439f60ef33a0d46d85da5001d52aeda5b00ce9f
            │   ├── [  52]  README.md -> ../../blobs/d7edf6bd2a681fb0175f7735299831ee1b22b812
            │   └── [  76]  pytorch_model.bin -> ../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd
            └── [ 128]  bbc77c8132af1cc5cf678da3f1ddf2de43606d48
                ├── [  52]  README.md -> ../../blobs/7cb18dc9bafbfcf74629a4b760af1b160957a83e
                └── [  76]  pytorch_model.bin -> ../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd
    ```

    Args:
        repo_id (`str`):
            A user or an organization name and a repo name separated by a `/`.
        filename (`str`):
            The name of the file in the repo.
        subfolder (`str`, *optional*):
            An optional value corresponding to a folder inside the model repo.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if downloading from a dataset or space,
            `None` or `"model"` if downloading from a model. Default is `None`.
        revision (`str`, *optional*):
            An optional Git revision id which can be a branch name, a tag, or a
            commit hash.
        endpoint (`str`, *optional*):
            Hugging Face Hub base url. Will default to https://huggingface.co/. Otherwise, one can set the `HF_ENDPOINT`
            environment variable.
        library_name (`str`, *optional*):
            The name of the library to which the object corresponds.
        library_version (`str`, *optional*):
            The version of the library.
        cache_dir (`str`, `Path`, *optional*):
            Path to the folder where cached files are stored.
        local_dir (`str` or `Path`, *optional*):
            If provided, the downloaded file will be placed under this directory, either as a symlink (default) or
            a regular file (see description for more details).
        local_dir_use_symlinks (`"auto"` or `bool`, defaults to `"auto"`):
            To be used with `local_dir`. If set to "auto", the cache directory will be used and the file will be either
            duplicated or symlinked to the local directory depending on its size. It set to `True`, a symlink will be
            created, no matter the file size. If set to `False`, the file will either be duplicated from cache (if
            already exists) or downloaded from the Hub and not cached. See description for more details.
        user_agent (`dict`, `str`, *optional*):
            The user-agent info in the form of a dictionary or a string.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether the file should be downloaded even if it already exists in
            the local cache.
        proxies (`dict`, *optional*):
            Dictionary mapping protocol to the URL of the proxy passed to
            `requests.request`.
        etag_timeout (`float`, *optional*, defaults to `10`):
            When fetching ETag, how many seconds to wait for the server to send
            data before giving up which is passed to `requests.request`.
        resume_download (`bool`, *optional*, defaults to `False`):
            If `True`, resume a previously interrupted download.
        token (`str`, `bool`, *optional*):
            A token to be used for the download.
                - If `True`, the token is read from the HuggingFace config
                  folder.
                - If a string, it's used as the authentication token.
        local_files_only (`bool`, *optional*, defaults to `False`):
            If `True`, avoid downloading the file and return the path to the
            local cached file if it exists.
        legacy_cache_layout (`bool`, *optional*, defaults to `False`):
            If `True`, uses the legacy file cache layout i.e. just call [`hf_hub_url`]
            then `cached_download`. This is deprecated as the new cache layout is
            more powerful.

    Returns:
        Local path (string) of file or if networking is off, last version of
        file cached on disk.

    <Tip>

    Raises the following errors:

        - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
          if `token=True` and the token cannot be found.
        - [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError)
          if ETag cannot be determined.
        - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
          if some parameter value is invalid
        - [`~utils.RepositoryNotFoundError`]
          If the repository to download from cannot be found. This may be because it doesn't exist,
          or because it is set to `private` and you do not have access.
        - [`~utils.RevisionNotFoundError`]
          If the revision to download from cannot be found.
        - [`~utils.EntryNotFoundError`]
          If the file to download cannot be found.
        - [`~utils.LocalEntryNotFoundError`]
          If network is disabled or unavailable and file is not found in cache.

    </Tip>
    NzThe `force_filename` parameter is deprecated as a new caching system, which keeps the filenames as they are on the Hub, is now in place.Trh   r   ro   rp   modelInvalid repo type: . Accepted repo types are: )rm   rj   rD   rH   z..\z\..\z*Invalid filename: cannot handle filename 'zB' on Windows. Please ask the repository owner to rename this file.)use_symlinks)rj   rk   rl   r   )rv   r   r   r   	.no_existr0  zODistant resource does not seem to be on huggingface.co (missing commit header).r   r   zbWe have no connection or you passed local_files_only, so force_download is not an accepted option.r/  zCannot find the requested files in the disk cache and outgoing traffic has been disabled. To enable hf.co look-ups and downloads online, set 'local_files_only' to False.zAn error happened while trying to locate the file on the Hub and we cannot find the requested files in the local cache. Please check your connection and try again or make sure your Internet connection is on.z)etag must have been retrieved from serverz0commit_hash must have been retrieved from serverZblobsFr!  r   r   r   r   r   c               	   3   s    t  d} | V  W 5 Q R X d S r   r   r   r   rY   rZ   r   y  s    z0hf_hub_download.<locals>._resumable_file_managerr   r   r   r   r   zStoring z in cache at r@  zCreate symlink to local dirzADuplicate in local dir (small file and use_symlink set to 'auto')z in local_dir at z (not cached).)RrU   rV   r   rw   r  r   r   r   r   rI   r!   rr   rL   rO   r   r:  rM   r  rT   rt   REGEX_COMMIT_HASHr   _get_pointer_pathr   _to_local_dirr)   get_hf_file_metadatar#   r   r   r   r   r2  r3  rN   r7  r^   rS   r_   ra   r`   r  r  r  r  r   r  r   r   r'   	HTTPErrorisfiler   readr%   r&   r$   AssertionErrorrQ   r#  ru   r  r   r   r
   r  r  r  r  r   r  r  r   r  r?  r   r  r   'HF_HUB_LOCAL_DIR_AUTO_SYMLINK_THRESHOLDr+  r-  r%  ),rm   rn   ri   rj   rk   rl   r   r   rB   rA  rB  r   r   r   r   r   r   r   r   r~   rv   r.  relative_filenamepointer_pathr   r  r_   r^   r   Zhead_call_errorr   
http_errorZno_exist_file_patherrorr6  r   Z	blob_pathr  r   r  r   r   local_dir_filepathZis_big_filerY   r   rZ   hf_hub_download  s    	










   

""    


	


rW  )rm   rn   rB   rk   rj   rC   c              	   C   sR  |dkrd}|dkrd}|t kr8td| dtt  |dkrDt}| dd}tj|| d| }tj|sxdS tj|d	}tj|d
}tj|d}	tj|rtj||}
tj	|
rt
|
}| }W 5 Q R X tj	tj|	||rtS tj|sdS t|}||kr,dS tj|||}tj	|rN|S dS )a  
    Explores the cache to return the latest cached file for a given revision if found.

    This function will not raise any exception if the file in not cached.

    Args:
        cache_dir (`str` or `os.PathLike`):
            The folder where the cached files lie.
        repo_id (`str`):
            The ID of the repo on huggingface.co.
        filename (`str`):
            The filename to look for inside `repo_id`.
        revision (`str`, *optional*):
            The specific model version to use. Will default to `"main"` if it's not provided and no `commit_hash` is
            provided either.
        repo_type (`str`, *optional*):
            The type of the repository. Will default to `"model"`.

    Returns:
        `Optional[str]` or `_CACHED_NO_EXIST`:
            Will return `None` if the file was not cached. Otherwise:
            - The exact path to the cached file if it's found in the cache
            - A special value `_CACHED_NO_EXIST` if the file does not exist at the given commit hash and this fact was
              cached.

    Example:

    ```python
    from huggingface_hub import try_to_load_from_cache, _CACHED_NO_EXIST

    filepath = try_to_load_from_cache()
    if isinstance(filepath, str):
        # file exists and is cached
        ...
    elif filepath is _CACHED_NO_EXIST:
        # non-existence of file is cached
        ...
    else:
        # file is not cached
        ...
    ```
    NmainrC  rD  rE  rp   z--zs--r/  	snapshotsrG  )r!   rr   rI   r   replacerL   rO   r   isdirrN  r   rO  _CACHED_NO_EXISTr   r
  )rm   rn   rB   rk   rj   Z	object_idZ
repo_cacheZrefs_dirZsnapshots_dirZno_exist_dirZrevision_filer   Zcached_shasZcached_filerY   rY   rZ   try_to_load_from_cache  s:    2


r]  )rv   r   r   r   rC   c              	   C   s   t |d}d|d< td| |dd||d}t| t|jtt|jtpT|jd|jd	ph|j	j
t|jtp|jd
dS )a  Fetch metadata of a file versioned on the Hub for a given url.

    Args:
        url (`str`):
            File url, for example returned by [`hf_hub_url`].
        token (`str` or `bool`, *optional*):
            A token to be used for the download.
                - If `True`, the token is read from the HuggingFace config
                  folder.
                - If `False` or `None`, no token is provided.
                - If a string, it's used as the authentication token.
        proxies (`dict`, *optional*):
            Dictionary mapping protocol to the URL of the proxy passed to
            `requests.request`.
        timeout (`float`, *optional*, defaults to 10):
            How many seconds to wait for the server to send metadata before giving up.

    Returns:
        A [`HfFileMetadata`] object containing metadata such as location, etag, size and
        commit_hash.
    )r   r   r   r   FTr   r   r   r   )r^   r_   r`   ra   )r)   r   r1   r]   r   r   r   r  r   requestrv   r  r   )rv   r   r   r   r   r   rY   rY   rZ   rL    s*    
	

	rL  )valuerC   c              	   C   s*   z
t | W S  ttfk
r$   Y d S X d S )N)rg   	TypeErrorrr   )r_  rY   rY   rZ   r  [  s    
r  )r  r   rC   c                 C   sb   t |jjdt   }z,|  t | j}t	| t
| W 5 |  X t| | dS )aV  Set correct permission before moving a blob from tmp directory to cache dir.

    Do not take into account the `umask` from the process as there is no convenient way
    to get it that is thread-safe.

    See:
    - About umask: https://docs.python.org/3/library/os.html#os.umask
    - Thread-safety: https://stackoverflow.com/a/70343066
    - About solution: https://github.com/huggingface/huggingface_hub/pull/1220#issuecomment-1326211591
    - Fix issue: https://github.com/huggingface/huggingface_hub/issues/1141
    - Fix issue: https://github.com/huggingface/huggingface_hub/issues/1215
    Ztmp_N)r   r2  uuiduuid4unlinkrN   r  st_moderL   chmodS_IMODEr+  r,  )r  r   Ztmp_fileZcache_dir_moderY   rY   rZ   r  b  s    
r  )r.  rk   rR  rC   c                 C   s`   t j| d}t j|||}tt j|tt j|jkr\td|  d| d| d|S )NrY  zXInvalid pointer path: cannot create pointer path in snapshot folder if `storage_folder='z'`, `revision='z'` and `relative_filename='z'`.)rL   rO   r   r   r  r1  rr   )r.  rk   rR  Zsnapshot_pathrS  rY   rY   rZ   rJ  {  s    "rJ  )rO   rA  rR  rF  rC   c                 C   s   t j||}tt j|tt j|jkrFtd| d| dt jt j|dd t j	| }|dkrt 
|jtjk}|rt||dd nt|| |S )	zPlace a file in a local dir (different than cache_dir).

    Either symlink to blob file in cache or duplicate file depending on `use_symlinks` and file size.
    zCannot copy file 'z' to local dir 'z,': file would not be in the local directory.TrD   r@  FrH  )rL   rO   r   r   r  r1  rr   rM   rQ   r*  r  r  r   rQ  r#  r+  r-  )rO   rA  rR  rF  rV  Zreal_blob_pathrY   rY   rZ   rK    s    "rK  )N)N)NF)N)F)F)NNN)NNr   )r   r  r  r   rL   rer+  r  r  ra  rU   
contextlibr   dataclassesr   	functoolsr   hashlibr   pathlibr   typingr   r   r	   r
   r   r   r   r   urllib.parser   r   r  Zfilelockr   Zrequests.exceptionsr   r   Zhuggingface_hubr   ro   r   r   r   r   r   r   r   r   r   r   r    r!   r"   utilsr#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   Zutils._headersr>   Zutils._runtimer?   Zutils._typingr@   Z
get_loggerrb   r   compiler   rA   rI   r   rf   r[   objectr\  Z_CACHED_NO_EXIST_TrI  r]   rw   r}   r   r   r  r   r   rg   floatResponser   r   r   r  r  r$  r#  r7  r:  r?  rW  r]  rL  r  r  rJ  rK  rY   rY   rY   rZ   <module>   s   (8p

<
[!  
0a^  P   g   \   ;   