U
    9%eq                     @   s  d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	 ddl
mZmZmZmZmZmZmZ ddlmZ d	d
lmZ eeZed ZG dd deZG dd deZeddG dd dZeddG dd dZeddG dd dZeddG dd dZ eddG dd dZ!d,eee"e	f  e!dddZ#e	edd d!Z$e%e"d"d#d$Z&d%Z'e(e"d&d'd(Z)e	e"dd)d*d+Z*dS )-z4Contains utilities to manage the HF cache directory.    N)defaultdict)	dataclass)Path)Dict	FrozenSetListLiteralOptionalSetUnion   )HUGGINGFACE_HUB_CACHE   )logging)modeldatasetspacec                       sB   e Zd ZU dZeeef ed< eeeef d fddZ  Z	S )CacheNotFoundz9Exception thrown when the Huggingface cache is not found.	cache_dir)msgr   c                    s   t  j|f|| || _d S N)super__init__r   )selfr   r   argskwargs	__class__ c/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/huggingface_hub/utils/_cache_manager.pyr   &   s    zCacheNotFound.__init__)
__name__
__module____qualname____doc__r   strr   __annotations__r   __classcell__r   r   r   r   r   !   s   
r   c                   @   s   e Zd ZdZdS )CorruptedCacheExceptionzGException for any unexpected structure in the Huggingface cache-system.N)r    r!   r"   r#   r   r   r   r   r'   +   s   r'   T)frozenc                   @   sx   e Zd ZU dZeed< eed< eed< eed< eed< eed< e	edd	d
Z
e	edddZe	edddZdS )CachedFileInfoa  Frozen data structure holding information about a single cached file.

    Args:
        file_name (`str`):
            Name of the file. Example: `config.json`.
        file_path (`Path`):
            Path of the file in the `snapshots` directory. The file path is a symlink
            referring to a blob in the `blobs` folder.
        blob_path (`Path`):
            Path of the blob file. This is equivalent to `file_path.resolve()`.
        size_on_disk (`int`):
            Size of the blob file in bytes.
        blob_last_accessed (`float`):
            Timestamp of the last time the blob file has been accessed (from any
            revision).
        blob_last_modified (`float`):
            Timestamp of the last time the blob file has been modified/created.

    <Tip warning={true}>

    `blob_last_accessed` and `blob_last_modified` reliability can depend on the OS you
    are using. See [python documentation](https://docs.python.org/3/library/os.html#os.stat_result)
    for more details.

    </Tip>
    	file_name	file_path	blob_pathsize_on_diskblob_last_accessedblob_last_modifiedreturnc                 C   s
   t | jS )z
        (property) Timestamp of the last time the blob file has been accessed (from any
        revision), returned as a human-readable string.

        Example: "2 weeks ago".
        )_format_timesincer.   r   r   r   r   blob_last_accessed_strT   s    z%CachedFileInfo.blob_last_accessed_strc                 C   s
   t | jS )z
        (property) Timestamp of the last time the blob file has been modified, returned
        as a human-readable string.

        Example: "2 weeks ago".
        )r2   r/   r3   r   r   r   blob_last_modified_str^   s    z%CachedFileInfo.blob_last_modified_strc                 C   s
   t | jS )zi
        (property) Size of the blob file as a human-readable string.

        Example: "42.2K".
        _format_sizer-   r3   r   r   r   size_on_disk_strh   s    zCachedFileInfo.size_on_disk_strN)r    r!   r"   r#   r$   r%   r   intfloatpropertyr4   r5   r8   r   r   r   r   r)   /   s   
		r)   c                   @   s   e Zd ZU dZeed< eed< eed< ee	 ed< ee ed< e
ed< eedd	d
ZeedddZeedddZdS )CachedRevisionInfoaN  Frozen data structure holding information about a revision.

    A revision correspond to a folder in the `snapshots` folder and is populated with
    the exact tree structure as the repo on the Hub but contains only symlinks. A
    revision can be either referenced by 1 or more `refs` or be "detached" (no refs).

    Args:
        commit_hash (`str`):
            Hash of the revision (unique).
            Example: `"9338f7b671827df886678df2bdd7cc7b4f36dffd"`.
        snapshot_path (`Path`):
            Path to the revision directory in the `snapshots` folder. It contains the
            exact tree structure as the repo on the Hub.
        files: (`FrozenSet[CachedFileInfo]`):
            Set of [`~CachedFileInfo`] describing all files contained in the snapshot.
        refs (`FrozenSet[str]`):
            Set of `refs` pointing to this revision. If the revision has no `refs`, it
            is considered detached.
            Example: `{"main", "2.4.0"}` or `{"refs/pr/1"}`.
        size_on_disk (`int`):
            Sum of the blob file sizes that are symlink-ed by the revision.
        last_modified (`float`):
            Timestamp of the last time the revision has been created/modified.

    <Tip warning={true}>

    `last_accessed` cannot be determined correctly on a single revision as blob files
    are shared across revisions.

    </Tip>

    <Tip warning={true}>

    `size_on_disk` is not necessarily the sum of all file sizes because of possible
    duplicated files. Besides, only blobs are taken into account, not the (negligible)
    size of folders and symlinks.

    </Tip>
    commit_hashsnapshot_pathr-   filesrefslast_modifiedr0   c                 C   s
   t | jS )z
        (property) Timestamp of the last time the revision has been modified, returned
        as a human-readable string.

        Example: "2 weeks ago".
        r2   rA   r3   r   r   r   last_modified_str   s    z$CachedRevisionInfo.last_modified_strc                 C   s
   t | jS zn
        (property) Sum of the blob file sizes as a human-readable string.

        Example: "42.2K".
        r6   r3   r   r   r   r8      s    z#CachedRevisionInfo.size_on_disk_strc                 C   s
   t | jS )zC
        (property) Total number of files in the revision.
        )lenr?   r3   r   r   r   nb_files   s    zCachedRevisionInfo.nb_filesN)r    r!   r"   r#   r$   r%   r   r9   r   r)   r:   r;   rC   r8   rF   r   r   r   r   r<   r   s   
(	r<   c                   @   s   e Zd ZU dZeed< eed< eed< eed< eed< e	e
 ed< eed< eed	< eed
ddZeed
ddZeed
ddZeeee
f d
ddZdS )CachedRepoInfoad  Frozen data structure holding information about a cached repository.

    Args:
        repo_id (`str`):
            Repo id of the repo on the Hub. Example: `"google/fleurs"`.
        repo_type (`Literal["dataset", "model", "space"]`):
            Type of the cached repo.
        repo_path (`Path`):
            Local path to the cached repo.
        size_on_disk (`int`):
            Sum of the blob file sizes in the cached repo.
        nb_files (`int`):
            Total number of blob files in the cached repo.
        revisions (`FrozenSet[CachedRevisionInfo]`):
            Set of [`~CachedRevisionInfo`] describing all revisions cached in the repo.
        last_accessed (`float`):
            Timestamp of the last time a blob file of the repo has been accessed.
        last_modified (`float`):
            Timestamp of the last time a blob file of the repo has been modified/created.

    <Tip warning={true}>

    `size_on_disk` is not necessarily the sum of all revisions sizes because of
    duplicated files. Besides, only blobs are taken into account, not the (negligible)
    size of folders and symlinks.

    </Tip>

    <Tip warning={true}>

    `last_accessed` and `last_modified` reliability can depend on the OS you are using.
    See [python documentation](https://docs.python.org/3/library/os.html#os.stat_result)
    for more details.

    </Tip>
    repo_id	repo_type	repo_pathr-   rF   	revisionslast_accessedrA   r0   c                 C   s
   t | jS )z
        (property) Last time a blob file of the repo has been accessed, returned as a
        human-readable string.

        Example: "2 weeks ago".
        )r2   rL   r3   r   r   r   last_accessed_str   s    z CachedRepoInfo.last_accessed_strc                 C   s
   t | jS )z
        (property) Last time a blob file of the repo has been modified, returned as a
        human-readable string.

        Example: "2 weeks ago".
        rB   r3   r   r   r   rC      s    z CachedRepoInfo.last_modified_strc                 C   s
   t | jS rD   r6   r3   r   r   r   r8     s    zCachedRepoInfo.size_on_disk_strc                 C   s   dd | j D S )zQ
        (property) Mapping between `refs` and revision data structures.
        c                 S   s   i | ]}|j D ]
}||qqS r   )r@   ).0revisionrefr   r   r   
<dictcomp>  s
        z'CachedRepoInfo.refs.<locals>.<dictcomp>)rK   r3   r   r   r   r@     s    zCachedRepoInfo.refsN)r    r!   r"   r#   r$   r%   REPO_TYPE_Tr   r9   r   r<   r:   r;   rM   rC   r8   r   r@   r   r   r   r   rG      s"   
%		rG   c                   @   sj   e Zd ZU dZeed< ee ed< ee ed< ee ed< ee ed< ee	ddd	Z
d
dddZd
S )DeleteCacheStrategya  Frozen data structure holding the strategy to delete cached revisions.

    This object is not meant to be instantiated programmatically but to be returned by
    [`~utils.HFCacheInfo.delete_revisions`]. See documentation for usage example.

    Args:
        expected_freed_size (`float`):
            Expected freed size once strategy is executed.
        blobs (`FrozenSet[Path]`):
            Set of blob file paths to be deleted.
        refs (`FrozenSet[Path]`):
            Set of reference file paths to be deleted.
        repos (`FrozenSet[Path]`):
            Set of entire repo paths to be deleted.
        snapshots (`FrozenSet[Path]`):
            Set of snapshots to be deleted (directory of symlinks).
    expected_freed_sizeblobsr@   repos	snapshotsr0   c                 C   s
   t | jS )zt
        (property) Expected size that will be freed as a human-readable string.

        Example: "42.2K".
        )r7   rT   r3   r   r   r   expected_freed_size_str/  s    z+DeleteCacheStrategy.expected_freed_size_strNc                 C   sx   | j D ]}t|dd q| jD ]}t|dd q| jD ]}t|dd q6| jD ]}t|dd qNtd| j d dS )	a  Execute the defined strategy.

        <Tip warning={true}>

        If this method is interrupted, the cache might get corrupted. Deletion order is
        implemented so that references and symlinks are deleted before the actual blob
        files.

        </Tip>

        <Tip warning={true}>

        This method is irreversible. If executed, cached files are erased and must be
        downloaded again.

        </Tip>
        repo)	path_typeZsnapshotrP   ZblobzCache deletion done. Saved .N)rV   _try_delete_pathrW   r@   rU   loggerinforX   )r   pathr   r   r   execute8  s    



zDeleteCacheStrategy.execute)r    r!   r"   r#   r9   r%   r   r   r;   r$   rX   r`   r   r   r   r   rS     s   
rS   c                   @   sT   e Zd ZU dZeed< ee ed< ee	 ed< e
edddZeedd	d
ZdS )HFCacheInfoa  Frozen data structure holding information about the entire cache-system.

    This data structure is returned by [`scan_cache_dir`] and is immutable.

    Args:
        size_on_disk (`int`):
            Sum of all valid repo sizes in the cache-system.
        repos (`FrozenSet[CachedRepoInfo]`):
            Set of [`~CachedRepoInfo`] describing all valid cached repos found on the
            cache-system while scanning.
        warnings (`List[CorruptedCacheException]`):
            List of [`~CorruptedCacheException`] that occurred while scanning the cache.
            Those exceptions are captured so that the scan can continue. Corrupted repos
            are skipped from the scan.

    <Tip warning={true}>

    Here `size_on_disk` is equal to the sum of all repo sizes (only blobs). However if
    some cached repos are corrupted, their sizes are not taken into account.

    </Tip>
    r-   rV   warningsr0   c                 C   s
   t | jS )z
        (property) Sum of all valid repo sizes in the cache-system as a human-readable
        string.

        Example: "42.2K".
        r6   r3   r   r   r   r8   ~  s    zHFCacheInfo.size_on_disk_str)rK   r1   c                 G   s  t |}tt }| jD ]4}|jD ](}|j|kr || | ||j q qt|dkrnt	dd
|  t  }t  }t  }t  }	d}
| D ]\}}|j| }t|dkr||j |
|j7 }
q|D ]}|	|j |jD ]}||jd |  q|jD ]p}|j|krd}|D ]8}|jD ]}|j|jkr$d} qDq$|s qTq|r||j |
|j7 }
qqqtt|t|t|t|	|
dS )a  Prepare the strategy to delete one or more revisions cached locally.

        Input revisions can be any revision hash. If a revision hash is not found in the
        local cache, a warning is thrown but no error is raised. Revisions can be from
        different cached repos since hashes are unique across repos,

        Examples:
        ```py
        >>> from huggingface_hub import scan_cache_dir
        >>> cache_info = scan_cache_dir()
        >>> delete_strategy = cache_info.delete_revisions(
        ...     "81fd1d6e7847c99f5862c9fb81387956d99ec7aa"
        ... )
        >>> print(f"Will free {delete_strategy.expected_freed_size_str}.")
        Will free 7.9K.
        >>> delete_strategy.execute()
        Cache deletion done. Saved 7.9K.
        ```

        ```py
        >>> from huggingface_hub import scan_cache_dir
        >>> scan_cache_dir().delete_revisions(
        ...     "81fd1d6e7847c99f5862c9fb81387956d99ec7aa",
        ...     "e2983b237dccf3ab4937c97fa717319a9ca1a96d",
        ...     "6c0e6080953db56375760c0471a8c5f2929baf11",
        ... ).execute()
        Cache deletion done. Saved 8.6G.
        ```

        <Tip warning={true}>

        `delete_revisions` returns a [`~utils.DeleteCacheStrategy`] object that needs to
        be executed. The [`~utils.DeleteCacheStrategy`] is not meant to be modified but
        allows having a dry run before actually executing the deletion.

        </Tip>
        r   z,Revision(s) not found - cannot delete them: z, r@   TF)rU   r@   rV   rW   rT   )setr   rV   rK   r=   addremoverE   r]   warningjoinitemsrJ   r-   r>   r@   r?   r,   rS   	frozenset)r   rK   Zhashes_to_deleteZrepos_with_revisionsrY   rO   Zdelete_strategy_blobsZdelete_strategy_refsZdelete_strategy_reposZdelete_strategy_snapshotsZ#delete_strategy_expected_freed_sizeZaffected_repoZrevisions_to_deleteZother_revisionsZrevision_to_deleterP   fileZis_file_aloneZrev_filer   r   r   delete_revisions  sX    &









zHFCacheInfo.delete_revisionsN)r    r!   r"   r#   r9   r%   r   rG   r   r'   r;   r$   r8   rS   rk   r   r   r   r   ra   a  s   
	ra   )r   r1   c                 C   s   | dkrt } t|   } |  s8td|  d| d|  rPtd|  dt }g }| 	 D ]D}z|
t| W qb tk
r } z|| W 5 d}~X Y qbX qbtt|tdd |D |dS )	at  Scan the entire HF cache-system and return a [`~HFCacheInfo`] structure.

    Use `scan_cache_dir` in order to programmatically scan your cache-system. The cache
    will be scanned repo by repo. If a repo is corrupted, a [`~CorruptedCacheException`]
    will be thrown internally but captured and returned in the [`~HFCacheInfo`]
    structure. Only valid repos get a proper report.

    ```py
    >>> from huggingface_hub import scan_cache_dir

    >>> hf_cache_info = scan_cache_dir()
    HFCacheInfo(
        size_on_disk=3398085269,
        repos=frozenset({
            CachedRepoInfo(
                repo_id='t5-small',
                repo_type='model',
                repo_path=PosixPath(...),
                size_on_disk=970726914,
                nb_files=11,
                revisions=frozenset({
                    CachedRevisionInfo(
                        commit_hash='d78aea13fa7ecd06c29e3e46195d6341255065d5',
                        size_on_disk=970726339,
                        snapshot_path=PosixPath(...),
                        files=frozenset({
                            CachedFileInfo(
                                file_name='config.json',
                                size_on_disk=1197
                                file_path=PosixPath(...),
                                blob_path=PosixPath(...),
                            ),
                            CachedFileInfo(...),
                            ...
                        }),
                    ),
                    CachedRevisionInfo(...),
                    ...
                }),
            ),
            CachedRepoInfo(...),
            ...
        }),
        warnings=[
            CorruptedCacheException("Snapshots dir doesn't exist in cached repo: ..."),
            CorruptedCacheException(...),
            ...
        ],
    )
    ```

    You can also print a detailed report directly from the `huggingface-cli` using:
    ```text
    > huggingface-cli scan-cache
    REPO ID                     REPO TYPE SIZE ON DISK NB FILES REFS                LOCAL PATH
    --------------------------- --------- ------------ -------- ------------------- -------------------------------------------------------------------------
    glue                        dataset         116.3K       15 1.17.0, main, 2.4.0 /Users/lucain/.cache/huggingface/hub/datasets--glue
    google/fleurs               dataset          64.9M        6 main, refs/pr/1     /Users/lucain/.cache/huggingface/hub/datasets--google--fleurs
    Jean-Baptiste/camembert-ner model           441.0M        7 main                /Users/lucain/.cache/huggingface/hub/models--Jean-Baptiste--camembert-ner
    bert-base-cased             model             1.9G       13 main                /Users/lucain/.cache/huggingface/hub/models--bert-base-cased
    t5-base                     model            10.1K        3 main                /Users/lucain/.cache/huggingface/hub/models--t5-base
    t5-small                    model           970.7M       11 refs/pr/1, main     /Users/lucain/.cache/huggingface/hub/models--t5-small

    Done in 0.0s. Scanned 6 repo(s) for a total of 3.4G.
    Got 1 warning(s) while scanning. Use -vvv to print details.
    ```

    Args:
        cache_dir (`str` or `Path`, `optional`):
            Cache directory to cache. Defaults to the default HF cache directory.

    <Tip warning={true}>

    Raises:

        `CacheNotFound`
          If the cache directory does not exist.

        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
          If the cache directory is a file, instead of a directory.

    </Tip>

    Returns: a [`~HFCacheInfo`] object.
    NzCache directory not found: zV. Please use `cache_dir` argument or set `HUGGINGFACE_HUB_CACHE` environment variable.)r   z1Scan cache expects a directory but found a file: c                 s   s   | ]}|j V  qd S r   )r-   )rN   rY   r   r   r   	<genexpr>b  s     z!scan_cache_dir.<locals>.<genexpr>)rV   r-   rb   )r   r   
expanduserresolveexistsr   is_file
ValueErrorrc   iterdirrd   _scan_cached_repor'   appendra   ri   sum)r   rV   rb   rJ   er   r   r   scan_cache_dir  s0    V

rw   )rJ   r1   c                    s  |   std|  d| jkr.td|  | jjddd\}}|dd }|dd}|d	krxtd
| d|  di  | d }| d }| r|  std| tt}| r$| rtd| |	dD ]F}|  rqt
||}| }| }	W 5 Q R X ||	 | qt }
| D ],}| rPtd| t }|	dD ]|}|  rrq`t| }| std| | kr|  |< |t|j| | j| | j | jd q`t|dkrt fdd|D }n
| j}|
t|jt|t||jt t fddtdd |D D ||d q2t|dkrtdt| d|  dt dkrtdd   D }tdd   D }n|  }|j}|j}tt || |t|
tdd   D ||d S )!zScan a single cache repo and return information about it.

    Any unexpected behavior will raise a [`~CorruptedCacheException`].
    zRepo path is not a directory: z--z6Repo path is not a valid HuggingFace cache directory: r   )maxsplitN/>   r   r   r   z8Repo type must be `dataset`, `model` or `space`, found `z` (z).rW   r@   z,Snapshots dir doesn't exist in cached repo: z!Refs directory cannot be a file: z**/*z*Snapshots folder corrupted. Found a file: zBlob missing (broken symlink): )r*   r+   r-   r,   r.   r/   r   c                 3   s   | ]} |j  jV  qd S r   )r,   st_mtimerN   rj   Z
blob_statsr   r   rl     s     z$_scan_cached_repo.<locals>.<genexpr>c                 3   s   | ]} | j V  qd S r   st_size)rN   r,   r}   r   r   rl     s    c                 s   s   | ]}|j V  qd S r   )r,   r|   r   r   r   rl     s     )r=   r?   r@   r-   r>   rA   z-Reference(s) refer to missing commit hashes: z (c                 s   s   | ]}|j V  qd S r   )st_atimerN   statr   r   r   rl     s     c                 s   s   | ]}|j V  qd S r   )r{   r   r   r   r   rl     s     c                 s   s   | ]}|j V  qd S r   r~   r   r   r   r   rl     s     )rF   rH   rJ   rI   rK   r-   rL   rA   ) is_dirr'   namesplitreplacero   r   rc   rp   globr$   relative_toopenreadrd   rr   r   rn   r   r)   r   r   r{   rE   maxr<   ri   popru   dictvaluesrG   )rJ   rI   rH   Zsnapshots_pathZ	refs_pathZrefs_by_hashZref_pathref_namefr=   Zcached_revisionsZrevision_pathcached_filesr+   r,   Zrevision_last_modifiedZrepo_last_accessedZrepo_last_modifiedZ
repo_statsr   r}   r   rs   g  s    







rs   )numr1   c                 C   sD   t | }dD ]*}t|dk r.|d|   S |d }q|ddS )zkFormat size in bytes into a human-readable string.

    Taken from https://stackoverflow.com/a/1094933
    ) KMGTPEZg     @@z3.1fz.1fY)r:   abs)r   Znum_funitr   r   r   r7     s    
r7   ))secondr   <   )minuter   r   )houri     )dayiQ    )weeki:	 r   )monthi '    )yeari3N)tsr1   c                 C   sh   t   |  }|dk rdS tD ]*\}}}t|| }|dk	r||kr qHq| d| |dkr^dnd dS )	zFormat timestamp in seconds into a human-readable string, relative to now.

    Vaguely inspired by Django's `timesince` formatter.
       za few seconds agoN r   sr   z ago)time_TIMESINCE_CHUNKSround)r   deltalabeldividerZ	max_valuevaluer   r   r   r2     s    r2   )r_   rZ   r1   c              	   C   s   t d| d|   z"|  r,t|  n
t|  W n^ tk
rh   t jd| d|  ddd Y n0 t	k
r   t jd| d|  ddd Y nX d	S )
aE  Try to delete a local file or folder.

    If the path does not exists, error is logged as a warning and then ignored.

    Args:
        path (`Path`)
            Path to delete. Can be a file or a folder.
        path_type (`str`)
            What path are we deleting ? Only for logging purposes. Example: "snapshot".
    zDelete z: zCouldn't delete z: file not found ()T)exc_infoz: permission denied (N)
r]   r^   rp   osre   shutilrmtreeFileNotFoundErrorrf   PermissionError)r_   rZ   r   r   r   r\     s     r\   )N)+r#   r   r   r   collectionsr   dataclassesr   pathlibr   typingr   r   r   r   r	   r
   r   	constantsr   r   r   Z
get_loggerr    r]   rR   	Exceptionr   r'   r)   r<   rG   rS   ra   r$   rw   rs   r9   r7   r   r:   r2   r\   r   r   r   r   <module>   s>   $

BLUK w 