U
    +-ed                     @   s  d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
mZ ddlmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZmZmZmZ dd	lmZ dd
lm Z  ddl!m"Z"m#Z# ddl$m%Z%m&Z&m'Z' ddl(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/ ddl(m0Z1 erddl2m3Z3 e-4e5Z6ed Z7dZ8e
G dd dZ9e
G dd dZ:e
G dd dZ;e<e<dddZ=ee;e:e9f Z>ee> ddddZ?e/ddd ee; e<e<ee< ee< e@d!d"d#ZAeBd$d%d&ZCe/d2ee; e<e<ee< e<ee< eDee<e7f d(d)d*ZEe/d3ee: e<e<ee< e<ee< eee<ee< f d+f d,d-d.ZFd4ee> ee<e7f eee<ee< f d+f e<ee< ee< eee<ef  d/d0d1ZGdS )5z<
Type definitions and utilities for the `create_commit` API
    N)defaultdict)contextmanager)	dataclassfield)groupby)PathPurePosixPath)TYPE_CHECKINGAnyBinaryIODictIterableIteratorListLiteralOptionalTupleUnion)
thread_map)get_session   )ENDPOINTHF_HUB_ENABLE_HF_TRANSFER)
UploadInfo
lfs_uploadpost_lfs_batch_info)EntryNotFoundErrorbuild_hf_headerschunk_iterablehf_raise_for_statusloggingtqdm_stream_filevalidate_hf_hub_args)tqdm)RepoFilelfsregulari  c                   @   s:   e Zd ZU dZeed< dZeee	d f ed< dd Z
dS )CommitOperationDeletea  
    Data structure holding necessary info to delete a file or a folder from a repository
    on the Hub.

    Args:
        path_in_repo (`str`):
            Relative filepath in the repo, for example: `"checkpoints/1fec34a/weights.bin"`
            for a file or `"checkpoints/1fec34a/"` for a folder.
        is_folder (`bool` or `Literal["auto"]`, *optional*)
            Whether the Delete Operation applies to a folder or not. If "auto", the path
            type (file or folder) is guessed automatically by looking if path ends with
            a "/" (folder) or not (file). To explicitly set the path type, you can set
            `is_folder=True` or `is_folder=False`.
    path_in_repoauto	is_folderc                 C   sF   t | j| _| jdkr$| jd| _t| jtsBtd| j dd S )Nr*   /zNWrong value for `is_folder`. Must be one of [`True`, `False`, `'auto'`]. Got 'z'.)_validate_path_in_repor)   r+   endswith
isinstancebool
ValueErrorself r4   \/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/huggingface_hub/_commit_api.py__post_init__E   s    
z#CommitOperationDelete.__post_init__N)__name__
__module____qualname____doc__str__annotations__r+   r   r0   r   r6   r4   r4   r4   r5   r(   1   s   
r(   c                   @   s:   e Zd ZU dZeed< eed< dZee ed< dd ZdS )CommitOperationCopyab  
    Data structure holding necessary info to copy a file in a repository on the Hub.

    Limitations:
      - Only LFS files can be copied. To copy a regular file, you need to download it locally and re-upload it
      - Cross-repository copies are not supported.

    Note: you can combine a [`CommitOperationCopy`] and a [`CommitOperationDelete`] to rename an LFS file on the Hub.

    Args:
        src_path_in_repo (`str`):
            Relative filepath in the repo of the file to be copied, e.g. `"checkpoints/1fec34a/weights.bin"`.
        path_in_repo (`str`):
            Relative filepath in the repo where to copy the file, e.g. `"checkpoints/1fec34a/weights_copy.bin"`.
        src_revision (`str`, *optional*):
            The git revision of the file to be copied. Can be any valid git revision.
            Default to the target commit revision.
    src_path_in_repor)   Nsrc_revisionc                 C   s   t | j| _t | j| _d S N)r-   r>   r)   r2   r4   r4   r5   r6   i   s    z!CommitOperationCopy.__post_init__)	r7   r8   r9   r:   r;   r<   r?   r   r6   r4   r4   r4   r5   r=   P   s
   
r=   c                   @   sx   e Zd ZU dZeed< eeeee	f ed< e
dddZeed< ddd	d
Zedeee	 dddZedddZdS )CommitOperationAdda  
    Data structure holding necessary info to upload a file to a repository on the Hub.

    Args:
        path_in_repo (`str`):
            Relative filepath in the repo, for example: `"checkpoints/1fec34a/weights.bin"`
        path_or_fileobj (`str`, `Path`, `bytes`, or `BinaryIO`):
            Either:
            - a path to a local file (as `str` or `pathlib.Path`) to upload
            - a buffer of bytes (`bytes`) holding the content of the file to upload
            - a "file object" (subclass of `io.BufferedIOBase`), typically obtained
                with `open(path, "rb")`. It must support `seek()` and `tell()` methods.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `path_or_fileobj` is not one of `str`, `Path`, `bytes` or `io.BufferedIOBase`.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `path_or_fileobj` is a `str` or `Path` but not a path to an existing file.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `path_or_fileobj` is a `io.BufferedIOBase` but it doesn't support both
            `seek()` and `tell()`.
    r)   path_or_fileobjF)initreprupload_infoNreturnc              
   C   s(  t | j| _t| jtr$t| j| _t| jtrdtjtj	| j}tj
|s~td| dnt| jtjtfs~tdt| jtjrz| j  | jdtj W n0 ttfk
r } ztd|W 5 d}~X Y nX t| jtrt| j| _n,t| jtrt| j| _nt| j| _dS )z6Validates `path_or_fileobj` and compute `upload_info`.zProvided path: 'z(' is not a file on the local file systemzpath_or_fileobj must be either an instance of str, bytes or io.BufferedIOBase. If you passed a file-like object, make sure it is in binary mode.r   zNpath_or_fileobj is a file-like object but does not implement seek() and tell()N)r-   r)   r/   rB   r   r;   ospathnormpath
expanduserisfiler1   ioBufferedIOBasebytestellseekSEEK_CUROSErrorAttributeErrorr   	from_pathrE   
from_bytesZfrom_fileobj)r3   rB   excr4   r4   r5   r6      s4    
z CommitOperationAdd.__post_init__)	with_tqdmrG   c              	   c   s   t | jtst | jtrZ|r:t| j}|V  W 5 Q R X qt| jd}|V  W 5 Q R X nLt | jtrvt| jV  n0t | jtj	r| j
 }| jV  | j|tj dS )u  
        A context manager that yields a file-like object allowing to read the underlying
        data behind `path_or_fileobj`.

        Args:
            with_tqdm (`bool`, *optional*, defaults to `False`):
                If True, iterating over the file object will display a progress bar. Only
                works if the file-like object is a path to a file. Pure bytes and buffers
                are not supported.

        Example:

        ```python
        >>> operation = CommitOperationAdd(
        ...        path_in_repo="remote/dir/weights.h5",
        ...        path_or_fileobj="./local/weights.h5",
        ... )
        CommitOperationAdd(path_in_repo='remote/dir/weights.h5', path_or_fileobj='./local/weights.h5')

        >>> with operation.as_file() as file:
        ...     content = file.read()

        >>> with operation.as_file(with_tqdm=True) as file:
        ...     while True:
        ...         data = file.read(1024)
        ...         if not data:
        ...              break
        config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]

        >>> with operation.as_file(with_tqdm=True) as file:
        ...     requests.put(..., data=file)
        config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]
        ```
        rbN)r/   rB   r;   r   r!   openrO   rM   BytesIOrN   rP   rQ   SEEK_SET)r3   rX   fileZprev_posr4   r4   r5   as_file   s    $
zCommitOperationAdd.as_filec              
   C   s.   |   }t| W  5 Q R  S Q R X dS )z[
        The base64-encoded content of `path_or_fileobj`

        Returns: `bytes`
        N)r^   base64	b64encoderead)r3   r]   r4   r4   r5   
b64content   s    
zCommitOperationAdd.b64content)F)r7   r8   r9   r:   r;   r<   r   r   rO   r   r   rE   r   r6   r   r0   r   r^   rb   r4   r4   r4   r5   rA   n   s   
#1rA   )r)   rG   c                 C   s   |  dr| dd  } | dks0| dks0|  dr@td|  d|  drV| d	d  } td
d | dD r~td|  d| S )Nr,   r   .z..z../z,Invalid `path_in_repo` in CommitOperation: ''z./   c                 s   s   | ]}|d kV  qdS )z.gitNr4   ).0partr4   r4   r5   	<genexpr>   s     z)_validate_path_in_repo.<locals>.<genexpr>z^Invalid `path_in_repo` in CommitOperation: cannot update files under a '.git/' folder (path: 'z').)
startswithr1   anysplit)r)   r4   r4   r5   r-      s    


r-   )
operationsrG   c                 C   s   t t}| D ]}|j}t|trr|| dkr>td| d ||  d7  < t|jD ]}|t	|  d7  < qXt|t
r|t	t| dkr|jrtd| d qtd| d qdS )a  
    Warn user when a list of operations is expected to overwrite itself in a single
    commit.

    Rules:
    - If a filepath is updated by multiple `CommitOperationAdd` operations, a warning
      message is triggered.
    - If a filepath is updated at least once by a `CommitOperationAdd` and then deleted
      by a `CommitOperationDelete`, a warning is triggered.
    - If a `CommitOperationDelete` deletes a filepath that is then updated by a
      `CommitOperationAdd`, no warning is triggered. This is usually useless (no need to
      delete before upload) but can happen if a user deletes an entire folder and then
      add new files to it.
    r   zBAbout to update multiple times the same file in the same commit: 'z9'. This can cause undesired inconsistencies in your repo.r   z_About to delete a folder containing files that have just been updated within the same commit: 'zLAbout to delete a file that have just been updated within the same commit: 'N)r   intr)   r/   rA   warningswarnr   parentsr;   r(   r+   )rl   Znb_additions_per_path	operationr)   parentr4   r4   r5   warn_on_overwriting_operations   s(    




rs      )endpointnum_threads)	additions	repo_typerepo_idtokenru   rv   c                    s~  g }t | ddD ]R}tdd |D |||d\}}	|	rZddd |	D }
td|
 ||7 }qd	d
 | D  g }|D ]:}|ddkrtd |d  j d qz|| qzt	|dkrtd dS dd fdd}t
rtdt	| d t|D ]}|| qn`t	|dkr@td ||d  n:tdt	| d| d t||dt	| d|td dS )a  
    Uploads the content of `additions` to the Hub using the large file storage protocol.

    Relevant external documentation:
        - LFS Batch API: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md

    Args:
        additions (`List` of `CommitOperationAdd`):
            The files to be uploaded
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        token (`str`, *optional*):
            An authentication token ( See https://huggingface.co/settings/tokens )
        num_threads (`int`, *optional*):
            The number of concurrent threads to use when uploading. Defaults to 5.


    Raises: `RuntimeError` if an upload failed for any reason

    Raises: `ValueError` if the server returns malformed responses

    Raises: `requests.HTTPError` if the LFS batch endpoint returned an HTTP
        error

       )
chunk_sizec                 S   s   g | ]
}|j qS r4   )rE   rf   opr4   r4   r5   
<listcomp>W  s     z$upload_lfs_files.<locals>.<listcomp>)Zupload_infosrz   ry   rx   ru   
c              	   S   s0   g | ](}d | d d| di  d qS )z$Encountered error for file with OID oidz: `errormessage)get)rf   errr4   r4   r5   r   a  s   z$LFS batch endpoint returned errors:
c                 S   s   i | ]}|j j |qS r4   )rE   sha256hex)rf   Zadd_opr4   r4   r5   
<dictcomp>i  s     
 z$upload_lfs_files.<locals>.<dictcomp>actionsNzContent of file r   z/ is already present upstream - skipping upload.r   zNo LFS files to upload.rF   c              
      sZ   z | d  }t || d W n6 tk
rT } ztd|j d|W 5 d }~X Y nX d S )Nr   )rq   Zlfs_batch_actionrz   zError while uploading 'z' to the Hub.)r   	ExceptionRuntimeErrorr)   )Zbatch_actionrq   rW   Z	oid2addoprz   r4   r5   _wrapped_lfs_upload{  s
    z-upload_lfs_files.<locals>._wrapped_lfs_uploadz
Uploading z* LFS files to the Hub using `hf_transfer`.r   zUploading 1 LFS file to the Hubz" LFS files to the Hub using up to z threads concurrentlyzUpload z
 LFS files)descmax_workersZ
tqdm_class)r   r   joinr1   r   loggerdebugr)   appendlenr   hf_tqdmr   )rw   rx   ry   rz   ru   rv   Zbatch_actionschunkZbatch_actions_chunkZbatch_errors_chunkr   Zfiltered_actionsactionr   r4   r   r5   upload_lfs_files+  s\    )
	


r   )preupload_infoc                 C   sh   |  d}t|tstd|D ]B}t|trZt| dtrZt| dtrZ|d dks tdq | S )Nfilesz&preupload_info is improperly formattedrI   
uploadModer%   z'preupload_info is improperly formatted:)r   r/   listr1   dictr;   )r   r   	file_infor4   r4   r5   _validate_preupload_info  s    



r   F)rw   rx   ry   rz   revisionru   	create_prrG   c              	   C   s   |dk	r|nt }t|d}i }t| dD ]z}	ddd |	D i}
t j| d| d| d	| |
||rld
dindd}t| t| }|jf dd |d D  q(| D ]}|j	j
dkr|j}d||< q|S )av  
    Requests the Hub "preupload" endpoint to determine whether each input file
    should be uploaded as a regular git blob or as git LFS blob.

    Args:
        additions (`Iterable` of :class:`CommitOperationAdd`):
            Iterable of :class:`CommitOperationAdd` describing the files to
            upload to the Hub.
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        token (`str`, *optional*):
            An authentication token ( See https://huggingface.co/settings/tokens )
        revision (`str`):
            The git revision to upload the files to. Can be any valid git revision.

    Returns: `Dict[str, UploadMode]`
        Key is the file path, value is the upload mode ("regular" or "lfs").

    Raises:
        [`~utils.HfHubHTTPError`]
            If the Hub API returned an error.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the Hub API response is improperly formatted.
    N)rz   r{   r   c                 S   s8   g | ]0}|j t|jjd |jj|jj dqS )ascii)rI   samplesizesha)	r)   r_   r`   rE   r   decoder   r   r   r}   r4   r4   r5   r     s   
z&fetch_upload_modes.<locals>.<listcomp>z/api/zs/z/preupload/r   1)jsonheadersparamsc                 S   s   i | ]}|d  |d qS )rI   r   r4   )rf   r]   r4   r4   r5   r     s      z&fetch_upload_modes.<locals>.<dictcomp>r   r'   )r   r   r   r   postr   r   r   updaterE   r   r)   )rw   rx   ry   rz   r   ru   r   r   upload_modesr   payloadrespr   ZadditionrI   r4   r4   r5   fetch_upload_modes  s.    %
 
r   r$   )copiesrx   ry   rz   r   ru   rG   c                 C   s   ddl m} |||d}i }t| dd dD ]\}	}
t|
}
dd |
D }td	t|tD ]L}|j||||t  |	px||d
}|D ] }|jst	d|||j
|	f< qqZ|
D ].}|j|	f|krtd|j d|	p| dqq,|S )a`  
    Requests the Hub files information of the LFS files to be copied, including their sha256.

    Args:
        copies (`Iterable` of :class:`CommitOperationCopy`):
            Iterable of :class:`CommitOperationCopy` describing the files to
            copy on the Hub.
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        token (`str`, *optional*):
            An authentication token ( See https://huggingface.co/settings/tokens )
        revision (`str`):
            The git revision to upload the files to. Can be any valid git revision.

    Returns: `Dict[Tuple[str, Optional[str]], RepoFile]]`
        Key is the file path and revision of the file to copy, value is the repo file.

    Raises:
        [`~utils.HfHubHTTPError`]
            If the Hub API returned an error.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the Hub API response is improperly formatted.
    r   )HfApi)ru   rz   c                 S   s   | j S r@   )r?   )r~   r4   r4   r5   <lambda>      z)fetch_lfs_files_to_copy.<locals>.<lambda>)keyc                 S   s   g | ]
}|j qS r4   )r>   r}   r4   r4   r5   r     s     z+fetch_lfs_files_to_copy.<locals>.<listcomp>r   )ry   pathsr   rx   )Copying a non-LFS file is not implementedzCannot copy z at revision z: file is missing on repo.)hf_apir   r   r   ranger   FETCH_LFS_BATCH_SIZEZlist_files_infor&   NotImplementedErrorZ	rfilenamer>   r   )r   rx   ry   rz   r   ru   r   r   files_to_copyr?   rl   r   offsetZsrc_repo_filesZsrc_repo_filerq   r4   r4   r5   fetch_lfs_files_to_copy  s.    #r   )rl   r   r   commit_messagecommit_descriptionparent_commitrG   c           	      c   sN  |dk	r|nd}||d}|dk	r*||d< d|dV  | D ]}t |trz||jdkrzd|  |jd	d
dV  q:t |tr||jdkrd|jd|jj |jj	ddV  q:t |t
r|jrdndd|jidV  q:t |tr,||j|jf }|jstdd|jd|jd ddV  q:td| d||j q:dS )aG  
    Builds the payload to POST to the `/commit` API of the Hub.

    Payload is returned as an iterator so that it can be streamed as a ndjson in the
    POST request.

    For more information, see:
        - https://github.com/huggingface/huggingface_hub/issues/1085#issuecomment-1265208073
        - http://ndjson.org/
    N )summarydescriptionZparentCommitheader)r   valuer'   r]   r_   )contentrI   encodingr&   ZlfsFiler   )rI   algor   r   ZdeletedFolderZdeletedFilerI   r   )rI   r   r   z(Unknown operation to commit. Operation: z. Upload mode: )r/   rA   r   r)   rb   r   rE   r   r   r   r(   r+   r=   r>   r?   r&   r   r1   )	rl   r   r   r   r   r   header_valuerq   Zfile_to_copyr4   r4   r5   prepare_commit_payload.  sL    



	






r   )NF)N)NN)Hr:   r_   rM   rH   rn   collectionsr   
contextlibr   dataclassesr   r   	itertoolsr   pathlibr   r   typingr	   r
   r   r   r   r   r   r   r   r   r   Ztqdm.contrib.concurrentr   Zhuggingface_hubr   	constantsr   r   r&   r   r   r   utilsr   r   r   r   r    r!   r"   r#   r   r   r$   Z
get_loggerr7   r   Z
UploadModer   r(   r=   rA   r;   r-   ZCommitOperationrs   rm   r   r   r   r0   r   r   r   r4   r4   r4   r5   <module>   s   4$	
{.j  
J B  
