o
    ed                     @   s  d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
mZ ddlmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZmZmZmZ dd	lmZ dd
lm Z  ddl!m"Z"m#Z# ddl$m%Z%m&Z&m'Z' ddl(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/ ddl(m0Z1 erddl2m3Z3 e-4e5Z6ed Z7dZ8e
G dd dZ9e
G dd dZ:e
G dd dZ;de<de<fddZ=ee;e:e9f Z>dee> ddfddZ?e/dd d!d"ee; d#e<d$e<d%ee< d&ee< d'e@fd(d)ZAd*eBfd+d,ZCe/		-d=d"ee; d#e<d$e<d%ee< d.e<d&ee< d/eDdee<e7f fd0d1ZEe/	d>d2ee: d#e<d$e<d%ee< d.e<d&ee< deee<ee< f d3f fd4d5ZF		d?dee> d6ee<e7f d7eee<ee< f d3f d8e<d9ee< d:ee< deee<ef  fd;d<ZGdS )@z<
Type definitions and utilities for the `create_commit` API
    N)defaultdict)contextmanager)	dataclassfield)groupby)PathPurePosixPath)TYPE_CHECKINGAnyBinaryIODictIterableIteratorListLiteralOptionalTupleUnion)
thread_map)get_session   )ENDPOINTHF_HUB_ENABLE_HF_TRANSFER)
UploadInfo
lfs_uploadpost_lfs_batch_info)EntryNotFoundErrorbuild_hf_headerschunk_iterablehf_raise_for_statusloggingtqdm_stream_filevalidate_hf_hub_args)tqdm)RepoFilelfsregulari  c                   @   s:   e Zd ZU dZeed< dZeee	d f ed< dd Z
dS )CommitOperationDeletea  
    Data structure holding necessary info to delete a file or a folder from a repository
    on the Hub.

    Args:
        path_in_repo (`str`):
            Relative filepath in the repo, for example: `"checkpoints/1fec34a/weights.bin"`
            for a file or `"checkpoints/1fec34a/"` for a folder.
        is_folder (`bool` or `Literal["auto"]`, *optional*)
            Whether the Delete Operation applies to a folder or not. If "auto", the path
            type (file or folder) is guessed automatically by looking if path ends with
            a "/" (folder) or not (file). To explicitly set the path type, you can set
            `is_folder=True` or `is_folder=False`.
    path_in_repoauto	is_folderc                 C   sF   t | j| _| jdkr| jd| _t| jts!td| j dd S )Nr*   /zNWrong value for `is_folder`. Must be one of [`True`, `False`, `'auto'`]. Got 'z'.)_validate_path_in_repor)   r+   endswith
isinstancebool
ValueErrorself r4   KD:\Projects\ConvertPro\env\Lib\site-packages\huggingface_hub/_commit_api.py__post_init__E   s   
z#CommitOperationDelete.__post_init__N)__name__
__module____qualname____doc__str__annotations__r+   r   r0   r   r6   r4   r4   r4   r5   r(   1   s
   
 r(   c                   @   s:   e Zd ZU dZeed< eed< dZee ed< dd ZdS )CommitOperationCopyab  
    Data structure holding necessary info to copy a file in a repository on the Hub.

    Limitations:
      - Only LFS files can be copied. To copy a regular file, you need to download it locally and re-upload it
      - Cross-repository copies are not supported.

    Note: you can combine a [`CommitOperationCopy`] and a [`CommitOperationDelete`] to rename an LFS file on the Hub.

    Args:
        src_path_in_repo (`str`):
            Relative filepath in the repo of the file to be copied, e.g. `"checkpoints/1fec34a/weights.bin"`.
        path_in_repo (`str`):
            Relative filepath in the repo where to copy the file, e.g. `"checkpoints/1fec34a/weights_copy.bin"`.
        src_revision (`str`, *optional*):
            The git revision of the file to be copied. Can be any valid git revision.
            Default to the target commit revision.
    src_path_in_repor)   Nsrc_revisionc                 C   s   t | j| _t | j| _d S N)r-   r>   r)   r2   r4   r4   r5   r6   i   s   z!CommitOperationCopy.__post_init__)	r7   r8   r9   r:   r;   r<   r?   r   r6   r4   r4   r4   r5   r=   P   s   
 r=   c                   @   sv   e Zd ZU dZeed< eeeee	f ed< e
dddZeed< dd	d
Zeddedee	 fddZdefddZdS )CommitOperationAdda  
    Data structure holding necessary info to upload a file to a repository on the Hub.

    Args:
        path_in_repo (`str`):
            Relative filepath in the repo, for example: `"checkpoints/1fec34a/weights.bin"`
        path_or_fileobj (`str`, `Path`, `bytes`, or `BinaryIO`):
            Either:
            - a path to a local file (as `str` or `pathlib.Path`) to upload
            - a buffer of bytes (`bytes`) holding the content of the file to upload
            - a "file object" (subclass of `io.BufferedIOBase`), typically obtained
                with `open(path, "rb")`. It must support `seek()` and `tell()` methods.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `path_or_fileobj` is not one of `str`, `Path`, `bytes` or `io.BufferedIOBase`.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `path_or_fileobj` is a `str` or `Path` but not a path to an existing file.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `path_or_fileobj` is a `io.BufferedIOBase` but it doesn't support both
            `seek()` and `tell()`.
    r)   path_or_fileobjF)initreprupload_inforeturnNc              
   C   s   t | j| _t| jtrt| j| _t| jtr2tjtj	| j}tj
|s1td| dnt| jtjtfs?tdt| jtjriz| j  | jdtj W n ttfyh } ztd|d}~ww t| jtrxt| j| _dS t| jtrt| j| _dS t| j| _dS )z6Validates `path_or_fileobj` and compute `upload_info`.zProvided path: 'z(' is not a file on the local file systemzpath_or_fileobj must be either an instance of str, bytes or io.BufferedIOBase. If you passed a file-like object, make sure it is in binary mode.r   zNpath_or_fileobj is a file-like object but does not implement seek() and tell()N)r-   r)   r/   rB   r   r;   ospathnormpath
expanduserisfiler1   ioBufferedIOBasebytestellseekSEEK_CUROSErrorAttributeErrorr   	from_pathrE   
from_bytesZfrom_fileobj)r3   rB   excr4   r4   r5   r6      s:   
z CommitOperationAdd.__post_init__	with_tqdmc                 c   s    t | jtst | jtrF|r*t| j}|V  W d   dS 1 s#w   Y  dS t| jd}|V  W d   dS 1 s?w   Y  dS t | jtrUt| jV  dS t | jtj	ro| j
 }| jV  | j|tj dS dS )u  
        A context manager that yields a file-like object allowing to read the underlying
        data behind `path_or_fileobj`.

        Args:
            with_tqdm (`bool`, *optional*, defaults to `False`):
                If True, iterating over the file object will display a progress bar. Only
                works if the file-like object is a path to a file. Pure bytes and buffers
                are not supported.

        Example:

        ```python
        >>> operation = CommitOperationAdd(
        ...        path_in_repo="remote/dir/weights.h5",
        ...        path_or_fileobj="./local/weights.h5",
        ... )
        CommitOperationAdd(path_in_repo='remote/dir/weights.h5', path_or_fileobj='./local/weights.h5')

        >>> with operation.as_file() as file:
        ...     content = file.read()

        >>> with operation.as_file(with_tqdm=True) as file:
        ...     while True:
        ...         data = file.read(1024)
        ...         if not data:
        ...              break
        config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]

        >>> with operation.as_file(with_tqdm=True) as file:
        ...     requests.put(..., data=file)
        config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]
        ```
        Nrb)r/   rB   r;   r   r!   openrN   rL   BytesIOrM   rO   rP   SEEK_SET)r3   rW   fileZprev_posr4   r4   r5   as_file   s    $""
zCommitOperationAdd.as_filec                 C   s:   |   }t| W  d   S 1 sw   Y  dS )z[
        The base64-encoded content of `path_or_fileobj`

        Returns: `bytes`
        N)r]   base64	b64encoderead)r3   r\   r4   r4   r5   
b64content   s   
$zCommitOperationAdd.b64contentrF   N)F)r7   r8   r9   r:   r;   r<   r   r   rN   r   r   rE   r   r6   r   r0   r   r]   ra   r4   r4   r4   r5   rA   n   s   
 
#1rA   r)   rF   c                 C   s   |  dr| dd  } | dks| dks|  dr td|  d|  dr+| d	d  } td
d | dD r?td|  d| S )Nr,   r   .z..z../z,Invalid `path_in_repo` in CommitOperation: ''z./   c                 s   s    | ]}|d kV  qdS )z.gitNr4   ).0partr4   r4   r5   	<genexpr>   s    z)_validate_path_in_repo.<locals>.<genexpr>z^Invalid `path_in_repo` in CommitOperation: cannot update files under a '.git/' folder (path: 'z').)
startswithr1   anysplit)r)   r4   r4   r5   r-      s   

r-   
operationsc                 C   s   t t}| D ]X}|j}t|tr9|| dkrtd| d ||  d7  < t|jD ]}|t	|  d7  < q,t|t
r^|t	t| dkr^|jrUtd| d qtd| d qdS )a  
    Warn user when a list of operations is expected to overwrite itself in a single
    commit.

    Rules:
    - If a filepath is updated by multiple `CommitOperationAdd` operations, a warning
      message is triggered.
    - If a filepath is updated at least once by a `CommitOperationAdd` and then deleted
      by a `CommitOperationDelete`, a warning is triggered.
    - If a `CommitOperationDelete` deletes a filepath that is then updated by a
      `CommitOperationAdd`, no warning is triggered. This is usually useless (no need to
      delete before upload) but can happen if a user deletes an entire folder and then
      add new files to it.
    r   zBAbout to update multiple times the same file in the same commit: 'z9'. This can cause undesired inconsistencies in your repo.r   z_About to delete a folder containing files that have just been updated within the same commit: 'zLAbout to delete a file that have just been updated within the same commit: 'N)r   intr)   r/   rA   warningswarnr   parentsr;   r(   r+   )rl   Znb_additions_per_path	operationr)   parentr4   r4   r5   warn_on_overwriting_operations   s8   

rs      )endpointnum_threads	additions	repo_typerepo_idtokenru   rv   c                    sx  g }t | ddD ])}tdd |D |||d\}}	|	r-ddd |	D }
td|
 ||7 }qd	d
 | D  g }|D ]}|ddu rUtd |d  j d q=|| q=t	|dkrhtd dS d fdd}t
rtdt	| d t|D ]}|| qdS t	|dkrtd ||d  dS tdt	| d| d t||dt	| d|td dS )a  
    Uploads the content of `additions` to the Hub using the large file storage protocol.

    Relevant external documentation:
        - LFS Batch API: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md

    Args:
        additions (`List` of `CommitOperationAdd`):
            The files to be uploaded
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        token (`str`, *optional*):
            An authentication token ( See https://huggingface.co/settings/tokens )
        num_threads (`int`, *optional*):
            The number of concurrent threads to use when uploading. Defaults to 5.


    Raises: `RuntimeError` if an upload failed for any reason

    Raises: `ValueError` if the server returns malformed responses

    Raises: `requests.HTTPError` if the LFS batch endpoint returned an HTTP
        error

       )
chunk_sizec                 S      g | ]}|j qS r4   )rE   rf   opr4   r4   r5   
<listcomp>W      z$upload_lfs_files.<locals>.<listcomp>)Zupload_infosrz   ry   rx   ru   
c              	   S   s0   g | ]}d | d d| di  d qS )z$Encountered error for file with OID oidz: `errormessage)get)rf   errr4   r4   r5   r   a  s    "z$LFS batch endpoint returned errors:
c                 S   s   i | ]	}|j j |qS r4   )rE   sha256hex)rf   Zadd_opr4   r4   r5   
<dictcomp>i      z$upload_lfs_files.<locals>.<dictcomp>actionsNzContent of file r   z/ is already present upstream - skipping upload.r   zNo LFS files to upload.rF   c              
      sN   z | d  }t || d W d S  ty& } z
td|j d|d }~ww )Nr   )rq   Zlfs_batch_actionrz   zError while uploading 'z' to the Hub.)r   	ExceptionRuntimeErrorr)   )Zbatch_actionrq   rV   Z	oid2addoprz   r4   r5   _wrapped_lfs_upload{  s   z-upload_lfs_files.<locals>._wrapped_lfs_uploadz
Uploading z* LFS files to the Hub using `hf_transfer`.r   zUploading 1 LFS file to the Hubz" LFS files to the Hub using up to z threads concurrentlyzUpload z
 LFS files)descmax_workersZ
tqdm_classrb   )r   r   joinr1   r   loggerdebugr)   appendlenr   hf_tqdmr   )rw   rx   ry   rz   ru   rv   Zbatch_actionschunkZbatch_actions_chunkZbatch_errors_chunkr   Zfiltered_actionsactionr   r4   r   r5   upload_lfs_files+  s^   )
	




r   preupload_infoc                 C   sh   |  d}t|tstd|D ]!}t|tr-t| dtr-t| dtr-|d dv s1tdq| S )Nfilesz&preupload_info is improperly formattedrH   
uploadModer%   z'preupload_info is improperly formatted:)r   r/   listr1   dictr;   )r   r   	file_infor4   r4   r5   _validate_preupload_info  s   

r   Frevision	create_prc              	   C   s   |dur|nt }t|d}i }t| dD ]?}	ddd |	D i}
t j| d| d| d	| |
||r6d
dindd}t| t| }|jdi dd |d D  q| D ]}|j	j
dkre|j}d||< qV|S )av  
    Requests the Hub "preupload" endpoint to determine whether each input file
    should be uploaded as a regular git blob or as git LFS blob.

    Args:
        additions (`Iterable` of :class:`CommitOperationAdd`):
            Iterable of :class:`CommitOperationAdd` describing the files to
            upload to the Hub.
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        token (`str`, *optional*):
            An authentication token ( See https://huggingface.co/settings/tokens )
        revision (`str`):
            The git revision to upload the files to. Can be any valid git revision.

    Returns: `Dict[str, UploadMode]`
        Key is the file path, value is the upload mode ("regular" or "lfs").

    Raises:
        [`~utils.HfHubHTTPError`]
            If the Hub API returned an error.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the Hub API response is improperly formatted.
    N)rz   r{   r   c                 S   s8   g | ]}|j t|jjd |jj|jj dqS )ascii)rH   samplesizesha)	r)   r^   r_   rE   r   decoder   r   r   r~   r4   r4   r5   r     s    
z&fetch_upload_modes.<locals>.<listcomp>z/api/zs/z/preupload/r   1)jsonheadersparamsc                 S   s   i | ]	}|d  |d qS )rH   r   r4   )rf   r\   r4   r4   r5   r     r   z&fetch_upload_modes.<locals>.<dictcomp>r   r'   r4   )r   r   r   r   postr   r   r   updaterE   r   r)   )rw   rx   ry   rz   r   ru   r   r   upload_modesr   payloadrespr   ZadditionrH   r4   r4   r5   fetch_upload_modes  s.   %
 r   copiesr$   c                 C   s   ddl m} |||d}i }t| dd dD ]X\}	}
t|
}
dd |
D }td	t|tD ]&}|j||||t  |	p<||d
}|D ]}|jsKt	d|||j
|	f< qBq-|
D ]}|j|	f|vrmtd|j d|	ph| dqVq|S )a`  
    Requests the Hub files information of the LFS files to be copied, including their sha256.

    Args:
        copies (`Iterable` of :class:`CommitOperationCopy`):
            Iterable of :class:`CommitOperationCopy` describing the files to
            copy on the Hub.
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        token (`str`, *optional*):
            An authentication token ( See https://huggingface.co/settings/tokens )
        revision (`str`):
            The git revision to upload the files to. Can be any valid git revision.

    Returns: `Dict[Tuple[str, Optional[str]], RepoFile]]`
        Key is the file path and revision of the file to copy, value is the repo file.

    Raises:
        [`~utils.HfHubHTTPError`]
            If the Hub API returned an error.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the Hub API response is improperly formatted.
    r   )HfApi)ru   rz   c                 S   s   | j S r@   )r?   )r   r4   r4   r5   <lambda>  s    z)fetch_lfs_files_to_copy.<locals>.<lambda>)keyc                 S   r}   r4   )r>   r~   r4   r4   r5   r     r   z+fetch_lfs_files_to_copy.<locals>.<listcomp>r   )ry   pathsr   rx   )Copying a non-LFS file is not implementedzCannot copy z at revision z: file is missing on repo.)hf_apir   r   r   ranger   FETCH_LFS_BATCH_SIZEZlist_files_infor&   NotImplementedErrorZ	rfilenamer>   r   )r   rx   ry   rz   r   ru   r   r   files_to_copyr?   rl   r   offsetZsrc_repo_filesZsrc_repo_filerq   r4   r4   r5   fetch_lfs_files_to_copy  s8   #
r   r   r   commit_messagecommit_descriptionparent_commitc           	      c   sH   |dur|nd}||d}|dur||d< d|dV  | D ]}t |tr=||jdkr=d|  |jd	d
dV  qt |tr]||jdkr]d|jd|jj |jj	ddV  qt |t
rq|jrgdndd|jidV  qt |tr||j|jf }|jstdd|jd|jd ddV  qtd| d||j dS )aG  
    Builds the payload to POST to the `/commit` API of the Hub.

    Payload is returned as an iterator so that it can be streamed as a ndjson in the
    POST request.

    For more information, see:
        - https://github.com/huggingface/huggingface_hub/issues/1085#issuecomment-1265208073
        - http://ndjson.org/
    N )summarydescriptionZparentCommitheader)r   valuer'   r\   r^   )contentrH   encodingr&   ZlfsFiler   )rH   algor   r   ZdeletedFolderZdeletedFilerH   r   )rH   r   r   z(Unknown operation to commit. Operation: z. Upload mode: )r/   rA   r   r)   ra   r   rE   r   r   r   r(   r+   r=   r>   r?   r&   r   r1   )	rl   r   r   r   r   r   header_valuerq   Zfile_to_copyr4   r4   r5   prepare_commit_payload.  sT   


	








r   )NFr@   )NN)Hr:   r^   rL   rG   rn   collectionsr   
contextlibr   dataclassesr   r   	itertoolsr   pathlibr   r   typingr	   r
   r   r   r   r   r   r   r   r   r   Ztqdm.contrib.concurrentr   Zhuggingface_hubr   	constantsr   r   r&   r   r   r   utilsr   r   r   r   r    r!   r"   r#   r   r   r$   Z
get_loggerr7   r   Z
UploadModer   r(   r=   rA   r;   r-   ZCommitOperationrs   rm   r   r   r   r0   r   r   r   r4   r4   r4   r5   <module>   s    4$	
{.j
JB
