U
    sVc                     @  s  d Z ddlmZ ddlmZmZ ddlZddlZddlZddl	Z	ddl
Z
ddlmZmZmZmZmZmZ ddlZddlZddlmZ ddlZddlZddlmZmZmZmZmZmZmZm Z m!Z!m"Z" ddl#m$Z$m%Z&m'Z'm(Z(m)Z) ddl*Z*ddl+Z+dd	l,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3 dd
l4m5Z5 ddl6m7Z7 ddl8m9Z9 ddl:m;Z; ddl<m=Z=m>Z>m?Z?m@Z@ ddlAmBZB eCe)e' e( ZDeDEd eFdZGe de-dZHejIG dd dZJejIG dd dee ZKdddddZLe"ddddd ZMe"dddd!d ZMd"d"dd#d ZMdd$d%d&d'ZNe"d~d)ddd*d+d,ZOe"ddddd*d-d,ZOdd/dd"d*d0d,ZOd1d2 ZPd3ddd4d5ZQe9eBd6 eBd7 d8 d9dd3dd<dd=dd>d?d@ZRdddAdBdCZSdDdDdDdDdEdFdGdHdIdJ	ZTeCeTU ZVd<dKdLdMdNZWe9eBd7 d8 dOd3dPdPdQdRdSZXdTd$dAdUdVZYe"d(d(d(d(d(dWd3ddPd<ddXdPd=dYdZ	d[d\ZZe"d(d(d(d(d(d(d]d3ddPd<dd^dPd=d_dZ	d`d\ZZe"d(d(d(d(d(d(d]d3ddPd<dddPd=dadZ	dbd\ZZe9eBd7 dc dOddd.ddddd]d3ddPd<dddPd=dadZ	ded\ZZG dfdg dgeeZ[G dhdi die[Z\G djdk dke[Z]G dldm dmZ^G dndo doZ_dpddqdrdsdtZ`d3dddudvZad3dddwdxdyZbe	jcdzd{d|d}ZddS )zCommon IO api utilities    )annotations)ABCabstractmethodN)BufferedIOBaseBytesIO	RawIOBaseStringIO
TextIOBaseTextIOWrapper)Path)
IOAnyAnyStrGenericLiteralMappingSequenceTypeVarcastoverload)urljoinurlparseuses_netlocuses_paramsuses_relative)
BaseBufferCompressionDictCompressionOptionsFilePath
ReadBufferStorageOptionsWriteBuffer)get_lzma_file)import_optional_dependency)doc)find_stack_level)is_boolis_file_like
is_integeris_list_like)_shared_docs z^[A-Za-z][A-Za-z0-9+\-+.]*://BaseBufferT)boundc                   @  s>   e Zd ZU dZded< ded< ded< ded< d	Zd
ed< dS )IOArgsz?
    Return value of io/common.py:_get_filepath_or_buffer.
    str | BaseBufferfilepath_or_bufferstrencodingmoder   compressionFboolshould_closeN)__name__
__module____qualname____doc____annotations__r6    r<   r<   4/tmp/pip-unpacked-wheel-xj8nt62q/pandas/io/common.pyr.   N   s   
r.   c                   @  sn   e Zd ZU dZded< ded< ejedZded< d	Z	d
ed< ddddZ
ddddZdddddZdS )	IOHandlesau  
    Return value of io/common.py:get_handle

    Can be used as a context manager.

    This is used to easily close created buffers and to handle corner cases when
    TextIOWrapper is inserted.

    handle: The file handle to be used.
    created_handles: All file handles that are created by get_handle
    is_wrapped: Whether a TextIOWrapper needs to be detached.
    z
IO[AnyStr]handler   r4   )default_factoryzlist[IO[bytes] | IO[str]]created_handlesFr5   
is_wrappedNonereturnc                 C  s\   | j r8t| jtst| j  | j  | j| j | jD ]}|	  q>g | _d| _ dS )z
        Close all created buffers.

        Note: If a TextIOWrapper was inserted, it is flushed and detached to
        avoid closing the potentially user-created buffer.
        FN)
rB   
isinstancer?   r
   AssertionErrorflushdetachrA   removeclose)selfr?   r<   r<   r=   rK   p   s    



zIOHandles.closezIOHandles[AnyStr]c                 C  s   | S Nr<   rL   r<   r<   r=   	__enter__   s    zIOHandles.__enter__r   )argsrE   c                 G  s   |    d S rM   )rK   )rL   rP   r<   r<   r=   __exit__   s    zIOHandles.__exit__N)r7   r8   r9   r:   r;   dataclassesfieldlistrA   rB   rK   rO   rQ   r<   r<   r<   r=   r>   [   s   
r>   objectr5   )urlrE   c                 C  s   t | tsdS t| jtkS )z
    Check to see if a URL has a valid protocol.

    Parameters
    ----------
    url : str or unicode

    Returns
    -------
    isurl : bool
        If `url` has a valid protocol return True otherwise False.
    F)rF   r1   	parse_urlscheme_VALID_URLSrV   r<   r<   r=   is_url   s    
r[   r1   )r0   rE   c                 C  s   d S rM   r<   r0   r<   r<   r=   _expand_user   s    r]   c                 C  s   d S rM   r<   r\   r<   r<   r=   r]      s    zstr | BaseBufferTc                 C  s   t | trtj| S | S )a]  
    Return the argument with an initial component of ~ or ~user
    replaced by that user's home directory.

    Parameters
    ----------
    filepath_or_buffer : object to be converted if possible

    Returns
    -------
    expanded_filepath_or_buffer : an expanded filepath or the
                                  input if not expandable
    )rF   r1   ospath
expanduserr\   r<   r<   r=   r]      s    
rC   )headerrE   c                 C  s   | d krd S t | r2tt| } | dk r.tdd S t| ddr|tt| } ttt | s^tdtdd | D rxtdd S t	| rt
d	tdd S )
Nr   zUPassing negative integer to header is invalid. For no header, use header=None insteadF)Z
allow_setsz*header must be integer or list of integersc                 s  s   | ]}|d k V  qdS )r   Nr<   ).0ir<   r<   r=   	<genexpr>   s     z&validate_header_arg.<locals>.<genexpr>z8cannot specify multi-index header with negative integerszPassing a bool to header is invalid. Use header=None for no header or header=int or list-like of ints to specify the row(s) making up the column names)r(   r   int
ValueErrorr)   r   allmapanyr&   	TypeError)ra   r<   r<   r=   validate_header_arg   s*    

rk   .r   )r0   convert_file_likerE   c                 C  s   d S rM   r<   r0   rl   r<   r<   r=   stringify_path   s    rn   c                 C  s   d S rM   r<   rm   r<   r<   r=   rn      s    FzFilePath | BaseBufferTc                 C  s2   |st | rtt| S t| tjr*|  } t| S )a  
    Attempt to convert a path-like object to a string.

    Parameters
    ----------
    filepath_or_buffer : object to be converted

    Returns
    -------
    str_filepath_or_buffer : maybe a string version of the object

    Notes
    -----
    Objects supporting the fspath protocol (python 3.6+) are coerced
    according to its __fspath__ method.

    Any other object is passed through unchanged, which includes bytes,
    strings, buffers, or anything else that's not even path-like.
    )r'   r   r,   rF   r^   PathLike
__fspath__r]   rm   r<   r<   r=   rn      s
    
c                  O  s   ddl }|jj| |S )z`
    Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of
    the stdlib.
    r   N)urllib.requestrequesturlopen)rP   kwargsurllibr<   r<   r=   rs     s    rs   zFilePath | BaseBufferc                 C  s$   t | to"tt| o"| d S )zR
    Returns true if the given URL looks like
    something fsspec can handle
    )zhttp://zhttps://)rF   r1   r5   _RFC_3986_PATTERNmatch
startswithrZ   r<   r<   r=   is_fsspec_url  s
    

ry   storage_optionscompression_optionsr0   )rz   r{   utf-8rr   r    )r0   r2   r4   r3   rz   rE   c              	   C  s  t | } t|\}}t| |}|rJt| drJd|krJtjdtt d d}t||d}d|kr|dkr|d	krtj| d
| t	t d |}d|krd|kr|d7 }t
| tr.t| r.|pi }ddl}|jj| |d}t|2}	|	jdd}
|
dkrddi}t|	 }W 5 Q R X t|||d|dS t| rXt
| tsHt| dr`| dd} | drx| dd} td}g }z&td ddlm}m} ||tg}W n tk
r   Y nX z$|j | fd|i|pi   }W n^ t!|k
rD   |dkrddi}nt|}d|d< |j | fd|i|p6i   }Y nX t|||d|dS |rft"dt
| tt#t$j$frtt%| ||d|dS t| dst| dsdt&|  }t"|t| ||d|dS )a%  
    If the filepath_or_buffer is a url, translate and return the buffer.
    Otherwise passthrough.

    Parameters
    ----------
    filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
                         or buffer
    {compression_options}

        .. versionchanged:: 1.4.0 Zstandard support.

    encoding : the encoding to use to decode bytes, default is 'utf-8'
    mode : str, optional

    {storage_options}

        .. versionadded:: 1.2.0

    ..versionchange:: 1.2.0

      Returns the dataclass IOArgs.
    writebzDcompression has no effect when passing a non-binary object as input.)
stacklevelN)methodw)bz2xz)zutf-16zutf-32z( will not write the byte order mark for tr   )headerszContent-Encodinggzipr   T)r0   r2   r4   r6   r3   zs3a://zs3://zs3n://fsspecZbotocore)ClientErrorNoCredentialsErrorr3   Zanonz?storage_options passed with file object or non-fsspec file pathFreadz)Invalid file path or buffer object type: )'rn   get_compression_methodinfer_compressionhasattrwarningswarnRuntimeWarningr%   dictUnicodeWarningrF   r1   r[   rq   rr   Requestrs   r   getr   r   r.   ry   rG   rx   replacer#   Zbotocore.exceptionsr   r   PermissionErrorImportErroropentuplerf   bytesmmapr]   type)r0   r2   r4   r3   rz   compression_methodZfsspec_moderu   Zreq_inforeqcontent_encodingreaderr   Zerr_types_to_retry_with_anonr   r   Zfile_objmsgr<   r<   r=   _get_filepath_or_buffer  s    "
	


 


r   )r_   rE   c                 C  s   ddl m} td|| S )z
    converts an absolute native path to a FILE URL.

    Parameters
    ----------
    path : a path in native format

    Returns
    -------
    a valid FILE URL
    r   )pathname2urlzfile:)rq   r   r   )r_   r   r<   r<   r=   file_path_to_url  s    r   tarr   r   zipr   zstd)	.tar.tar.gz.tar.bz2.tar.xz.gz.bz2.zip.xzz.zstz"tuple[str | None, CompressionDict])r4   rE   c              
   C  s`   t | trPt| }z|d}W qX tk
rL } ztd|W 5 d}~X Y qXX ni }| }||fS )a  
    Simplifies a compression argument to a compression method string and
    a mapping containing additional arguments.

    Parameters
    ----------
    compression : str or mapping
        If string, specifies the compression method. If mapping, value at key
        'method' specifies compression method.

    Returns
    -------
    tuple of ({compression method}, Optional[str]
              {compression arguments}, Dict[str, Any])

    Raises
    ------
    ValueError on mapping missing 'method' key
    r   z.If mapping, compression must have key 'method'N)rF   r   r   popKeyErrorrf   )r4   compression_argsr   errr<   r<   r=   r     s    
r   )r{   
str | None)r0   r4   rE   c                 C  s   |dkrdS |dkrZt | dd} t| ts.dS t D ]\}}|  |r6|  S q6dS |tkrf|S ddgtt }d| d| }t	|dS )a/  
    Get the compression method for filepath_or_buffer. If compression='infer',
    the inferred compression method is returned. Otherwise, the input
    compression method is returned unchanged, unless it's invalid, in which
    case an error is raised.

    Parameters
    ----------
    filepath_or_buffer : str or file handle
        File path or object.
    {compression_options}

        .. versionchanged:: 1.4.0 Zstandard support.

    Returns
    -------
    string or None

    Raises
    ------
    ValueError on invalid compression specified.
    NZinferT)rl   Unrecognized compression type: z
Valid compression types are )
rn   rF   r1   _extension_to_compressionitemslowerendswith_supported_compressionssortedrf   )r0   r4   	extensionZvalidr   r<   r<   r=   r     s     

r   z
Path | strc                 C  s&   t | j}| s"td| ddS )z
    Check if parent directory of a file exists, raise OSError if it does not

    Parameters
    ----------
    path: Path or str
        Path to check parent directory of
    z1Cannot save file into a non-existent directory: ''N)r   parentis_dirOSError)r_   r   r<   r<   r=   check_parent_directoryJ  s    	
r   )r2   r4   
memory_maperrorsrz   zLiteral[False]zIOHandles[bytes])	path_or_bufr3   r2   r4   r   is_textr   rz   rE   c                C  s   d S rM   r<   r   r3   r2   r4   r   r   r   rz   r<   r<   r=   
get_handleX  s    r   )r2   r4   r   r   r   rz   zLiteral[True]zIOHandles[str]c                C  s   d S rM   r<   r   r<   r<   r=   r   g  s    z!IOHandles[str] | IOHandles[bytes]c                C  s   d S rM   r<   r   r<   r<   r=   r   v  s    r   Tc                C  s  |pd}|pd}t | |r*d|kr*|d7 }t| t|trHt| t| ||||d}|j}	t|	|\}	}}
t|	t}t	|j
}|d}d|kr|rtt|	 |rR|dkr|jdd	|_n |dkrd|jkr| jd7  _|d
kr2t|	trtjf |	|jd|}	ntjf |	|jd|}	n|dkrXtj|	fd|ji|}	n|dkrt|	|jf|}	|	jjdkr6|
|	 |	j }t|dkr|	j| }	n$|std|  ntd| nZ|dkr|d|j t|	trtf d|	i|}	ntf d|	i|}	t|	ts6td|	jjkr6|
|	 |	j }t|dkr|	j|d }|dk	st|}	n$|std|  ntd| n|dkrt |	|j}	nl|dkr$t d}d|jkrd|j!f |i}nd|j"f |i}|j|	fd|ji|}	nd| }t|t|	trFt|
|	 nNt|	tr|j#rd|jkrt|	|j|j#|d	d}	nt|	|j}	|
|	 d }|s|jd!krt|	t$rt%|	|j#d"}	n~|rP|s|st |	|jrPt&|	d#rt&|	d$rt&|	d%st'|	}	t(|	|j#|d	d}	|
|	 t|jtpL|j) }d|jkr~t&|	d&s~t*d't+|j d(|
,  |j)rt|jtrt|
|j t-|	|
||j
d)S )*a  
    Get file handle for given path/buffer and mode.

    Parameters
    ----------
    path_or_buf : str or file handle
        File path or object.
    mode : str
        Mode to open path_or_buf with.
    encoding : str or None
        Encoding to use.
    {compression_options}

        .. versionchanged:: 1.0.0
           May now be a dict with key 'method' as compression mode
           and other keys as compression options if compression
           mode is 'zip'.

        .. versionchanged:: 1.1.0
           Passing compression options as keys in dict is now
           supported for compression modes 'gzip', 'bz2', 'zstd' and 'zip'.

        .. versionchanged:: 1.4.0 Zstandard support.

    memory_map : bool, default False
        See parsers._parser_params for more information. Only used by read_csv.
    is_text : bool, default True
        Whether the type of the content passed to the file/buffer is string or
        bytes. This is not the same as `"b" not in mode`. If a string content is
        passed to a binary file/buffer, a wrapper is inserted.
    errors : str, default 'strict'
        Specifies how encoding and decoding errors are to be handled.
        See the errors argument for :func:`open` for a full list
        of options.
    storage_options: StorageOptions = None
        Passed to _get_filepath_or_buffer

    .. versionchanged:: 1.2.0

    Returns the dataclass IOHandles
    r|   strictr   )r2   r4   r3   rz   r   r}   r   r   r+   r   )filenamer3   )fileobjr3   r   r3   r      zZero files found in ZIP file z9Multiple files found in ZIP file. Only one file per ZIP: r   namer   r   Nz Zero files found in TAR archive zDMultiple files found in TAR archive. Only one file per TAR archive: r   	zstandardZdctxZcctxr   )r2   r   newlineFrb)r2   readablewritableseekabler   z1Expected file path name or file-like object, got z type)r?   rA   rB   r4   )._is_binary_modecodecslookuprF   r1   lookup_errorr   r0   _maybe_memory_mapr   r4   r   r   r3   r   r   GzipFiler   BZ2File_BytesZipFilebufferappendnamelistlenr   rf   
setdefault_BytesTarFilerG   getnamesextractfiler"   r#   ZstdDecompressorZZstdCompressorr2   r	   _BytesIOWrapperr   
_IOWrapperr
   r6   rj   r   reverser>   )r   r3   r2   r4   r   r   r   rz   Zioargsr?   handlesZis_pathr   Z	zip_namesfilesfiler   Z	open_argsr   rB   r<   r<   r=   r     s>   6







	


 







	


c                      s8   e Zd ZdZeddddZdd fddZ  ZS )_BufferedWriterz
    Some objects do not support multiple .write() calls (TarFile and ZipFile).
    This wrapper writes to the underlying buffer on close.
    rC   rD   c                 C  s   d S rM   r<   rN   r<   r<   r=   write_to_buffer  s    z_BufferedWriter.write_to_bufferc              	     sP   | j r
d S |  r8| d | j |   W 5 Q R X n
| j  t   d S )Nr   )closedgetvalueseekr   r   rK   superrN   	__class__r<   r=   rK     s    

z_BufferedWriter.close)r7   r8   r9   r:   r   r   rK   __classcell__r<   r<   r   r=   r     s   r   c                      sX   e Zd Zddddddd fdd	Zd
d
dddZddddZddddZ  ZS )r   Nr}   r   zLiteral[('r', 'a', 'w', 'x')]z-ReadBuffer[bytes] | WriteBuffer[bytes] | NonerC   )r   r3   r   archive_namerE   c                   s<   t    || _|| _tjjf || ||d|| _d S )N)r   r3   r   )	r   __init__r   r   tarfileTarFiler   extend_moder   )rL   r   r3   r   r   rt   r   r<   r=   r     s    
z_BytesTarFile.__init__r1   )r3   rE   c                 C  sP   | dd}|dkr|S | jd k	rLt| jj}|dkrL| d|dd   }|S )Nr   r+   r   )r   r   r   :r   )r   r   r   suffix)rL   r3   r   r<   r<   r=   r     s    
z_BytesTarFile.extend_moderD   c                 C  sP   | j dkrdS t| j }|jdkr.|dj S |jdkrJ|ddj S |j S )z
        If an explicit archive_name is not given, we still want the file inside the zip
        file not to be named something.tar, because that causes confusion (GH39465).
        Nr   r+   )r   r   r   )r   r   r   with_suffixrL   r   r<   r<   r=   infer_filename  s    



z_BytesTarFile.infer_filenamec                 C  s>   | j p|  pd}tj|d}t|  |_| j||  d S )Nr   r   )	r   r   r   TarInfor   r   sizer   addfile)rL   r   tarinfor<   r<   r=   r     s    z_BytesTarFile.write_to_buffer)Nr}   NN)r7   r8   r9   r   r   r   r   r   r<   r<   r   r=   r     s       
r   c                      sF   e Zd Zdddddd fddZdd	d
dZdd	ddZ  ZS )r   Nz1FilePath | ReadBuffer[bytes] | WriteBuffer[bytes]r1   r   rC   )r   r3   r   rE   c                   s@   t    |dd}|| _|dtj tj||f|| _d S )Nr   r+   r4   )	r   r   r   r   r   zipfileZIP_DEFLATEDZipFiler   )rL   r   r3   r   rt   r   r<   r=   r     s
    
z_BytesZipFile.__init__rD   c                 C  s@   t | jjtjtfr<t| jj}|jdkr6|dj	S |j	S dS )z
        If an explicit archive_name is not given, we still want the file inside the zip
        file not to be named something.zip, because that causes confusion (GH39465).
        r   r+   N)
rF   r   r   r^   ro   r1   r   r   r   r   r   r<   r<   r=   r      s    
z_BytesZipFile.infer_filenamec                 C  s(   | j p|  pd}| j||   d S )Nr   )r   r   r   writestrr   )rL   r   r<   r<   r=   r     s    z_BytesZipFile.write_to_buffer)N)r7   r8   r9   r   r   r   r   r<   r<   r   r=   r     s    r   c                   @  sT   e Zd ZdddddZdddd	Zd
dddZd
dddZd
dddZdS )r   r   rC   )r   rE   c                 C  s
   || _ d S rM   )r   )rL   r   r<   r<   r=   r     s    z_IOWrapper.__init__r1   r   c                 C  s   t | j|S rM   getattrr   )rL   r   r<   r<   r=   __getattr__  s    z_IOWrapper.__getattr__r5   rD   c                 C  s   t | jdr| j S dS )Nr   T)r   r   r   rN   r<   r<   r=   r     s    
z_IOWrapper.readablec                 C  s   t | jdr| j S dS )Nr   T)r   r   r   rN   r<   r<   r=   r   %  s    
z_IOWrapper.seekablec                 C  s   t | jdr| j S dS )Nr   T)r   r   r   rN   r<   r<   r=   r   *  s    
z_IOWrapper.writableN)r7   r8   r9   r   r	  r   r   r   r<   r<   r<   r=   r     s
   r   c                   @  s@   e Zd ZdddddddZddd	d
ZddddddZdS )r   r|   zStringIO | TextIOBaser1   rC   )r   r2   rE   c                 C  s   || _ || _d| _d S )N    )r   r2   overflow)rL   r   r2   r<   r<   r=   r   4  s    z_BytesIOWrapper.__init__)attrc                 C  s   t | j|S rM   r  )rL   r  r<   r<   r=   r	  =  s    z_BytesIOWrapper.__getattr__z
int | Noner   )nrE   c                 C  st   | j d k	st| j || j}| j| }|d ksH|dk sH|t|krRd| _|S |d | }||d  | _|S d S )Nr   r
  )r   rG   r   encoder2   r  r   )rL   r  Z
bytestringZcombined_bytestringZ	to_returnr<   r<   r=   r   @  s    
z_BytesIOWrapper.readN)r|   )r  )r7   r8   r9   r   r	  r   r<   r<   r<   r=   r   1  s   	r   r/   z/tuple[str | BaseBuffer, bool, list[BaseBuffer]])r?   r   rE   c              	   C  s   g }|t | dpt| tM }|s*| ||fS t| trHt| d} ||  zttj| 	 dtj
d}W 5 t|D ]} |   qpX |||gfS )zTry to memory map file/buffer.filenor   r   )access)r   rF   r1   r   r   reversedrK   r   r   r  ZACCESS_READ)r?   r   r   wrappedr<   r<   r=   r   N  s$    



  r   c              	   C  sH   d}t | } t| ts|S ztj| }W n ttfk
rB   Y nX |S )zTest whether file exists.F)rn   rF   r1   r^   r_   existsrj   rf   )r0   r  r<   r<   r=   file_existsm  s    
r  )r?   r3   rE   c                 C  sV   d|ksd|krd|kS t jt jt jf}tt| |r:dS t| t pTdt| d|kS )z+Whether the handle is opened in binary moder   r   Fr3   )	r   StreamWriterStreamReaderStreamReaderWriter
issubclassr   rF   _get_binary_io_classesr  )r?   r3   Ztext_classesr<   r<   r=   r   {  s      r   ztuple[type, ...]rD   c               	   C  sH   t tf} tddd}|dk	rD| d}| t|f7 } W 5 Q R X | S )z!IO classes that that expect bytesr   ignore)r   Nr
  )r   r   r#   r   Zstream_readerr   )Zbinary_classesr   r   r<   r<   r=   r    s    r  ).).)F)r|   Nr}   N)er:   
__future__r   abcr   r   r   r   rR   	functoolsr   ior   r   r   r   r	   r
   r   r^   pathlibr   rer   typingr   r   r   r   r   r   r   r   r   r   urllib.parser   r   rW   r   r   r   r   r  Zpandas._typingr   r   r   r   r   r    r!   Zpandas.compatr"   Zpandas.compat._optionalr#   Zpandas.util._decoratorsr$   Zpandas.util._exceptionsr%   Zpandas.core.dtypes.commonr&   r'   r(   r)   Zpandas.core.shared_docsr*   setrY   discardcompilerv   r,   Z	dataclassr.   r>   r[   r]   rk   rn   rs   ry   r   r   r   valuesr   r   r   r   r   r   r   r   r   r   r   r  r   	lru_cacher  r<   r<   r<   r=   <module>   s    0$	

,  "

     3#8$$$$  7$