U
    襡c
'                     @   s   d dl Z d dlmZ d dlmZmZmZmZ d dlmZ	 d dl
mZ d dlmZ d dlmZ dd ZG d	d
 d
Zdd ZdddhZdddZdddZdddZdS )    N)_pandas_api)CodecTableconcat_tablesschema)_feather)FeatherError)Versionc                   C   s   t jtdk rtdd S )Nz0.17.0z!feather requires pandas >= 0.17.0)r   Zloose_versionr	   ImportError r   r   3/tmp/pip-unpacked-wheel-qoi2rb4q/pyarrow/feather.py_check_pandas_version   s    r   c                   @   s6   e Zd ZdZdddZdddZdd	 Zdd
dZdS )FeatherDataseta  
    Encapsulates details of reading a list of Feather files.

    Parameters
    ----------
    path_or_paths : List[str]
        A list of file names
    validate_schema : bool, default True
        Check that individual file schemas are all the same / compatible
    Tc                 C   s   || _ || _d S N)pathsvalidate_schema)selfZpath_or_pathsr   r   r   r   __init__/   s    zFeatherDataset.__init__Nc                 C   sj   t | jd |d}|g| _|j| _| jdd D ].}t ||d}| jrR| || | j| q0t| jS )a,  
        Read multiple feather files as a single pyarrow.Table

        Parameters
        ----------
        columns : List[str]
            Names of columns to read from the file

        Returns
        -------
        pyarrow.Table
            Content of the file as a table (of columns)
        r   columns   N)
read_tabler   Z_tablesr   r   validate_schemasappendr   )r   r   Z_filpathtabler   r   r   r   3   s    zFeatherDataset.read_tablec                 C   s(   | j |j s$td|| j |j d S )Nz-Schema in {!s} was different. 
{!s}

vs

{!s})r   equals
ValueErrorformat)r   Zpiecer   r   r   r   r   L   s     zFeatherDataset.validate_schemasc                 C   s   t   | j|dj|dS )a  
        Read multiple Parquet files as a single pandas DataFrame

        Parameters
        ----------
        columns : List[str]
            Names of columns to read from the file
        use_threads : bool, default True
            Use multiple threads when converting to pandas

        Returns
        -------
        pandas.DataFrame
            Content of the file as a pandas DataFrame (of columns)
        r   use_threadsr   r   Z	to_pandas)r   r   r    r   r   r   read_pandasS   s    zFeatherDataset.read_pandas)T)N)NT)__name__
__module____qualname____doc__r   r   r   r"   r   r   r   r   r   #   s
   

r   c                 C   sN   |j dkrd S |jt t fkr4td| ntd| t|jd S )Nr   zqColumn '{}' exceeds 2GB maximum capacity of a Feather binary column. This restriction may be lifted in the futurezkColumn '{}' of type {} was chunked on conversion to Arrow and cannot be currently written to Feather format)Z
num_chunkstypeextbinarystringr   r   str)namecolr   r   r   check_chunked_overflowh   s    
 r.   lz4ZzstdZuncompressed   c                 C   s  t jr(t  t jr(t| t jjr(|  } t | r|dkr@d}n|dkrNd}nt	dt
j| |d}|dkrt|jjD ]\}}	|| }
t|	|
 qxn| }|dkrt|jtt|jkrt	d|dk	rt	d|dk	rt	d	n<|dkrtd
rd}n$|dk	r |tkr t	d|tztj||||||d W nL tk
r   t|trzt| W n tjk
r~   Y nX  Y nX dS )a  
    Write a pandas.DataFrame to Feather format.

    Parameters
    ----------
    df : pandas.DataFrame or pyarrow.Table
        Data to write out as Feather format.
    dest : str
        Local destination path.
    compression : string, default None
        Can be one of {"zstd", "lz4", "uncompressed"}. The default of None uses
        LZ4 for V2 files if it is available, otherwise uncompressed.
    compression_level : int, default None
        Use a compression level particular to the chosen compressor. If None
        use the default compression level
    chunksize : int, default None
        For V2 files, the internal maximum size of Arrow RecordBatch chunks
        when writing the Arrow IPC file format. None means use the default,
        which is currently 64K
    version : int, default 2
        Feather file version. Version 2 is the current. Version 1 is the more
        limited legacy format
    r   Fr0   Nz%Version value should either be 1 or 2)preserve_indexz'cannot serialize duplicate column namesz2Feather V1 files do not support compression optionz0Feather V1 files do not support chunksize optionZ	lz4_framer/   z1compression="{}" not supported, must be one of {})compressioncompression_level	chunksizeversion)r   Zhave_pandasr   Z
has_sparse
isinstancepdZSparseDataFrameZto_denseZis_data_framer   r   Zfrom_pandas	enumerater   namesr.   lenZcolumn_namessetr   Zis_available_FEATHER_SUPPORTED_CODECSr   r   write_feather	Exceptionr+   osremoveerror)Zdfdestr2   r3   r4   r5   r1   r   ir,   r-   r   r   r   r=   z   s^    



 
r=   TFc                 C   s   t   t| |||dj|dS )a  
    Read a pandas.DataFrame from Feather format. To read as pyarrow.Table use
    feather.read_table.

    Parameters
    ----------
    source : str file path, or file-like object
        You can use MemoryMappedFile as source, for explicitly use memory map.
    columns : sequence, optional
        Only read a specific set of columns. If not provided, all columns are
        read.
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads. If false the
        restriction is used in the conversion to Pandas as well as in the
        reading from Feather format.
    memory_map : boolean, default False
        Use memory mapping when opening file on disk, when source is a str.

    Returns
    -------
    df : pandas.DataFrame
    )r   
memory_mapr    r   r!   )sourcer   r    rD   r   r   r   read_feather   s      rF   c                 C   s   t j| ||d}|dkr | S dd |D }ttdd |rL||}n<ttdd |rj||}ndd |D }td	|||j	d
k r|S t
t||kr|S ||S dS )aW  
    Read a pyarrow.Table from Feather format

    Parameters
    ----------
    source : str file path, or file-like object
        You can use MemoryMappedFile as source, for explicitly use memory map.
    columns : sequence, optional
        Only read a specific set of columns. If not provided, all columns are
        read.
    memory_map : boolean, default False
        Use memory mapping when opening file on disk, when source is a str
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads.

    Returns
    -------
    table : pyarrow.Table
    )Zuse_memory_mapr    Nc                 S   s   g | ]}t |qS r   )r'   ).0columnr   r   r   
<listcomp>  s     zread_table.<locals>.<listcomp>c                 S   s   | t kS r   )inttr   r   r   <lambda>      zread_table.<locals>.<lambda>c                 S   s   | t kS r   )r+   rK   r   r   r   rM   	  rN   c                 S   s   g | ]
}|j qS r   )r#   )rG   rL   r   r   r   rI     s     z<Columns must be indices or names. Got columns {} of types {}   )r   ZFeatherReaderreadallmapZread_indicesZ
read_names	TypeErrorr   r5   sortedr;   select)rE   r   rD   r    readerZcolumn_typesr   Zcolumn_type_namesr   r   r   r      s,       
r   )NNNr0   )NTF)NFT)r?   Zpyarrow.pandas_compatr   Zpyarrow.libr   r   r   r   libr(   Zpyarrowr   Zpyarrow._featherr   Zpyarrow.vendored.versionr	   r   r   r.   r<   r=   rF   r   r   r   r   r   <module>   s    E
    
U
