U
    -e&                     @   s   d dl Z d dlmZ d dlmZmZmZmZ d dlmZ	 d dl
mZ d dlmZ G dd dZdd	 Zd
ddhZdddZdddZdddZdS )    N)_pandas_api)CodecTableconcat_tablesschema)_feather)FeatherErrorc                   @   s6   e Zd ZdZdddZdddZdd	 Zdd
dZdS )FeatherDataseta  
    Encapsulates details of reading a list of Feather files.

    Parameters
    ----------
    path_or_paths : List[str]
        A list of file names
    validate_schema : bool, default True
        Check that individual file schemas are all the same / compatible
    Tc                 C   s   || _ || _d S N)pathsvalidate_schema)selfZpath_or_pathsr    r   P/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/pyarrow/feather.py__init__)   s    zFeatherDataset.__init__Nc                 C   sj   t | jd |d}|g| _|j| _| jdd D ].}t ||d}| jrR| || | j| q0t| jS )a,  
        Read multiple feather files as a single pyarrow.Table

        Parameters
        ----------
        columns : List[str]
            Names of columns to read from the file

        Returns
        -------
        pyarrow.Table
            Content of the file as a table (of columns)
        r   columns   N)
read_tabler   Z_tablesr   r   validate_schemasappendr   )r   r   Z_filpathtabler   r   r   r   -   s    zFeatherDataset.read_tablec                 C   s(   | j |j s$td|| j |j d S )Nz-Schema in {!s} was different. 
{!s}

vs

{!s})r   equals
ValueErrorformat)r   Zpiecer   r   r   r   r   F   s     zFeatherDataset.validate_schemasc                 C   s   | j |dj|dS )a  
        Read multiple Parquet files as a single pandas DataFrame

        Parameters
        ----------
        columns : List[str]
            Names of columns to read from the file
        use_threads : bool, default True
            Use multiple threads when converting to pandas

        Returns
        -------
        pandas.DataFrame
            Content of the file as a pandas DataFrame (of columns)
        r   )use_threadsr   Z	to_pandas)r   r   r   r   r   r   read_pandasM   s    zFeatherDataset.read_pandas)T)N)NT)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r	      s
   

r	   c                 C   sN   |j dkrd S |jt t fkr4td| ntd| t|jd S )Nr   zqColumn '{}' exceeds 2GB maximum capacity of a Feather binary column. This restriction may be lifted in the futurezkColumn '{}' of type {} was chunked on conversion to Arrow and cannot be currently written to Feather format)Z
num_chunkstypeextbinarystringr   r   str)namecolr   r   r   check_chunked_overflowa   s    
 r*   lz4ZzstdZuncompressed   c                 C   s  t jr"t jr"t| t jjr"|  } t | r|dkr:d}n|dkrHd}ntdt	j
| |d}|dkrt|jjD ]\}}	|| }
t|	|
 qrn| }|dkrt|jtt|jkrtd|dk	rtd|dk	rtd	n<|dkrtd
rd}n$|dk	r|tkrtd|tztj||||||d W nL tk
r   t|trzzt| W n tjk
rx   Y nX  Y nX dS )a  
    Write a pandas.DataFrame to Feather format.

    Parameters
    ----------
    df : pandas.DataFrame or pyarrow.Table
        Data to write out as Feather format.
    dest : str
        Local destination path.
    compression : string, default None
        Can be one of {"zstd", "lz4", "uncompressed"}. The default of None uses
        LZ4 for V2 files if it is available, otherwise uncompressed.
    compression_level : int, default None
        Use a compression level particular to the chosen compressor. If None
        use the default compression level
    chunksize : int, default None
        For V2 files, the internal maximum size of Arrow RecordBatch chunks
        when writing the Arrow IPC file format. None means use the default,
        which is currently 64K
    version : int, default 2
        Feather file version. Version 2 is the current. Version 1 is the more
        limited legacy format
    r   Fr,   Nz%Version value should either be 1 or 2)preserve_indexz'cannot serialize duplicate column namesz2Feather V1 files do not support compression optionz0Feather V1 files do not support chunksize optionZ	lz4_framer+   z1compression="{}" not supported, must be one of {})compressioncompression_level	chunksizeversion)r   Zhave_pandasZ
has_sparse
isinstancepdZSparseDataFrameZto_denseZis_data_framer   r   Zfrom_pandas	enumerater   namesr*   lenZcolumn_namessetr   Zis_available_FEATHER_SUPPORTED_CODECSr   r   write_feather	Exceptionr'   osremoveerror)Zdfdestr.   r/   r0   r1   r-   r   ir(   r)   r   r   r   r9   s   s\    



 
r9   TFc                 K   s    t | |||djf d|i|S )a  
    Read a pandas.DataFrame from Feather format. To read as pyarrow.Table use
    feather.read_table.

    Parameters
    ----------
    source : str file path, or file-like object
        You can use MemoryMappedFile as source, for explicitly use memory map.
    columns : sequence, optional
        Only read a specific set of columns. If not provided, all columns are
        read.
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads. If false the
        restriction is used in the conversion to Pandas as well as in the
        reading from Feather format.
    memory_map : boolean, default False
        Use memory mapping when opening file on disk, when source is a str.
    **kwargs
        Additional keyword arguments passed on to `pyarrow.Table.to_pandas`.

    Returns
    -------
    df : pandas.DataFrame
        The contents of the Feather file as a pandas.DataFrame
    )r   
memory_mapr   r   r   )sourcer   r   r@   kwargsr   r   r   read_feather   s      
rC   c                 C   s   t j| ||d}|dkr | S dd |D }ttdd |rL||}n<ttdd |rj||}ndd |D }td	|||j	d
k r|S t
t||kr|S ||S dS )a  
    Read a pyarrow.Table from Feather format

    Parameters
    ----------
    source : str file path, or file-like object
        You can use MemoryMappedFile as source, for explicitly use memory map.
    columns : sequence, optional
        Only read a specific set of columns. If not provided, all columns are
        read.
    memory_map : boolean, default False
        Use memory mapping when opening file on disk, when source is a str
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads.

    Returns
    -------
    table : pyarrow.Table
        The contents of the Feather file as a pyarrow.Table
    )Zuse_memory_mapr   Nc                 S   s   g | ]}t |qS r   )r#   ).0columnr   r   r   
<listcomp>  s     zread_table.<locals>.<listcomp>c                 S   s   | t kS r
   )inttr   r   r   <lambda>      zread_table.<locals>.<lambda>c                 S   s   | t kS r
   )r'   rH   r   r   r   rJ     rK   c                 S   s   g | ]
}|j qS r   )r   )rD   rI   r   r   r   rF     s     z<Columns must be indices or names. Got columns {} of types {}   )r   ZFeatherReaderreadallmapZread_indicesZ
read_names	TypeErrorr   r1   sortedr7   select)rA   r   r@   r   readerZcolumn_typesr   Zcolumn_type_namesr   r   r   r      s,       
r   )NNNr,   )NTF)NFT)r;   Zpyarrow.pandas_compatr   Zpyarrow.libr   r   r   r   libr$   Zpyarrowr   Zpyarrow._featherr   r	   r*   r8   r9   rC   r   r   r   r   r   <module>   s    D
    
T  
 