U
    -e{'                     @   s<  d dl Z d dlZd dlmZmZmZmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZ d dlmZ G dd dejZdZG dd dejZG dd	 d	ejZG d
d dejZdd Zdd ZdddddZ d!ee _"dddddZ#dddddZ$d!ee$_"d#dddddZ%dddddZ&dd d!d"Z'dS )$    N)IpcReadOptionsIpcWriteOptions	ReadStats
WriteStatsMessageMessageReaderRecordBatchReader_ReadPandasMixinMetadataVersionread_messageread_record_batchread_schemaread_tensorwrite_tensorget_record_batch_sizeget_tensor_sizec                   @   s    e Zd ZdZdddddZdS )RecordBatchStreamReadera  
    Reader for the Arrow streaming binary format.

    Parameters
    ----------
    source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
        Either an in-memory buffer, or a readable file object.
        If you want to use memory map use MemoryMappedFile as source.
    options : pyarrow.ipc.IpcReadOptions
        Options for IPC deserialization.
        If None, default values will be used.
    memory_pool : MemoryPool, default None
        If None, default memory pool is used.
    Noptionsmemory_poolc                C   s   t |}| j|||d d S )Nr    _ensure_default_ipc_read_options_open)selfsourcer   r    r   L/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/pyarrow/ipc.py__init__2   s    z RecordBatchStreamReader.__init____name__
__module____qualname____doc__r   r   r   r   r   r   "   s   r   aN  Parameters
----------
sink : str, pyarrow.NativeFile, or file-like Python object
    Either a file path, or a writable file object.
schema : pyarrow.Schema
    The Arrow schema for data to be written to the file.
use_legacy_format : bool, default None
    Deprecated in favor of setting options. Cannot be provided with
    options.

    If None, False will be used unless this default is overridden by
    setting the environment variable ARROW_PRE_0_15_IPC_FORMAT=1
options : pyarrow.ipc.IpcWriteOptions
    Options for IPC serialization.

    If None, default values will be used: the legacy format will not
    be used unless overridden by setting the environment variable
    ARROW_PRE_0_15_IPC_FORMAT=1, and the V5 metadata version will be
    used unless overridden by setting the environment variable
    ARROW_PRE_1_0_METADATA_VERSION=1.c                   @   s&   e Zd ZdeZdddddZdS )RecordBatchStreamWriterz0Writer for the Arrow streaming binary format

{}Nuse_legacy_formatr   c                C   s   t ||}| j|||d d S Nr   _get_legacy_format_defaultr   r   sinkschemar%   r   r   r   r   r   S   s    
z RecordBatchStreamWriter.__init__r   r    r!   format_ipc_writer_class_docr"   r   r   r   r   r   r#   N   s   r#   c                   @   s"   e Zd ZdZddddddZdS )RecordBatchFileReadera  
    Class for reading Arrow record batch data from the Arrow binary file format

    Parameters
    ----------
    source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
        Either an in-memory buffer, or a readable file object.
        If you want to use memory map use MemoryMappedFile as source.
    footer_offset : int, default None
        If the file is embedded in some larger file, this is the byte offset to
        the very end of the file data
    options : pyarrow.ipc.IpcReadOptions
        Options for IPC serialization.
        If None, default values will be used.
    memory_pool : MemoryPool, default None
        If None, default memory pool is used.
    Nr   c                C   s   t |}| j||||d d S )Nfooter_offsetr   r   r   )r   r   r2   r   r   r   r   r   r   k   s
     zRecordBatchFileReader.__init__)Nr   r   r   r   r   r0   X   s   r0   c                   @   s&   e Zd ZdeZdddddZdS )RecordBatchFileWriterz1Writer to create the Arrow binary file format

{}Nr$   c                C   s   t ||}| j|||d d S r&   r(   r*   r   r   r   r   x   s    
zRecordBatchFileWriter.__init__r-   r   r   r   r   r3   r   s   r3   c                 C   s   | d k	r|d k	rt dn$|r>t|ts:tdt||S tj}| d krbtt	t
jdd} tt	t
jddr~tj}t| |dS )Nz8Can provide at most one of options and use_legacy_formatz expected IpcWriteOptions, got {}ZARROW_PRE_0_15_IPC_FORMAT0ZARROW_PRE_1_0_METADATA_VERSION)r%   metadata_version)
ValueError
isinstancer   	TypeErrorr.   typer
   ZV5boolintosenvirongetZV4)r%   r   r5   r   r   r   r)   }   s&    
r)   c                 C   s*   | r t | ts tdt| | p(t S )Nzexpected IpcReadOptions, got {})r7   r   r8   r.   r9   r'   r   r   r   r      s
    r   r$   c                C   s   t | |||dS Nr$   )r#   r+   r,   r%   r   r   r   r   
new_stream   s    rA   zCreate an Arrow columnar IPC stream writer instance

{}

Returns
-------
writer : RecordBatchStreamWriter
    A writer for the given sink
r   c                C   s   t | ||dS )a  
    Create reader for Arrow streaming format.

    Parameters
    ----------
    source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
        Either an in-memory buffer, or a readable file object.
    options : pyarrow.ipc.IpcReadOptions
        Options for IPC serialization.
        If None, default values will be used.
    memory_pool : MemoryPool, default None
        If None, default memory pool is used.

    Returns
    -------
    reader : RecordBatchStreamReader
        A reader for the given source
    r   )r   )r   r   r   r   r   r   open_stream   s    rB   c                C   s   t | |||dS r?   )r3   r@   r   r   r   new_file   s    rC   zCreate an Arrow columnar IPC file writer instance

{}

Returns
-------
writer : RecordBatchFileWriter
    A writer for the given sink
c                C   s   t | |||dS )a  
    Create reader for Arrow file format.

    Parameters
    ----------
    source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
        Either an in-memory buffer, or a readable file object.
    footer_offset : int, default None
        If the file is embedded in some larger file, this is the byte offset to
        the very end of the file data.
    options : pyarrow.ipc.IpcReadOptions
        Options for IPC serialization.
        If None, default values will be used.
    memory_pool : MemoryPool, default None
        If None, default memory pool is used.

    Returns
    -------
    reader : RecordBatchFileReader
        A reader for the given source
    r1   )r0   )r   r2   r   r   r   r   r   	open_file   s      rD   nthreadspreserve_indexc             	   C   sF   t jj| ||d}t  }t ||j}|| W 5 Q R X | S )a  
    Serialize a pandas DataFrame into a buffer protocol compatible object.

    Parameters
    ----------
    df : pandas.DataFrame
    nthreads : int, default None
        Number of threads to use for conversion to Arrow, default all CPUs.
    preserve_index : bool, default None
        The default of None will store the index as a column, except for
        RangeIndex which is stored as metadata only. If True, always
        preserve the pandas index data as a column. If False, no index
        information is saved and the result will have a default RangeIndex.

    Returns
    -------
    buf : buffer
        An object compatible with the buffer protocol.
    rE   )paZRecordBatchZfrom_pandasZBufferOutputStreamr#   r,   Zwrite_batchgetvalue)ZdfrF   rG   batchr+   writerr   r   r   serialize_pandas   s    
rL   Tuse_threadsc             	   C   s4   t | }t |}| }W 5 Q R X |j|dS )a  Deserialize a buffer protocol compatible object into a pandas DataFrame.

    Parameters
    ----------
    buf : buffer
        An object compatible with the buffer protocol.
    use_threads : bool, default True
        Whether to parallelize the conversion using multiple threads.

    Returns
    -------
    df : pandas.DataFrame
        The buffer deserialized as pandas DataFrame
    rM   )rH   ZBufferReaderr   Zread_allZ	to_pandas)bufrN   Zbuffer_readerreadertabler   r   r   deserialize_pandas  s    
rR   )N)(r<   ZpyarrowrH   Zpyarrow.libr   r   r   r   r   r   r   r	   r
   r   r   r   r   r   r   r   libZ_RecordBatchStreamReaderr   r/   Z_RecordBatchStreamWriterr#   Z_RecordBatchFileReaderr0   Z_RecordBatchFileWriterr3   r)   r   rA   r.   r"   rB   rC   rD   rL   rR   r   r   r   r   <module>   s,   H
		