U
    饡c0                     @   s   d dl mZ d dlZd dlmZ d dlmZ d dlmZ G dd dZ	dZ
G dd	 d	Zdd
dZde_ddddddddddddddZde
e_dS )    )IntegralN)Table)_resolve_filesystem_and_pathc                   @   s  e Zd ZdZdd Zedd Zedd Zedd	 Zed
d Z	edd Z
edd Zedd Zedd Zedd Zedd Zedd Zedd Zedd Zedd Zed d! Zed"d# Zed$d% Zd-d'd(Zd.d)d*Zd/d+d,Zd&S )0ORCFilea  
    Reader interface for a single ORC file

    Parameters
    ----------
    source : str or pyarrow.NativeFile
        Readable source. For passing Python file objects or byte buffers,
        see pyarrow.io.PythonFileInterface or pyarrow.io.BufferReader.
    c                 C   s   t  | _| j| d S N)_orcZ	ORCReaderreaderopen)selfsource r   //tmp/pip-unpacked-wheel-qoi2rb4q/pyarrow/orc.py__init__&   s    
zORCFile.__init__c                 C   s
   | j  S )z/The file metadata, as an arrow KeyValueMetadata)r   metadatar
   r   r   r   r   *   s    zORCFile.metadatac                 C   s
   | j  S )z#The file schema, as an arrow schema)r   schemar   r   r   r   r   /   s    zORCFile.schemac                 C   s
   | j  S )zThe number of rows in the file)r   nrowsr   r   r   r   r   4   s    zORCFile.nrowsc                 C   s
   | j  S )z!The number of stripes in the file)r   nstripesr   r   r   r   r   9   s    zORCFile.nstripesc                 C   s
   | j  S )z4Format version of the ORC file, must be 0.11 or 0.12)r   file_versionr   r   r   r   r   >   s    zORCFile.file_versionc                 C   s
   | j  S )z2Software instance and version that wrote this file)r   software_versionr   r   r   r   r   C   s    zORCFile.software_versionc                 C   s
   | j  S )zCompression codec of the file)r   compressionr   r   r   r   r   H   s    zORCFile.compressionc                 C   s
   | j  S )z?Number of bytes to buffer for the compression codec in the file)r   compression_sizer   r   r   r   r   M   s    zORCFile.compression_sizec                 C   s
   | j  S )z{Name of the writer that wrote this file.
        If the writer is unknown then its Writer ID
        (a number) is returned)r   writerr   r   r   r   r   R   s    zORCFile.writerc                 C   s
   | j  S )zVersion of the writer)r   writer_versionr   r   r   r   r   Y   s    zORCFile.writer_versionc                 C   s
   | j  S )zRNumber of rows per an entry in the row index or 0
        if there is no row index)r   row_index_strider   r   r   r   r   ^   s    zORCFile.row_index_stridec                 C   s
   | j  S )zNumber of stripe statistics)r   nstripe_statisticsr   r   r   r   r   d   s    zORCFile.nstripe_statisticsc                 C   s
   | j  S )z/Length of the data stripes in the file in bytes)r   content_lengthr   r   r   r   r   i   s    zORCFile.content_lengthc                 C   s
   | j  S )z<The number of compressed bytes in the file stripe statistics)r   stripe_statistics_lengthr   r   r   r   r   n   s    z ORCFile.stripe_statistics_lengthc                 C   s
   | j  S )z1The number of compressed bytes in the file footer)r   file_footer_lengthr   r   r   r   r   s   s    zORCFile.file_footer_lengthc                 C   s
   | j  S )z*The number of bytes in the file postscript)r   file_postscript_lengthr   r   r   r   r   x   s    zORCFile.file_postscript_lengthc                 C   s
   | j  S )zThe number of bytes in the file)r   file_lengthr   r   r   r   r    }   s    zORCFile.file_lengthNc                 C   s   |d krd S | j }g }|D ]f}t|trxt|}d|  krHt|k rbn n|| j}|| qtdt||f q|  S q|S )Nr   z/Column indices must be in 0 <= ind < %d, got %d)r   
isinstancer   intlennameappend
ValueError)r
   columnsr   namescolr   r   r   _select_names   s    



zORCFile._select_namesc                 C   s   |  |}| jj||dS )a  Read a single stripe from the file.

        Parameters
        ----------
        n : int
            The stripe index
        columns : list
            If not None, only these columns will be read from the stripe. A
            column name may be a prefix of a nested field, e.g. 'a' will select
            'a.b', 'a.c', and 'a.d.e'

        Returns
        -------
        pyarrow.RecordBatch
            Content of the stripe as a RecordBatch.
        r'   )r*   r   read_stripe)r
   nr'   r   r   r   r,      s    
zORCFile.read_stripec                 C   s   |  |}| jj|dS )a~  Read the whole file.

        Parameters
        ----------
        columns : list
            If not None, only these columns will be read from the file. A
            column name may be a prefix of a nested field, e.g. 'a' will select
            'a.b', 'a.c', and 'a.d.e'

        Returns
        -------
        pyarrow.Table
            Content of the file as a Table.
        r+   )r*   r   read)r
   r'   r   r   r   r.      s    
zORCFile.read)N)N)N)__name__
__module____qualname____doc__r   propertyr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r*   r,   r.   r   r   r   r   r      sN   



















r   a  file_version : {"0.11", "0.12"}, default "0.12"
    Determine which ORC file version to use.
    `Hive 0.11 / ORC v0 <https://orc.apache.org/specification/ORCv0/>`_
    is the older version
    while `Hive 0.12 / ORC v1 <https://orc.apache.org/specification/ORCv1/>`_
    is the newer one.
batch_size : int, default 1024
    Number of rows the ORC writer writes at a time.
stripe_size : int, default 64 * 1024 * 1024
    Size of each ORC stripe in bytes.
compression : string, default 'uncompressed'
    The compression codec.
    Valid values: {'UNCOMPRESSED', 'SNAPPY', 'ZLIB', 'LZ4', 'ZSTD'}
    Note that LZ0 is currently not supported.
compression_block_size : int, default 64 * 1024
    Size of each compression block in bytes.
compression_strategy : string, default 'speed'
    The compression strategy i.e. speed vs size reduction.
    Valid values: {'SPEED', 'COMPRESSION'}
row_index_stride : int, default 10000
    The row index stride i.e. the number of rows per
    an entry in the row index.
padding_tolerance : double, default 0.0
    The padding tolerance.
dictionary_key_size_threshold : double, default 0.0
    The dictionary key size threshold. 0 to disable dictionary encoding.
    1 to always enable dictionary encoding.
bloom_filter_columns : None, set-like or list-like, default None
    Columns that use the bloom filter.
bloom_filter_fpp : double, default 0.05
    Upper limit of the false-positive rate of the bloom filter.
c                   @   sd   e Zd ZdeZdZddddddd	d
d
dddddZdd Zdd Z	dd Z
dd Zdd ZdS )	ORCWritera  
Writer interface for a single ORC file

Parameters
----------
where : str or pyarrow.io.NativeFile
    Writable target. For passing Python file objects or byte buffers,
    see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream
    or pyarrow.io.FixedSizeBufferWriter.
{}
F0.12      uncompressed   speed'          N皙?r   
batch_sizestripe_sizer   compression_block_sizecompression_strategyr   padding_tolerancedictionary_key_size_thresholdbloom_filter_columnsbloom_filter_fppc                C   s8   t  | _| jj|||||||||	|
||d d| _d S )Nr>   T)r   r4   r   r	   is_open)r
   wherer   r?   r@   r   rA   rB   r   rC   rD   rE   rF   r   r   r   r      s     
zORCWriter.__init__c                 C   s   |    d S r   closer   r   r   r   __del__  s    zORCWriter.__del__c                 C   s   | S r   r   r   r   r   r   	__enter__  s    zORCWriter.__enter__c                 O   s   |    d S r   rI   )r
   argskwargsr   r   r   __exit__  s    zORCWriter.__exit__c                 C   s   | j s
t| j| dS )a
  
        Write the table into an ORC file. The schema of the table must
        be equal to the schema used when opening the ORC file.

        Parameters
        ----------
        table : pyarrow.Table
            The table to be written into the ORC file
        N)rG   AssertionErrorr   write)r
   tabler   r   r   rQ     s    

zORCWriter.writec                 C   s   | j r| j  d| _ dS )z$
        Close the ORC file
        FN)rG   r   rJ   r   r   r   r   rJ   "  s    
zORCWriter.close)r/   r0   r1   format_orc_writer_args_docsr2   rG   r   rK   rL   rO   rQ   rJ   r   r   r   r   r4      s*   
r4   c                 C   s\   t | |\}}|d k	r ||} |d k	rHt|dkrHt|  |}nt| j|d}|S )Nr   r+   )r   Zopen_input_filer#   r   r.   select)r   r'   
filesystempathresultr   r   r   
read_table+  s    
rY   aI  
Read a Table from an ORC file.

Parameters
----------
source : str, pyarrow.NativeFile, or file-like object
    If a string passed, can be a single file name. For file-like objects,
    only read a single file. Use pyarrow.BufferReader to read a file
    contained in a bytes or buffer-like object.
columns : list
    If not None, only these columns will be read from the file. A column
    name may be a prefix of a nested field, e.g. 'a' will select 'a.b',
    'a.c', and 'a.d.e'. If empty, no columns will be read. Note
    that the table will still have the correct num_rows set despite having
    no columns.
filesystem : FileSystem, default None
    If nothing passed, will be inferred based on path.
    Path will try to be found in the local on-disk filesystem otherwise
    it will be parsed as an URI to determine the filesystem.
r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   c                C   s^   t |tr$tjdtdd ||  } }t|||||||||	|
||d}||  W 5 Q R X d S )NzThe order of the arguments has changed. Pass as 'write_table(table, where)' instead. The old order will raise an error in the future.   )
stacklevelr>   )r!   r   warningswarnFutureWarningr4   rQ   )rR   rH   r   r?   r@   r   rA   rB   r   rC   rD   rE   rF   r   r   r   r   write_tableN  s.    
 
r_   a]  
Write a table into an ORC file.

Parameters
----------
table : pyarrow.lib.Table
    The table to be written into the ORC file
where : str or pyarrow.io.NativeFile
    Writable target. For passing Python file objects or byte buffers,
    see pyarrow.io.PythonFileInterface, pyarrow.io.BufferOutputStream
    or pyarrow.io.FixedSizeBufferWriter.
{}
)NN)Znumbersr   r\   Zpyarrow.libr   Zpyarrow._orcr   Z
pyarrow.fsr   r   rT   r4   rY   r2   r_   rS   r   r   r   r   <module>   s2    #"L
$