U
    -eI                     @  s  U d dl mZ d dlmZ d dlmZmZmZ d dlZ	d dl
Z
d dlmZ d dlmZ eZeZeZeje	 e	 e	 e	 deje	 e	 e	 e	 deje	 e	 e	 dej de	 iej!de	" iiZ#d	e$d
< d6dddddZ%d7ddddZ&d8ddddddZ'd9ddddddZ(d:ddddddZ)d;ddd dd!d"Z*d#d$ Z+d%d& Z,d<d'd(d)d(ddd*d+d,Z-d=d-d.d)d(d(dd/d0d1d2Z.d>d-d.d)d(d(dd/d3d4d5Z/dS )?    )annotations)Any)	DtypeKindColumnBuffersColumnNullTypeN)Dtype)          @   )r	   r
   r   r   zdict[DtypeKind, dict[int, Any]]_PYARROW_DTYPESTDataFrameObjectzpa.Table)dfreturnc                 C  sP   t | tjr| S t | tjr*tj| gS t| ds<tdt| j|d|dS )a'  
    Build a ``pa.Table`` from any DataFrame supporting the interchange protocol.

    Parameters
    ----------
    df : DataFrameObject
        Object supporting the interchange protocol, i.e. `__dataframe__`
        method.
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).

    Returns
    -------
    pa.Table

    Examples
    --------
    >>> import pyarrow
    >>> from pyarrow.interchange import from_dataframe

    Convert a pandas dataframe to a pyarrow table:

    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...         "n_atendees": [100, 10, 1],
    ...         "country": ["Italy", "Spain", "Slovenia"],
    ...     })
    >>> df
       n_atendees   country
    0         100     Italy
    1          10     Spain
    2           1  Slovenia
    >>> from_dataframe(df)
    pyarrow.Table
    n_atendees: int64
    country: large_string
    ----
    n_atendees: [[100,10,1]]
    country: [["Italy","Spain","Slovenia"]]
    __dataframe__z#`df` does not support __dataframe__)
allow_copy)	
isinstancepaTableRecordBatchfrom_batcheshasattr
ValueError_from_dataframer   )r   r    r   c/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/pyarrow/interchange/from_dataframe.pyfrom_dataframe=   s    *
r   )r   c                 C  s2   g }|   D ]}t||}|| qtj|S )a  
    Build a ``pa.Table`` from the DataFrame interchange object.

    Parameters
    ----------
    df : DataFrameObject
        Object supporting the interchange protocol, i.e. `__dataframe__`
        method.
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).

    Returns
    -------
    pa.Table
    )Z
get_chunksprotocol_df_chunk_to_pyarrowappendr   r   r   )r   r   Zbatcheschunkbatchr   r   r   r   s   s
    
r   boolzpa.RecordBatch)r   r   r   c                 C  s   i }|   D ]}t|ts*td| d||krBtd| d| |}|jd }|tjtjtj	tj
tjfkrt||||< q|tjkrt||||< q|tjkrt||||< qtd| dqtj|S )a  
    Convert interchange protocol chunk to ``pa.RecordBatch``.

    Parameters
    ----------
    df : DataFrameObject
        Object supporting the interchange protocol, i.e. `__dataframe__`
        method.
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).

    Returns
    -------
    pa.RecordBatch
    zColumn z is not a stringz is not uniquer   z
Data type z not handled yet)Zcolumn_namesr   strr   Zget_column_by_namedtyper   INTUINTFLOATSTRINGDATETIMEcolumn_to_arrayBOOLbool_column_to_arrayZCATEGORICAL categorical_column_to_dictionaryNotImplementedErrorr   r   Zfrom_pydict)r   r   columnsnamecolr#   r   r   r   r      s,    




r   ColumnObjectzpa.Array)r0   r   r   c                 C  s$   |   }t||  | j| j|}|S )a  
    Convert a column holding one of the primitive dtypes to a PyArrow array.
    A primitive type is one of: int, uint, float, bool (1 bit).

    Parameters
    ----------
    col : ColumnObject
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).

    Returns
    -------
    pa.Array
    )get_buffersbuffers_to_arraysizedescribe_nulloffsetr0   r   buffersdatar   r   r   r)      s    
r)   c                 C  s>   |st d|  }t||  | j| j}t|t	 }|S )aD  
    Convert a column holding boolean dtype to a PyArrow array.

    Parameters
    ----------
    col : ColumnObject
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).

    Returns
    -------
    pa.Array
    zfBoolean column will be casted from uint8 and a copy is required which is forbidden by allow_copy=False)
RuntimeErrorr2   r3   r4   r5   r6   pccastr   bool_r7   r   r   r   r+      s    
r+   zpa.DictionaryArrayc                 C  sb   |st d| j}|d s"td|d }t|}|  }t||  | j| j}t	j
||}|S )aV  
    Convert a column holding categorical data to a pa.DictionaryArray.

    Parameters
    ----------
    col : ColumnObject
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).

    Returns
    -------
    pa.DictionaryArray
    zjCategorical column will be casted from uint8 and a copy is required which is forbidden by allow_copy=FalseZis_dictionaryz-Non-dictionary categoricals not supported yet
categories)r:   Zdescribe_categoricalr-   r)   r2   r3   r4   r5   r6   r   ZDictionaryArrayZfrom_arrays)r0   r   ZcategoricalZ
cat_column
dictionaryr8   indicesZ
dict_arrayr   r   r   r,      s$    
r,   c                 C  sP   t d| }|r>|d|d }}|dkr6|d7 }||fS td|  dS )z4Parse datetime `format_str` to interpret the `data`.zts([smun]):(.*)      sz DateTime kind is not supported: N)rematchgroupr-   )
format_strZtimestamp_metaunittzr   r   r   parse_datetime_format_str#  s    rJ   c                 C  s`   | \}}}}|t jkr0t|\}}tj||dS t|i |d}|rL|S td|  ddS )z+Map column date type to pyarrow date type. )rI   NzConversion for  is not yet supported.)r   r(   rJ   r   	timestampr   getr-   )	data_typekind	bit_widthZf_string_rH   rI   Zpa_dtyper   r   r   map_date_type5  s    

rR   r   intr   )r8   lengthr5   r6   r   r   c                 C  s4  | d \}}z| d \}}W n t k
r4   d}Y nX z| d \}	}
W n t k
r^   d}	Y nX tj|j|j|d}|rt||||||}nt||||||}t|}|	r|
\}}}}tj|	j|	j|	d}|d dkrt }n|dkrt }nt	 }tj
j|||||g|d	}ntj
j||||g|d	}|S )
a  
    Build a PyArrow array from the passed buffer.

    Parameters
    ----------
    buffer : ColumnBuffers
        Dictionary containing tuples of underlying buffers and
        their associated dtype.
    length : int
        The number of values in the array.
    describe_null: ColumnNullType
        Null representation the column dtype uses,
        as a tuple ``(kind, value)``
    offset : int, default: 0
        Number of elements to offset from the start of the buffer.
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).

    Returns
    -------
    pa.Array

    Notes
    -----
    The returned array doesn't own the memory. The caller of this function
    is responsible for keeping the memory owner object alive as long as
    the returned PyArrow array is being used.
    r9   ZvalidityNoffsetsbaserB   Ur   r6   )	TypeErrorr   foreign_bufferptrbufsizevalidity_buffer_from_maskvalidity_buffer_nan_sentinelrR   Zlarge_stringstringArrayfrom_buffers)r8   rT   r5   r6   r   Z	data_buffrN   validity_buffvalidity_dtypeZoffset_buffZoffset_dtypedata_pa_bufferZvalidity_pa_buff
data_dtyperQ   Zoffset_bit_widthZoffset_pa_bufferZstring_typearrayr   r   r   r3   G  sh    $



r3   BufferObjectr   z	pa.Buffer)rc   rd   r5   rT   r6   r   r   c                 C  s  |\}}|\}}	}	}	|t jks"t|tjkr0dS |tjksL|tjkr|dkrtj| j	| j
| d}
|tjkr|svtdtjjt |d|
g|d}t|t }ntjjt |d|
g|d}|dkrt|}| d S |tjkr|dkrtj| j	| j
| dS t| ddS )a  
    Build a PyArrow buffer from the passed mask buffer.

    Parameters
    ----------
    validity_buff : BufferObject
        Tuple of underlying validity buffer and associated dtype.
    validity_dtype : Dtype
        Dtype description as a tuple ``(kind, bit-width, format string,
        endianness)``.
    describe_null : ColumnNullType
        Null representation the column dtype uses,
        as a tuple ``(kind, value)``
    length : int
        The number of values in the array.
    offset : int, default: 0
        Number of elements to offset from the start of the buffer.
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).

    Returns
    -------
    pa.Buffer
    NrA   rV   YTo create a bitmask a copy of the data is required which is forbidden by allow_copy=FalserY   r   * null representation is not yet supported.)r   r*   AssertionErrorr   NON_NULLABLEZUSE_BYTEMASKZUSE_BITMASKr   r[   r\   r]   r:   ra   rb   int8r;   r<   r=   invertr8   r-   )rc   rd   r5   rT   r6   r   	null_kindsentinel_valZvalidity_kindrQ   Zbuffmask	mask_boolr   r   r   r^     sN    !



r^   )re   rN   r5   rT   r6   r   r   c                 C  s  |\}}}}t |}	|\}
}|
tjkr|s2td|tjkrZ|dkrZt| d|
 dn8tjj	|	|d| g|d}t
|}t
|}| d S n|
tjkr|std|tjkrt }n|	}tjj	||d| g|d}t
||}t
|}| d S |
tjkr
nt| ddS )	a  
    Build a PyArrow buffer from NaN or sentinel values.

    Parameters
    ----------
    data_pa_buffer : pa.Buffer
        PyArrow buffer for the column data.
    data_type : Dtype
        Dtype description as a tuple ``(kind, bit-width, format string,
        endianness)``.
    describe_null : ColumnNullType
        Null representation the column dtype uses,
        as a tuple ``(kind, value)``
    length : int
        The number of values in the array.
    offset : int, default: 0
        Number of elements to offset from the start of the buffer.
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).

    Returns
    -------
    pa.Buffer
    ri   r	   z with rK   NrY   rA   rj   )rR   r   ZUSE_NANr:   r   r&   r-   r   ra   rb   r;   is_nanrn   r8   ZUSE_SENTINELr(   int64equalrl   )re   rN   r5   rT   r6   r   rO   rP   rQ   rf   ro   rp   Zpyarrow_datarq   Zsentinel_dtypeZsentinel_arrrr   r   r   r   r_     sT    !






r_   )T)T)T)T)T)T)r   T)r   T)r   T)0
__future__r   typingr   Zpyarrow.interchange.columnr   r   r   Zpyarrowr   rD   Zpyarrow.computeZcomputer;   r   r   r1   rh   r$   rm   Zint16Zint32rt   r%   Zuint8Zuint16Zuint32Zuint64r&   Zfloat16Zfloat32Zfloat64r*   r'   r`   r   __annotations__r   r   r   r)   r+   r,   rJ   rR   r3   r^   r_   r   r   r   r   <module>   sj        
 
6 1  # ,  j  T  