U
    -ehf                     @   s6  d dl Z d dlZd dlZd dlZd dlZd dlmZm	Z	 d dl
mZmZ d dlmZ d dlmZ z$d dlmZ d dl
mZmZmZ W n ek
r   dZY nX z(d dlZd dlmZ d dl
mZmZ W n ek
r   d ZZY nX ejjZ ejjdd	 Z!ejjd
d Z"ejjedd Z#ejjedd Z$ejjedd Z%ejjedd Z&ejjedd Z'ejjedd Z(ejjdd Z)ejjedd Z*ejjedd Z+ejjej,ddd  Z-ejjed!d" Z.ejjed#d$ Z/ejjed%d& Z0ejjed'd( Z1ejjed)d* Z2ejjed+d, Z3ejjed-d. Z4ejjed/d0 Z5ejjed1d2 Z6ejjed3d4 Z7ejjd5d6 Z8ejjed7d8 Z9ejjed9d: Z:ejjeej;d;d<d=dgej;d>d?d@gdAdB Z<ejjdCdD Z=ejjdEdF Z>dS )G    N)LocalFileSystemSubTreeFileSystem)parametrize_legacy_dataset(parametrize_legacy_dataset_not_supported)guid)Version)_read_table_test_dataframe_write_table)_roundtrip_pandas_dataframealltypes_samplec                 C   s   t dd}| d }tj|}d|jjks.tt|| t	|j}d|ksPtt
|d d}|d dd ddd	d
gkstd S )N'  sizepandas_roundtrip.parquets   pandasutf8index_columnsranger      )kindnamestartstopstep)r   paTablefrom_pandasschemametadataAssertionErrorr
   pqread_metadatajsonloadsdecode)tempdirdffilenamearrow_tabler   js r*   b/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/pyarrow/tests/parquet/test_pandas.py#test_pandas_parquet_custom_metadata6   s    

 r,   c              	   C   s   t t dt  t dt  t dt  g}ttj	dtj
dtj	dtjddddgd	}td
dgdd gd d gd	}t jj||dd}t jj||dd}|jj|jddrt|j|jsttj| d |d}|| || d S )Nintfloatstring   dtypeZABBAZEDDAZACDC)r-   r.   r/         g?F)r   preserve_indexT)Zcheck_metadatazmerged.parquet)r   )r   r   fieldint16float32r/   pd	DataFramenparangeuint8r   r   equalsr   r    ZParquetWriterwrite_table)r%   r   df1df2Ztable1Ztable2writerr*   r*   r+   :test_merging_parquet_tables_with_different_pandas_metadataJ   s,    
rC   c                 C   s   t dd}tjjtt|j|jd d d ddgd|_| d }tj	|}|j
jd k	s\tt|| tj||d}| }t|| d S )	N
   r   Zlevel_1Zlevel_2namesr   use_legacy_dataset)r   r9   
MultiIndexfrom_tupleslistzipcolumnsr   r   r   r   pandas_metadatar   r
   r    read_pandas	to_pandastmassert_frame_equal)r%   rI   r&   r'   r(   
table_readdf_readr*   r*   r+   %test_pandas_parquet_column_multiindexg   s    

 rV   c           	      C   s   t dd}| d }tjj|dd}|jj}|d r6t|d sBtt|| tj	||d}|jj}|d rnt|jj
}|jj
|kst| }t|| d S )	Nr   r   r   Fr5   r   rN   rH   )r   r   r   r   r   rO   r   r
   r    rP   r   rQ   rR   rS   )	r%   rI   r&   r'   r(   r)   rT   r!   rU   r*   r*   r+   >test_pandas_parquet_2_0_roundtrip_read_pandas_no_index_written|   s"    

 rX   c                 C   s   d}t jd tt j|t jdt j|t jdd}| d }tj	
|}t|| t|dg|d}| }t|dg | t|ddg|d}| }t|dg | d S )Nr   r   r1   )r=   uint16r   r=   rN   rI   )r;   randomseedr9   r:   r<   r=   rY   r   r   r   r
   r   rQ   rR   rS   )r%   rI   r   r&   r'   r(   rT   rU   r*   r*   r+   test_pandas_column_selection   s.    
   r]   c                 C   s\   t d}tj|}t }t||dd | }t|}t||d	 }t
|| d S )Nr   2.6versionrH   r	   r   r   r   BufferOutputStreamr
   getvalueBufferReaderr   rQ   rR   rS   r%   rI   r&   r(   imosbufreaderrU   r*   r*   r+   )test_pandas_parquet_native_file_roundtrip   s    
 
ri   c                 C   sl   t d}tj|}t }t||dd | }t|}tj	|ddg|d
 }t|ddg | d S )Nr   r^   r_   stringsr=   rZ   )r	   r   r   r   rb   r
   rc   rd   r    rP   rQ   rR   rS   re   r*   r*   r+   test_read_pandas_column_subset   s    
 
rk   c                 C   s\   t d}tj|}t }t||dd | }t|}t||d	 }t
|| d S )Nr   r^   r_   rH   ra   re   r*   r*   r+   #test_pandas_parquet_empty_roundtrip   s    
 
rl   c                 C   sJ   ddiddiddigdd}t j|d}tj|}t }t|| d S )	NZ	page_typer   Zrecord_typeZnon_consecutive_homer   Z1001)Zagg_colZ	uid_first)data)r9   r:   r   r   r   rb   r
   )r%   rm   r&   r(   rf   r*   r*   r+   !test_pandas_can_write_nested_data   s    rn   c           
      C   s   | d }d}t tj|tjdtj|tjdtj|tjdtj|dkddd ddgd	}t	j
|}|d
}t||dd W 5 Q R X t| }t||d}| }	t||	 d S )Nzpandas_pyfile_roundtrip.parquetr4   r1   r   foobarbazZqux)int64r8   float64boolrj   wbr^   r_   rH   )r9   r:   r;   r<   rr   r8   rs   r[   randnr   r   r   openr
   ioBytesIO
read_bytesr   rQ   rR   rS   )
r%   rI   r'   r   r&   r(   frm   rT   rU   r*   r*   r+   $test_pandas_parquet_pyfile_roundtrip   s     r|   c                 C   s  d}t jd tt j|t jdt j|t jdt j|t jdt j|t j	dt j|t j
dt j|t j
dt j|t jdt j|t jdt j|t jdt j|t jdt j|dkd}| d }tj|}dD ]4}t||d|d t||d	}| }t|| qdD ]6}	t||d|	d
 t||d	}| }t|| qdD ]T}
|
dkrftjj|
sfqDt||d|
d t||d	}| }t|| qDd S )Nr   r   r1   )r=   rY   uint32uint64Zint8r7   int32rr   r8   rs   rt   r   )TFr^   )r`   use_dictionaryrH   )r`   write_statistics)NONEZSNAPPYZGZIPZLZ4ZZSTDr   )r`   compression)r;   r[   r\   r9   r:   r<   r=   rY   r}   r~   r7   r   rr   r8   rs   rv   r   r   r   r
   r   rQ   rR   rS   libCodecZis_available)r%   rI   r   r&   r'   r(   r   rT   rU   r   r   r*   r*   r+   )test_pandas_parquet_configuration_options  sd     
 r   z)ignore:Parquet format '2.0':FutureWarningc                  C   sJ   t dd} tddt|  d| _d| j_t| ddd}t||  d S )	Nd   r   r   rD   ro   z2.0Zspark)r`   Zflavor)	r	   r;   r<   lenindexr   r   rR   rS   )r&   resultr*   r*   r+   +test_spark_flavor_preserves_pandas_metadata=  s    
r   c                 C   s   t ddt ddit dt dt dt did}t| d }t j|ddjdd	d
}tj|}t|| t	||d}|
 }t|| d S )Nz2017-06-30 01:31:00g*_c@z2017-06-30 01:32:00)closetimedata.parquetzdatetime64[us]r1   r   FZdroprH   )r9   	Timestampstrr:   	set_indexr   r   r   r
   r   rQ   rR   rS   )r%   rI   rm   pathZdfxZtdfxr(   	result_dfr*   r*   r+    test_index_column_name_duplicateI  s,        
r   c           
      C   s   d}t t|}tjjdddg|gddgd}tjd|i|d}tj|}| d	 }t	|| t
||d
}||sxt| }	t|	| d S )Nr0   ro   rp   ZfoobarZsome_numbersrF   numbers)r   zdup_multi_index_levels.parquetrH   )rL   r   r9   rJ   from_arraysr:   r   r   r   r
   r   r>   r   rQ   rR   rS   )
r%   rI   Znum_rowsr   r   r&   tabler'   Zresult_tabler   r*   r*   r+    test_multiindex_duplicate_valuesh  s    
r   c                 C   sF   d}t jt|dd ddd}t| d |d}| }t|| d S )N  carat        cut  color  clarity  depth  table  price     x     y     z
 0.23      Ideal      E      SI2   61.5   55.0    326  3.95  3.98  2.43
 0.21    Premium      E      SI1   59.8   61.0    326  3.89  3.84  2.31
 0.23       Good      E      VS1   56.9   65.0    327  4.05  4.07  2.31
 0.29    Premium      I      VS2   62.4   58.0    334  4.20  4.23  2.63
 0.31       Good      J      SI2   63.3   58.0    335  4.34  4.35  2.75
 0.24  Very Good      J     VVS2   62.8   57.0    336  3.94  3.96  2.48
 0.24  Very Good      I     VVS1   62.3   57.0    336  3.95  3.98  2.47
 0.26  Very Good      H      SI1   61.9   55.0    337  4.07  4.11  2.53
 0.22       Fair      E      VS2   65.1   61.0    337  3.87  3.78  2.49
 0.23  Very Good      H      VS1   59.4   61.0    338  4.00  4.05  2.39\s{2,}r   pythonsepZ	index_colheaderZenginezv0.7.1.parquetrH   )r9   read_csvrx   ry   r   rQ   rR   rS   datadirrI   Zexpected_stringexpectedr   r   r*   r*   r+   &test_backwards_compatible_index_naming  s       r   c                 C   sP   d}t jt|ddddgddd }t| d	 |d
}| }t|| d S )Nr   r   cutcolorclarityr   r   r   zv0.7.1.all-named-index.parquetrH   )	r9   r   rx   ry   
sort_indexr   rQ   rR   rS   r   r*   r*   r+   1test_backwards_compatible_index_multi_level_named  s      
r   c                 C   sd   d}t jt|ddddgddd }|jdd dg|_t| d	 |d
}| }t	
|| d S )Nr   r   r   r   r   r   r   r   zv0.7.1.some-named-index.parquetrH   )r9   r   rx   ry   r   r   Z	set_namesr   rQ   rR   rS   r   r*   r*   r+   6test_backwards_compatible_index_multi_level_some_named  s      
r   c              	   C   s   t dddgdddgt jdddd	d
}t jjdddgt jdddd	gdd gd|_| d }t||d}| }t	|| t|dg|d}| }t	||dg j
dd d S )Nr      r0   g?g?g333333?z
2017-01-01zEurope/Brussels)Zperiodstz)abcr   r   r   r   rF   z'v0.7.1.column-metadata-handling.parquetrH   rZ   Tr   )r9   r:   Z
date_rangerJ   r   r   r   rQ   rR   rS   Zreset_index)r   rI   r   r   r   r   r*   r*   r+   2test_backwards_compatible_column_metadata_handling  s,      r   c                 C   s   t jddgddggddgd}|d d|d< |dg}tj|}t }t	|| tj
| | d	 }t|jt jst|j|jstd S )
Nr   r   r   dc1c2rN   categoryrH   )r9   r:   astyper   r   r   r   rb   r    r?   rP   rc   rQ   
isinstancer   ZCategoricalIndexr   r>   )rI   r&   r   bosZref_dfr*   r*   r+   )test_categorical_index_survives_roundtrip  s     
r   c                 C   sr   t dt jddddgdddgddi}tj|}t }t|| |	 }tj
|| d }t|| d S )Nr   r   r   r   T)
categoriesZorderedrH   )r9   r:   Categoricalr   r   r   rb   r    r?   rc   rP   rQ   rR   rS   )rI   r&   r   r   contentsr   r*   r*   r+   )test_categorical_order_survives_roundtrip  s    

  
 
r   c                 C   s   t d gd dgd d}|ddd}tj|}tj|}t }tj||ddd tj	|
 | d}|d	 |d	 st|d
 |d
 std S )Nr   g      ?)colr-   r   r^   rD   )r`   
chunk_sizerH   r   r   )r9   r:   r   r   r   r   rb   r    r?   
read_tablerc   r>   r   )rI   r&   Zdf_categoryr   Z	table_catrg   r   r*   r*   r+   *test_pandas_categorical_na_type_row_groups  s     r   c                 C   s   t jdddddddgdd}dddg}td	tjj||d
i}t }t	t
|| tj| | d }|jjdkst|jjj|k stt|| d S )Nr   r   rE   r   r1   ro   rp   rq   x)r   rH   r   )r;   arrayr9   r:   r   Z
from_codesr   rb   r    r?   r   r   rc   rQ   r   r2   r   catr   allrR   rS   )rI   codesr   r&   rg   r   r*   r*   r+   !test_pandas_categorical_roundtrip   s    
 
 
r   c                 C   s   t tjt dk rtd tjddddgidd}|d}tddddgi}|d}t|d t|d ks|t	t|d j
jjt|d j
jjkst	t| d	 }tt|| t| }t|| d S )
Nz1.3.0z:PyArrow backed string data type introduced in pandas 1.3.0r   ro   rp   zstring[pyarrow]r1   r   zcat.parquet)r   r9   __version__pytestskipr:   r   r   r   r   r   r   valuesr   r    r?   r   r   rQ   rR   rS   )r%   r@   rA   r   r   r*   r*   r+   )test_categories_with_string_pyarrow_dtype5  s    


 
r   c                 C   s  t ddddgd}|d d|d< t|}tj|t| d d	g|d
 tjt| d |d	 }t
|dg |dg  tj|t| d |d tjt| d |d	 }t
|dg |dg  t|t| d  tjt| d |d	 }t
|dg |dg  d S )Nr   r   r   r0   partr   r   ZInt64case1r   Zpartition_colsrI   rH   case2r   )r9   r:   r   r   r   r    write_to_datasetr   r   rQ   rR   rS   r?   )r%   rI   r&   r   r   r*   r*   r+   5test_write_to_dataset_pandas_preserve_extensiondtypesM  s<    
 
 
 
 
 
 

 
r   c                 C   s  t dddgdddgd}t jdddgdd	|_t|}|d
dg  }|d d|d< tj	|t
| d dg|d tjt
| d |d }t|| tj	|t
| d |d tjt
| d |d }t|| t|t
| d  tjt
| d |d }t|| d S )Nr   r   r   r   r0   r   r   idxr   r   r   r   r   r   rH   r   r   )r9   r:   Indexr   r   r   copyr   r    r   r   r   rQ   rR   rS   r?   )r%   rI   r&   r   Zdf_catr   r*   r*   r+   +test_write_to_dataset_pandas_preserve_indexn  s@    
 
 
 
 
 
 

 
r   r5   TFmetadata_fname	_metadataZ_common_metadatac                    sR  d}d}| t   }|  g }g }g }	t|D ]}
t||
d}tjtj|
| |
d | dddd|_|d	|
 }t
jj||d	}|d }|jjd kstt|| || || |	| q.t
jj||d	}t|j||  tj||d
}ddg |j d }t fdd|D }|dk	r:|jjnd |j_t|| d S )Nr4   )r\   r   rr   r1   r   r   z
{}.parquetrW   rH   r=   rj   r   c                    s   g | ]}|  qS r*   r*   ).0r   r   r*   r+   
<listcomp>  s     z<test_dataset_read_pandas_common_metadata.<locals>.<listcomp>F)r   mkdirr   r	   r9   r   r;   r<   r   formatr   r   r   Zreplace_schema_metadatar   r   r   r
   appendr    Zwrite_metadataZParquetDatasetrP   rQ   concatr   rR   rS   )r%   rI   r5   r   Znfilesr   dirpathZ	test_dataframespathsir&   r   r   Ztable_for_metadataZdatasetr   r   r*   r   r+   (test_dataset_read_pandas_common_metadata  sB    
 



 r   c                 C   sX   t ddddgi}| d }t|| tjdtt| t d}|t	
|sTtd S )Nr   r   r   r0   r   )
filesystem)r9   r:   r
   r    rP   r   r   r   r>   r   r   r   )r%   r&   r'   r   r*   r*   r+   %test_read_pandas_passthrough_keywords  s    
r   c                 C   s   t t ddgddggt ddgd}| d }tt t }ttd	|td
t g}tj	||}t
|| t| }t|| d S )N)idZ	something)Zvalue2else)r   Z
something2)valueZelse2ro   rp   )col1col2r   r   r   )r9   r:   ZSeriesr   Zmap_r/   r   r6   r   r   r
   r    rP   rQ   rR   rS   )r%   r&   r'   Zudtr   r(   r   r*   r*   r+   test_read_pandas_map_fields  s    "
r   )?rx   r"   numpyr;   r   Zpyarrowr   Z
pyarrow.fsr   r   Zpyarrow.tests.parquet.commonr   r   Zpyarrow.utilr   Zpyarrow.vendored.versionr   Zpyarrow.parquetZparquetr    r   r	   r
   ImportErrorZpandasr9   Zpandas.testingtestingrR   r   r   markZ
pytestmarkr,   rC   rV   rX   r]   ri   rk   rl   rn   r|   r   filterwarningsr   r   r   r   r   r   r   r   r   r   r   r   r   r   Zparametrizer   r   r   r*   r*   r*   r+   <module>   s   



/


!/
