U
    -eW                     @   s  d dl Z d dlZd dlmZ d dlZd dlZd dlZd dlZ	d dl
mZmZ d dlmZ d dlmZ zd dlmZ d dl
mZ W n ek
r   dZY nX z$d dlZd dlmZ d dl
mZ W n ek
r   d ZZY nX ejjZejjdd	 Zd
d Zejjej ddddddge	! dddddd fdddddge	" dddddd fdddddge	# dddddd fdddddge	$ dddddd fdddddge	% dddddd fdddddge	& dddddd fdddddge	' dddddd fdddddge	( dddddd fdddddge	) dddddd fdddddge	* dddddd fdde+dddge	, dde+d-dddd fd d!d!d d ge	. d"d!d d d#d fd$d%d&dd'ge	, dd$d%ddd fgd(d) Z/d*d+ Z0ejjd,d- Z1d.d/ Z2d0d1 Z3d2d3 Z4d4d5 Z5ejjd6d7 Z6ej7d8d9d: Z8d;d< Z9ejjd=d> Z:ejjd?d@ Z;dAdB Z<ejj=ejj>dCdD Z?dEdF Z@dGdH ZAej dIdJeBdKidJeBdKidfdJeBdKidLeBdKidMfeBdKeBdKdNdOeBdKidPffdQdR ZCejjDdSdT ZEdS )U    N)OrderedDict)_check_roundtripmake_sample_file)LocalFileSystem)util)_write_table)alltypes_samplec            	   	   C   s  t dd} | jt| jd} tjjddt| d| _t	| }t| j}|j
}t| |jt| ksft|j|d ksxt|jdkst|jdkstd|jkstt|jtstt|j
tst|j}|j|kstt||d kstt| |d }t| |j| jd kst|jdks(t|jdks8t|jdksHt|jd	ksXt|jd
kshttt ||d   W 5 Q R X tt |d  W 5 Q R X t|jD ]`}| |}t|t!j"stt| t|jD ]*}|#|}t|t!j$stt| qqtt | d W 5 Q R X tt | |jd  W 5 Q R X | d}|jt| ksxt|j|d kst|j%dksttt |#d}W 5 Q R X tt |#|d }W 5 Q R X |#d}|j&dkst|j'dks
t|jd	kst|j(dks*t|j)dks:t|j*dksJtt|j+t!j,s^t|j-dksntt.|j/ddhkst|j0dkst|j1d kst|j2dkst|j3dkst|j4dksttt5 |j6 W 5 Q R X tt5 |j7 W 5 Q R X d S )N'  sizecolumnsr   i@B    2.6zparquet-cppBOOLEANNONE    boolTZSNAPPYZPLAINZRLEF)8r   Zreindexsortedr   nprandomrandintlenindexr   metadatareprnum_rowsAssertionErrornum_columnsnum_row_groupsformat_versionZ
created_by
isinstanceserialized_sizeintdictschemanameZmax_definition_levelZmax_repetition_levelphysical_typeZconverted_typepytestraises
IndexErrorrange	row_grouppqZRowGroupMetaDatacolumnZColumnChunkMetaDataZtotal_byte_sizefile_offset	file_path
num_valuesZpath_in_schemais_stats_set
statisticsZ
Statisticscompressionset	encodingsZhas_dictionary_pageZdictionary_page_offsetZdata_page_offsetZtotal_compressed_sizeZtotal_uncompressed_sizeNotImplementedErrorZhas_index_pageZindex_page_offset)	dffilehZncolsmetar'   colZrgrg_metacol_meta r@   d/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/pyarrow/tests/parquet/test_metadata.pytest_parquet_metadata_api4   s    





rB   c                 C   sJ   t ddddgi}t|| d  t| d }|jddj d S )Nar   r      ztest_metadata_segfault.parquetr   )	patabler/   write_tableParquetFiler   r.   r0   r5   )tempdirrF   Zparquet_filer@   r@   rA   test_parquet_metadata_lifetime   s    rJ   )datatyper)   	min_value	max_value
null_countr3   distinct_countr   r      ZINT32ZINT64r   gg@gffffff@g@FLOATDOUBLEr   b  ZaaaZ
BYTE_ARRAY    zutf-8TFr             bs   12s   aaac                 C   s   t d| i}ttd|g}	tjj||	dd}
t|
}|j}|	d}|
d}|j}|jsftt||j|sxtt||j|st|j|kst|j|kst|j|kst|j|kstd S )NrK   F)r'   safer   )pd	DataFramerE   r'   fieldTablefrom_pandasr   r   r.   r0   r5   has_min_maxr   _closeminmaxrO   r3   rP   r)   )rK   rL   r)   rM   rN   rO   r3   rP   r:   r'   rF   r;   r<   r>   r?   statr@   r@   rA   "test_parquet_column_statistics_api   s    .


re   c                 C   sD   | t  krt|| dk S | t  kr8t|| dk S ||kS d S )NgHz>gvIh%<=)rE   float32absfloat64)rL   leftrightr@   r@   rA   ra      s
    ra   c                  C   sf   t dt jt jgddi} ttj| j}|	d
djjrFt|	d
djjd ksbtd S )Ntzdatetime64[ns])Zdtyper   )r[   r\   ZSeriesZNaTr   rE   r^   r_   r   r.   r0   r5   r`   r   rc   )r:   r<   r@   r@   rA   &test_parquet_raise_on_unset_statistics   s    rl   c           
      C   s  ddt  fddt  fddt  ftddddtd	dddt d
ftddddtd	dddt dftdddddddtdddddddt d
ftdddddddtdddddddt dft	dddt	dddt 
 ftdtdt ddfg	}t|D ]\}\}}}t jt j||g|dgdg}t| d| }tj||dd t|}|jddj}	|	j|kst|	j|kstqd S )N
   l   5f|~W	 l    u   ähnlichu   öffentlich   r   rU      msusi           z20.123z20.124   rW   rL   r=   zexample{}.parquetr   version)rE   uint64uint32utf8datetimetimeZtime32Ztime64	timestampdateZdate32decimalDecimalZ
decimal128	enumerater^   Zfrom_arraysarraystrformatr/   rG   rH   r   r.   r0   r5   rb   r   rc   )
rI   ZcasesiZmin_valZmax_valtyprk   pathpfstatsr@   r@   rA   %test_statistics_convert_logical_types   sH    

r   c              
   C   sb  t jtdt dddgfdt dddgfg}t|| d  t| d }dD ]0}|d		|}|j
d
ksvt|jd k	sTtqTt|| d dd t| d }dD ]0}|d		|}|j
dkst|jd kstqt|| d dgd t| d }|d		d	}|d		d}|j
d
ks.t|j
dks>t|jd k	sNt|jd ks^td S )NrC   r   r   rD   rT   cdata.parquet)r   r   r   Tzdata2.parquetF)Zwrite_statisticszdata3.parquet)rE   r^   Zfrom_pydictr   r   r   r/   read_metadatar.   r0   r4   r   r5   )rI   rF   r<   r=   ccZcc_aZcc_br@   r@   rA   %test_parquet_write_disable_statistics  s4    r   c               
   C   s2  d} t jdt  | did}t jdt |g| did}t jdt  dd	| d
idt jdt t jdt  | did| didt jdt |g| didt dt  t jdt  | didt jdt  | didg}dd |D }t j|t |d}t  }t	|| |
 }tt |}|j}	|	d j|  d
ksBt|	d jd d	ksZt|	d }
|
j|  dksvt|
jj}|j|  dkst|	d }|j|  dkst|jd }|j|  dkst|jd }|j|  dkst|	d jd kst|	d j|  dkst|	d j|  dks.td S )Ns   PARQUET:field_idinners   100r   middles   101basics   others   abc   1listz
list-inners   10s   11structs   102zno-metadataznon-integral-field-ids   xyzznegative-field-ids   -1000c                 S   s   g | ]}g qS r@   r@   ).0_r@   r@   rA   
<listcomp>E  s     z*test_field_id_metadata.<locals>.<listcomp>r'   r   r   r   rD   rQ   rW   )rE   r]   int32r   list_rF   r'   BufferOutputStreamr/   rG   getvaluerH   BufferReaderZschema_arrowr   r   rL   Zvalue_field)Zfield_idr   r   fieldsZarrsrF   Zbiocontentsr   r'   Z
list_fieldZlist_item_fieldZstruct_fieldZstruct_middle_fieldZstruct_inner_fieldr@   r@   rA   test_field_id_metadata0  sf    
   

r   c                  C   s|   dD ]r} t ddddgi}t  }t||| d t | }t|}|d	d}|j
| ksht|j| kstqd S )N)FTrC   r   r   rD   )write_page_indexr   )rE   rF   r   r   r   r   r/   r   r.   r0   Zhas_offset_indexr   Zhas_column_index)r   rF   writerreaderr   r   r@   r@   rA   test_parquet_file_page_indexh  s    
r   c              	   C   sf  ddg}t | d }tdddgddd	gddgddgdd
ggd}tj|}d }|D ]N}g }tj|t | | |d |d | |d kr|d }qX|	|d  qXt
|d}|| W 5 Q R X t|}| }	| }
|
D ] }|dkr|
| |	| kstq|
d dkst|
d dks,t|
d dks>t|
d dksPt|	d dksbtd S )NzARROW-1983-dataset.0zARROW-1983-dataset.1	_metadatar   r   rD   r   rQ   )onetwothreeZmetadata_collectorr   wbr$   r    r   rr   r!   )r   r[   r\   rE   r^   r_   r/   rG   Zset_file_pathappend_row_groupsopenwrite_metadata_filer   to_dictr   )rI   	filenamesZmetapathr:   rF   _metafilenamer<   fmdZ_mdkeyr@   r@   rA   test_multi_dataset_metadataw  s>    

r   z#ignore:Parquet format:FutureWarningc           
   	   C   sJ  t | d }tddg}t|| t|}|j }||sHt|j	r\d|j	ks\tdD ]<}tj|||d t|}|dkrdnd}|j
|ks`tq`tjd	d
gddgd|d}t|| d  tt | d }tj||||gd t|}|jd
ks
td}	tjt|	d$ tjtddg|||gd W 5 Q R X d S )Nr   )rC   int64)rT   rh   s   ARROW:schema)1.0z2.0z2.4r   rw   r   r   r   r   g?g?)rC   rT   r   r   r   zLAppendRowGroups requires equal schemas.
The two columns with index 0 differ.match)rC   r   )rT   null)r   rE   r'   r/   write_metadatar   Zto_arrow_schemaequalsr   r   r"   rF   rG   r!   r*   r+   RuntimeError)
rI   r   r'   Zparquet_metaZschema_as_arrowrx   Zexpected_versionrF   Zparquet_meta_multmsgr@   r@   rA   test_write_metadata  s<    


  
 r   c                  C   sB   t jt ddgddd id} t jtdg| d}t| d S )	NZf0doubleZlargexi r   rm   r   )rE   r'   r]   rF   r   aranger   )Z	my_schemarF   r@   r@   rA   test_table_large_metadata  s
    
r   c                  C   sP  t dd} t| }t| }t| | jd d d  }t|jtjsDt|j|jsVt|j|jksft|j|jsxt|j|jkst|jdkst|j|jrt|j|jkstt|jd tj	st|jd |jd st|jd |jd kst|jd |jd rt|jd |jd ks8t|jd dksLtd S )Nr	   r
   r   zarbitrary objectr   r   )
r   r   r   r#   r'   r/   ZParquetSchemar   r   ZColumnSchema)r:   r;   Zfileh2Zfileh3r@   r@   rA   test_compare_schemas  s$    
r   c                 C   s   d}t jt|tj|dddgd}| d }tj|}t	|| t
|}t
j|dd}|j|spt|j|st|jjd	 |jd	 kstd S )
Nd   )r   valuesr   r   r   ztest.parquetT)Z
memory_maps   pandas)r[   r\   r   r   r   randnrE   r^   r_   r   r/   read_schemar'   r   r   r   )rI   Nr:   Z	data_pathrF   read1Zread2r@   r@   rA   test_read_schema  s    


r   c                 C   s   t dt jg ddi}t|| d  t| d }| }t|d dksRtt|d d d dksnt|d d d d d	 d kstd S )
NrC   r   rv   r   Z
row_groupsr   r   r   r5   )	rE   rF   r   r/   rG   r   r   r   r   )rI   rF   r   metadata_dictr@   r@   rA   #test_parquet_metadata_empty_to_dict  s    r   c               	   C   s   d} d}t dd t| D }t  }t|| | }W 5 Q R X tt |}tt |}t|D ]}|	| qpt  }|
| | }W 5 Q R X tt |}d S )NrU   i  c                 S   s   i | ]}t |tjd qS )rm   )r   r   r   r   )r   r   r@   r@   rA   
<dictcomp>  s      z6test_metadata_exceeds_message_size.<locals>.<dictcomp>)rE   rF   r-   r   r/   rG   r   r   r   r   r   )ZNCOLSZNREPEATSrF   outbuforiginal_metadatar   r   r@   r@   rA   "test_metadata_exceeds_message_size  s    


r   c              	   C   s6  t ddddgi}d}t| | }d| }t|| t| | }|j}t||s`ttj|t	 d|szttj|d|  d|stt
||sttj
|t	 d|sttj
|d|  d|stt| > tj|t	 d|sttj
|t	 d|s(tW 5 Q R X d S )NrC   r   r   rD   r   zfile:///
filesystem)rE   rF   r   r/   rG   r   r'   r   r   r   r   r   Z
change_cwd)rI   rF   fnamer2   Zfile_urir   r'   r@   r@   rA   test_metadata_schema_filesystem-  s\         
 r   c               	   C   sx   t ddddgi} t  }t| | | }W 5 Q R X tt |}d}tj	t
|d |d  W 5 Q R X d S )NrC   r   r   rD   z#Argument 'other' has incorrect typer   )rE   rF   r   r/   rG   r   r   r   r*   r+   	TypeErrorr   )rF   r   r   r   r   r@   r@   rA   test_metadata_equalsP  s    
r   zt1,t2,expected_errorcol1rm   col2z$The two columns with index 0 differ.)r   r   Zcol3z&This schema has 2 columns, other has 1c           
   	   C   s   t | }t |}t }t }t|| t|| |d |d t|j}t|j}|rd}	t	j
t|	| d || W 5 Q R X n
|| d S )Nr   z(AppendRowGroups requires equal schemas.
r   )rE   rF   ioBytesIOr/   rG   seekrH   r   r*   r+   r   r   )
t1t2Zexpected_errorZtable1Ztable2Zbuf1Zbuf2meta1meta2prefixr@   r@   rA   $test_metadata_append_row_groups_diff\  s    



r   c              	   C   s  |\}}| d }| d }| d }| d }| d}t dtdi}	t|	j|g  tj|	j|g t d t|	j| g  |d	}
t|	j|
g  W 5 Q R X tj|	j|g |d |	 |	   kr|	   kr|	   kr||
 ksn td S )
Nr   r   meta3meta4z/meta5r=   rW   r   zwb+)rE   rF   r-   r/   r   r'   r   as_urir   
read_bytesreadr   )rI   Zs3_example_s3fsZs3_fsZs3_pathr   r   r   r   Zmeta5rF   Zmeta4_streamr@   r@   rA   (test_write_metadata_fs_file_combinationsz  s(    
r   )Fr|   r   collectionsr   r   numpyr   r*   ZpyarrowrE   Zpyarrow.tests.parquet.commonr   r   Z
pyarrow.fsr   Zpyarrow.testsr   Zpyarrow.parquetZparquetr/   r   ImportErrorZpandasr[   Zpandas.testingtestingtmr   markZ
pytestmarkrB   rJ   ZparametrizeZuint8Zuint16rz   ry   Zint8Zint16r   r   rf   rh   chrbinaryencodeZbool_re   ra   rl   r   r   r   r   r   filterwarningsr   r   r   r   r   ZslowZlarge_memoryr   r   r   r-   r   Zs3r   r@   r@   r@   rA   <module>   s   

Z                                      *

#8
*

+	

#
