U
    -eG                    @   sZ  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dl	Z
d dlmZ d dl	mZ d dlmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZ z$d dlmZ d dlmZmZm Z  W n e!k
r   dZY nX zd dl"Z#d dl$m%Z& W n e!k
r   d Z#Z&Y nX ej'jZ(ej'j"d	d
 Z)ej'j"dd Z*ej'+ddd Z,dd Z-edd Z.ej'j"edd Z/ej'+dej'j"dd Z0ej'j"edd Z1ej'j"edd Z2ej'j"edd Z3ej'j"eej'j4e5e6fdd d!d" Z7ej'j"ej'j8d#d$ Z9ej'j"ed%d& Z:ej'j"ed'd( Z;ej'j"ed)d* Z<ej'j"ed+d, Z=ej'j"eej'>d-d.gd.gge?d/d0k e?d1d2d0k e?d1d3@e
A d0k fej'>d4d5d6d7 ZBej'j"ed8d9 ZCej'jDed:d; ZEej'jDed<d= ZFej'j"ed>d? ZGej'j"ej'jDed@dA ZHej'j"ej'jDedBdC ZIddEdFZJdGdH ZKdIdJ ZLej'j"ej'+dKdLdM ZMej'j"ej'+dKdNdO ZNdPdQ ZOeej'j"dRdS ZPej'j"ej'+dTedUdV ZQej'j"edWdX ZRej'+dej'j"edYdZ ZSej'j"ed[d\ ZTej'j"ed]d^ ZUddadbZVdcdd ZWej'j"eej'>dedfdggdhdi ZXej'j"edjdk ZYej'j"edldm ZZej'j"eej'>dedfdggdndo Z[ej'j"edpdq Z\edrds Z]ej'+dKddtduZ^ddvdwZ_ej'j"edxdy Z`ej'j"edzd{ Zaej'j"ed|d} Zbej'j"ed~d Zcej'j"edd Zdej'j"ej'jDedd Zeej'j"ej'jDedd Zfej'j"ej'jDedd Zgej'+ddej'j"edd Zhej'j8ej'j"dd ZidddZjdddZkej'j"edd Zlej'j"edd Zmej'j"edd Znej'j"edd Zoej'j8ej'j"ej'+ddd Zpej'j8dd Zqej'j8ej'+ddd Zrej'j8dd Zsej'+dedd Ztej'j8dd Zuej'j8dd Zvej'j8dd Zwej'j8dd Zxej'j8dd Zyej'j8ej'>dddd Zzej'j"edd Z{dS )    N)fs)LocalFileSystem)util)parametrize_legacy_dataset parametrize_legacy_dataset_fixed(parametrize_legacy_dataset_not_supported)guid)Version)_read_table_test_dataframe_write_tablec              	   C   sd   t d}tj|}| d }t||dd tt t	|}W 5 Q R X |
 }||s`td S )N  parquet_piece_read.parquet2.6version)r   paTablefrom_pandasr   pytestwarnsFutureWarningpqParquetDatasetPiecereadequalsAssertionError)tempdirdftablepathpiece1result r#   c/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/pyarrow/tests/parquet/test_dataset.pytest_parquet_piece_read;   s    r%   c              	   C   s   t d}tj|}| d }t||dd tt t	|}W 5 Q R X |
 }t|tjsbt| }t|tjszt||std S )Nd   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   
isinstancer   get_metadataZFileMetaDatar   )r   r   r   r    piecetable1Zmeta1r#   r#   r$   (test_parquet_piece_open_and_get_metadataJ   s    r+   z(ignore:ParquetDatasetPiece:FutureWarningc                  C   s   d} t | }t j| dd}t j| dddgd}t|| ks@tt|dksPtt|dks`t||kslt||ksxt||kst||kstd S )	Nz	/baz.parq   )	row_group)foor   )barr,   )r-   Zpartition_keysz/baz.parq | row_group=1z/partition[foo=0, bar=1] /baz.parq | row_group=1)r   r   strr   )r    r!   Zpiece2Zpiece3r#   r#   r$   test_parquet_piece_basics]   s    
  r1   c               	   C   s   t ddddg} t ddddg}t| jtjs6tt|jtjsHtt dtdd	d	g}t	
t |j W 5 Q R X d S )
Nkey1r.   r/   Zbazkey2i  i  i  r,   )r   ZPartitionSetr'   
dictionaryr   ZStringArrayr   ZIntegerArraydatetimer   raises	TypeError)Zset1Zset2Zset3r#   r#   r$   "test_partition_set_dictionary_typep   s    r8   c                 C   s   t ddddgi}| d }|  |d }t|t| tj|t |d}|	|s^t
tjdt| |d}|	|st
d S )	Nar,         data_dirdata.parquet
filesystemuse_legacy_datasetzdata_dir/data.parquet)r   r   mkdirr   write_tabler0   
read_tabler   r   r   r   r   Z_filesystem_uri)r   r@   r   	directoryr    r"   r#   r#   r$   test_filesystem_uri|   s"      rE   c                 C   s   t  }t|| | d S N)r   _get_instance_partition_test_for_filesystem)r   r@   r   r#   r#   r$   test_read_partitioned_directory   s    rI   z$ignore:'ParquetDataset:FutureWarningc              	   C   s   t  }| }t|| tj||dd}tjtdd tj||ddd}W 5 Q R X t	|j
dksdt|j}t	|jdks|t|j|jjkstt	|jt	|jjkstd S )	Nr,   )r?   metadata_nthreadsz"Specifying the 'metadata_nthreads'match   T)r?   rJ   r@   r   )r   rG   rH   r   ZParquetManifestr   r   r   ParquetDatasetlenpiecesr   
partitionsZpartition_namesZlevels)r   r   	base_pathmanifestdatasetrQ   r#   r#   r$   *test_create_parquet_dataset_multi_threaded   s*    
   rU   c                 C   s`   t  }| }t|| tj||d}|jdgd}|rL|jdddgks\tn|jdgks\td S )Nr@   valuescolumnsr.   r/   )r   rG   rH   r   rN   r   column_namesr   )r   r@   r   rR   rT   r"   r#   r#   r$   'test_read_partitioned_columns_selection   s    
 r[   c                 C   s~  t  }| }ddg}dddg}ddg}d|gd	|gd
|gg}tjtj|dddtttj|tdddtttj|ddddddd	d
gd}t	|||| t
j||dddg|d}	|	 }
|
 jdd}d|d jkstd|d	 jkstd|d
 jkstdddgddgg}t
j||||d}	|	 }
|
 jdd}|d dk|d	 dk@ |d
 dk@ }t|d dk|d
 dk@ }| dkst| dkst|jd | |  kst|r<tt  dgg}t
j|||dd W 5 Q R X tt  dgg}t
j|||dd W 5 Q R X n>dggdggfD ],}t
j|||dd}	|	 jdksLtqLd S )Nr   r,   r9   bcTFintegerstringbooleani4Zdtype      r:   boolr;   r^   r_   r`   rX   )r^   =r,   )r_   !=r\   )r`   ==Truer?   filtersr@   drop)r^   rg   r   )r`   ri   Falserj   ro   )r_   ri   s   1 a)r_   ri   z1 a)r   rG   pd	DataFramenparrayrepeattileobject_generate_partition_directoriesr   rN   r   	to_pandasreset_indexrW   r   sumshaper   r6   NotImplementedErrornum_rows)r   r@   r   rR   integer_keysstring_keysboolean_keyspartition_specr   rT   r   	result_dfrl   Zdf_filter_1Zdf_filter_2r#   r#   r$   test_filters_equivalency   s    
   



 

  r   c                 C   s   t  }| }dddddg}d|gg}d}tjt|tj|dd	d
ddgd}t|||| tj	||ddg|d}|
 }	|	 jddjdd}
dd tt|
d jD }|ddgkstd S )Nr   r,   r:   r;      integersrd   ra   rb   indexr   r   rX   )r   <r   )r   >r,   rk   ZbyTrm   c                 S   s   g | ]}|qS r#   r#   .0xr#   r#   r$   
<listcomp>>  s     z9test_filters_cutoff_exclusive_integer.<locals>.<listcomp>r   rG   rp   rq   rr   arangers   rw   r   rN   r   rx   sort_valuesry   mapintrW   r   r   r@   r   rR   r~   r   Nr   rT   r   r   Zresult_listr#   r#   r$   %test_filters_cutoff_exclusive_integer  s:     r   z5Loss of type information in creation of categoricals.)r6   reasonc              	   C   s  t  }| }tdddtdddtdddtdddtdddg}d|gg}d	}tjt|tj|d
ddddgd}t	|||| t
j||ddg|d}| }	|	 jddjdd}
tjtjtdddgd
dtj|d
dd}|
d j|kstd S )Ni  r   	   
            datesrd   Z
datetime64rb   )r   r   r   rX   )r   r   z
2018-04-12)r   r   z
2018-04-10rk   r   Trm   
categories)r   rG   r5   daterp   rq   rr   r   rs   rw   r   rN   r   rx   r   ry   CategoricalrW   r   )r   r@   r   rR   Z	date_keysr   r   r   rT   r   r   expectedr#   r#   r$   &test_filters_cutoff_exclusive_datetimeB  sJ    	 r   c              	   C   sv   | d }t t jddddtddj|dd tj|d	d
tdddfgd}|d	 dddddgksrt
d S )Nztimestamps.parquetz
2020-01-01r   D)Zperiodsfreq)r   idT)Zuse_deprecated_int96_timestampsr   <=i  r,   rd   )rl   r   r   r:   r;   r   )rp   rq   Z
date_rangerangeZ
to_parquetr   rC   r5   column	to_pylistr   )r   r    r   r#   r#   r$   test_filters_inclusive_datetimeu  s     r   c                 C   s   t  }| }dddddg}d|gg}d}tjt|tj|dd	d
ddgd}t|||| tj	||ddg|d}|
 }	|	 jddjdd}
dd tt|
d jD }|ddgkstd S )Nr   r,   r:   r;   r   r   rd   ra   rb   r   r   rX   )r   r   r;   )r   z>=r:   rk   r   Trm   c                 S   s   g | ]}t |qS r#   )r   r   r#   r#   r$   r     s     z2test_filters_inclusive_integer.<locals>.<listcomp>r   r   r#   r#   r$   test_filters_inclusive_integer  s:     r   c                 C   s  t  }| }ddg}dddg}ddg}d|gd	|gd
|gg}tjtj|dddtttj|tdddtttj|ddddddd	d
gd}t	|||| t
j||dg|d}	|	 }
|
 jdd}d|d	 jkstd|d	 jkstd|d	 jkstt
j||dddgfdd
ddhfg|d}	|	 }
|
 jdd}d|d jksbtd|d	 jksvtd|d
 jkstd S )Nr   r,   r9   r\   r]   TFr^   r_   r`   ra   rb   rc   rd   r:   re   r;   rf   rX   )r_   inabrk   rm   r   )r_   r   r9   r\   znot inro   )r   rG   rp   rq   rr   rs   rt   ru   rv   rw   r   rN   r   rx   ry   rW   r   )r   r@   r   rR   r~   r   r   r   r   rT   r   r   r#   r#   r$   test_filters_inclusive_set  sV    
  
r   c           	   	   C   s  t  }| }dddddg}d|gg}d}tjt|tj|dd	d
ddgd}t|||| t	t
 tj||dg|d W 5 Q R X t	t tj||dg|d W 5 Q R X |rt	t" tj||ddt fg|d W 5 Q R X n0tj||ddt fg|d}| jdkst|rTt	t" tj||dddhfg|d W 5 Q R X nFtj||dddhfg|d}t	t | jdkstW 5 Q R X d S )Nr   r,   r:   r;   r   r   rd   ra   rb   r   r   rX   )r   r   r;   rk   )r   z=<r;   r   rh   )r   rG   rp   rq   rr   r   rs   rw   r   r6   r7   r   rN   
ValueErrorsetr   r}   r   r|   )	r   r@   r   rR   r~   r   r   r   rT   r#   r#   r$   test_filters_invalid_pred_op  sh    r   c           	   	   C   s   t  }| }dddddg}d|gg}d}tjt|tj|dd	d
ddgd}t|||| d}tj	t
|d tj||dg|d  W 5 Q R X d S )Nr   r,   r:   r;   r   r   rd   ra   rb   r   r   rX   z1No match for FieldRef.Name\(non_existent_column\)rK   )Znon_existent_columnr   r;   rk   )r   rG   rp   rq   rr   r   rs   rw   r   r6   r   r   rN   r   )	r   r@   r   rR   r~   r   r   r   msgr#   r#   r$   test_filters_invalid_column  s$    
r   rl   )r   r   r;   r   r;   nestedr9   r\   read_method)rC   read_pandasc              	   C   s   t t|}t }| }dddddg}d|gg}t|}	tt|	tj	|ddt	d	d
 t
|	D d}
t||||
 t|||d}|rt|tjrd}tjt|d ||f| W 5 Q R X n||f|}|jdkstd S )Nr   r,   r:   r;   r   r   ra   rb   c                 S   s   g | ]}|t |d qS )r   )r0   r   ir#   r#   r$   r   K  s     z+test_filters_read_table.<locals>.<listcomp>)r   r   r   rk   z6Expressions as filter not supported for legacy datasetrK   )getattrr   r   rG   rO   rp   rq   rr   r   rs   r   rw   dictr'   pc
Expressionr   r6   r7   r}   r   )r   r@   rl   r   r   r   rR   r~   r   r   r   kwargsr   r   r#   r#   r$   test_filters_read_table3  s,    

r   c           
      C   s   t  }| }ddg}d|gg}d}tjt|tj|dddddgd	}t|||| tj	||d
}|
 }	|	d |kstd S )NZ2019_2Z2019_3	year_weekr:   rv   rb   )r   r   r   rX   rV   )r   rG   rp   rq   rr   r   rs   rw   r   rN   r   r   r   r   )
r   r@   r   rR   r   r   r   r   rT   r"   r#   r#   r$   $test_partition_keys_with_underscores]  s&     r   c                 C   sR   | \}}|d }t ddddgi}t|||d t|||d}||sNtd S Nz/test.parquetr9   r,   r:   r;   r?   r>   r   r   r   r
   r   r   )s3_example_s3fsr@   r   r    r   r"   r#   r#   r$   test_read_s3fsx  s      r   c                 C   sR   | \}}|d }t ddddgi}t|||d t|||d}||sNtd S r   r   )r   r@   r   rD   r    r   r"   r#   r#   r$   test_read_directory_s3fs  s      r   c                 C   sP   t | d }tddddgi}t|| tj|g|d }||sLtd S )Nr=   r9   r,   r:   r;   rV   )	r0   r   r   r   r   rN   r   r   r   )r   r@   	data_pathr   r"   r#   r#   r$   test_read_single_file_list  s    
 
r   c              	   C   s|   dd l }ddlm} t|jtdkr0td | \}}tt ||}W 5 Q R X t	|| t
j|||d}|  d S )Nr   )S3FSWrapperz0.5z+S3FSWrapper no longer working for s3fs 0.5+r>   )s3fspyarrow.filesystemr   r	   __version__r   skipr   r   rH   r   rN   r   )r   r@   r   r   r   r    wrapperrT   r#   r#   r$   ,test_read_partitioned_directory_s3fs_wrapper  s    

  r   c                 C   s   | \}}t |||d d S )NrV   )rH   r   r@   r   r    r#   r#   r$   $test_read_partitioned_directory_s3fs  s      r   Tc                 C   sp  ddg}dddg}d|gd|gg}d}t jt|tj|d	d
dtttj|td
ddtj	|dddddgd}t
| ||| tj|| |d}| }	|	 jddjdd}
|jddjddj|
jd}|stt jtdk r t j|d |d|d< t j|d |d|d< n$|d d|d< |d d|d< |
jddddgk s`tt|
| d S )Nr   r,   r9   r\   r]   r.   r/      ra   rb   rc   rd   r:   )r   r.   r/   rW   r   rW   rX   r>   r   Trm   z2.0.0r   category)rp   rq   rr   r   rs   rt   ru   rv   randomrandnrw   r   rN   r   rx   r   ry   reindexrY   r	   r   r   astypeallr   tmassert_frame_equal)r   rR   r@   Zfoo_keysZbar_keysr   r   r   rT   r   r   Zexpected_dfr#   r#   r$   rH     sL    


  rH   c                    sB   t  tdtdd fdd|dg  d S )Npathsepsep/c              
      s  | \}}|D ]}|||fg } t| d||g}| | d krЈ |t g}t|}	tj|	}
	|d}t
|
| W 5 Q R X |st |dg}	|d}W 5 Q R X q||d |  |dg}	|d}W 5 Q R X qd S )Nz{}={}r,   wbZ_SUCCESS)joinr0   formatrA   r   _filter_partitionr   r   r   openr   existsr   )base_dirlevel	part_keysnamerW   valueZthis_part_keysZ	level_dir	file_pathZfiltered_dfZ
part_tablefZfile_successZDEPTH_visit_levelr   r   r   r   r#   r$   r     s,    


z5_generate_partition_directories.<locals>._visit_levelr   )rO   r   )r   r   r   r   r#   r   r$   rw     s    rw   c              	   C   sL  dd l }dd lm} d}|jt|tj|dddgd}t|}t	j
|d}tj|}| |d}t|| W 5 Q R X t	j
|d	}	| |	d}||j| W 5 Q R X |j|| d
d}
tt |
jt|	kstW 5 Q R X | |}||j}W 5 Q R X |
j|s"t|j|g| d
d}|j|
jsHtd S )Nr   r&   r   rW   r   rW   rX   r=   r   _common_metadataTr>   )pandaspyarrow.parquetparquetrq   rr   r   r   r   r0   osr    r   r   r   r   r   r   write_metadataschemarN   r   r   r   common_metadata_pathr   read_metadatar   )r   rR   rp   r   r   r   r   r   r   metadata_pathrT   Zcommon_schemadataset2r#   r#   r$    _test_read_common_metadata_files  s:    

r   z+ignore:'ParquetDataset.schema:FutureWarningc                 C   s   t  }t||  d S rF   )r   rG   r   )r   r   r#   r#   r$   test_read_common_metadata_files?  s    r   c           
   	   C   s  t  }d}tjt|tj|dddgd}| d }tj	
|}||d}t|| W 5 Q R X | d }||d}t|j| W 5 Q R X tj| |d	d
}tt |jt|kstW 5 Q R X ||}t|j}	W 5 Q R X |j|	std S )Nr&   r   r   rW   rX   r=   r   	_metadataTr>   )r   rG   rp   rq   rr   r   r   r   r   r   r   r   r   r   r   r   rN   r   r   r   r   r0   r   r   r   )
r   r   r   r   r   r   r   r   rT   Zmetadata_schemar#   r#   r$   test_read_metadata_filesF  s.    
r   c                 C   sl   t jt| td}g }|D ]>\}}|| t|tjtjfrHt	|}|| | |kM }q| | j
|ddS )Nrb   r,   )Zaxis)rr   ZonesrO   re   appendr'   r5   r   rp   	Timestamprn   )r   r   	predicateZto_dropr   r   r#   r#   r$   r   f  s    

r   c                 C   s   | d }|   tjtddddgi}t||d  | d }|   tjtddd	d
gi}t||d  tj| dgg|d}|	d
tdddggstd S )NzA=0Br,   r:   r;   r=   zA=1r9   r\   r]   )Ari   r   )rl   r@   )rA   r   r   r   rp   rq   r   rB   rC   r   r   Zchunked_arrayr   )r   r@   Zdir1r*   Zdir2Ztable2r   r#   r#   r$   "test_filter_before_validate_schemav  s    r  z.ignore:Specifying the 'metadata':FutureWarningc              
      s  d}d}| t   }|  g }g }t|D ]\}t||d}|d tj|d< |d| }	tj	
|}
t|
|	 ||
 ||	 q*|d   dfdd		}|| t|} |stt|d
 }r2|||d}||sttjtdd tj||jdd }W 5 Q R X ||s\tn*tjtdd tj||dd W 5 Q R X d
dd jd g} fdd|D }tj||d}tj	j fdd|D | jjd}||sttj|dd t||djd d d df }| dt   }tj	
|}t|| s"d S t|}tt |||g  W 5 Q R X tt |||d W 5 Q R X ||d
 g}tt. tjtdd |||jd W 5 Q R X W 5 Q R X tt || W 5 Q R X d S )Nr   rd   seedZuint32
{}.parquetz_SUCCESS.crcTc                    s$   t j| fd i|}|j||dS )Nr@   )rY   use_threads)r   rN   r   )pathsrY   r  r   rT   rV   r#   r$   read_multiple_files  s    z5test_read_multiple_files.<locals>.read_multiple_filesr   )metadatazSpecifying the 'schema'rK   r   r@   no longer supportedF)r  r@   r:      r,   c                    s   g | ]}  |jqS r#   )fieldr   r   r"   r#   r$   r     s     z,test_read_multiple_files.<locals>.<listcomp>)rY   r@   c                    s   g | ]}  |qS r#   )r   r   r  r#   r$   r     s     )namesr  )r  r@   r   r   )NT) r   rA   r   r   r   rr   int64r   r   r   r   r   r   touchZconcat_tablesr   r   r   r   r   r   r   rN   r   r   r6   r   rC   num_columnsZfrom_arraysr  Ziloc)r   r@   nfilessizedirpath	test_datar	  r   r   r    r   r
  r   r  Zresult2Zresult3Zto_readZ	col_namesoutZ	bad_appleZbad_apple_pathtZbad_metaZmixed_pathsr#   )r"   r@   r$   test_read_multiple_files  s    




    

"r  c                    s.  d}d}| t   }|  g }g }g }t|D ]t}t||d}	t|| |d | |	_d|	j_|d| }
t	j
|	}t||
 || ||	 ||
 q.tj||d}ddg |j d	 }t fd
d|D }t|| |jt d	 }|j|jkstt|j|jd	| d S )Nrd   r  r,   r   r  rV   Zuint8stringsrX   c                    s   g | ]}|  qS r#   r#   r   rX   r#   r$   r     s     z,test_dataset_read_pandas.<locals>.<listcomp>)r   rA   r   r   rr   r   r   r   r   r   r   r   r   r   r   rN   r   rx   rp   concatr   r   r   r{   r   r   rY   )r   r@   r  r  r  r  framesr	  r   r   r    r   rT   r"   r   r#   rX   r$   test_dataset_read_pandas  s2    



r   c                 C   s   | t   }|  tddd}|dd }tj|}t||dd tj	|d|d}|
 |sht|r|jd 
 |std S )	Nr   r   r  r  r   r   T)
memory_mapr@   )r   rA   r   r   r   r   r   r   r   rN   r   r   r   rP   )r   r@   r  r   r    r   rT   r#   r#   r$   test_dataset_memory_map  s    
  r"  c              	   C   s   | t   }|  tddd}|dd }tj|}t||dd t	t
 tj|d|d W 5 Q R X d	D ]&}tj|||d}| |sptqpd S )
Nr   r   r  r  r   r   i)buffer_sizer@   )   i   )r   rA   r   r   r   r   r   r   r   r6   r   r   rN   r   r   r   )r   r@   r  r   r    r   r#  rT   r#   r#   r$   #test_dataset_enable_buffered_stream.  s&    
  r%  c           	      C   s   | t   }|  tddd}|dd }tj|}t||dd dD ]D}tj	|||d}|
 |spttj|||d}||sJtqJd S )	Nr   r   r  r  r   r   )TF)
pre_bufferr@   )r   rA   r   r   r   r   r   r   r   rN   r   r   r   rC   )	r   r@   r  r   r    r   r&  rT   actualr#   r#   r$   test_dataset_enable_pre_bufferE  s"    
 r(  r   rd   c                 C   sN   g }g }t |D ]8}t||d}| d| }|t|| || q|S )Nr  r  )r   r   r   r   r   )rR   r  
file_nrowsr  r	  r   r   r    r#   r#   r$   _make_example_multifile_datasetZ  s    r*  c                 C   sR   |r(t tt|dd | jD ksNtn&dd |D }t |t | jjksNtd S )Nc                 S   s   h | ]
}|j qS r#   )r    r   r#   r#   r$   	<setcomp>h  s     z(_assert_dataset_paths.<locals>.<setcomp>c                 S   s   g | ]}t | qS r#   )r0   as_posix)r   r    r#   r#   r$   r   j  s     z)_assert_dataset_paths.<locals>.<listcomp>)r   r   r0   _piecesr   Z_datasetfiles)rT   r	  r@   r#   r#   r$   _assert_dataset_pathsf  s    $r/  
dir_prefix_.c                 C   sP   | t   }|  t|ddd}|d|   tj||d}t||| d S )Nr   rd   r  r)  z	{}stagingrV   )r   rA   r*  r   r   rN   r/  r   r0  r@   r  r	  rT   r#   r#   r$   test_ignore_private_directoriesn  s    
r5  c              	   C   s   | t   }|  t|ddd}|d d}|d W 5 Q R X |d d}|d W 5 Q R X tj||d}t||| d S )	Nr   rd   r3  z	.DS_Storer   s	   gibberishz.privaterV   r   rA   r*  r   writer   rN   r/  r   r@   r  r	  r   rT   r#   r#   r$   test_ignore_hidden_files_dot  s    
r9  c              	   C   s   | t   }|  t|ddd}|d d}|d W 5 Q R X |d d}|d W 5 Q R X tj||d}t||| d S )	Nr   rd   r3  Z_committed_123r   s   abcdZ_started_321rV   r6  r8  r#   r#   r$   #test_ignore_hidden_files_underscore  s    
r:  c                 C   sf   | d | t  }|jdd t|ddd}tj||d}t||| tj||d}t||| d S )Nz{0}dataTparentsr   rd   r3  rV   )r   r   rA   r*  r   rN   r/  r4  r#   r#   r$   /test_ignore_no_private_directories_in_base_path  s    r=  c                 C   s   dgd dgd  }t jt tt|t | gddgd}tj|t| dgd | d }|	  tj|t|dgd tj
| |d	gd
}||std S )NZxxxr;   Zyyyr   Z_partr  )partition_colsZ_private_duplicateZ_private)r@   Zignore_prefixes)r   r   rs   r   rO   dictionary_encoder   write_to_datasetr0   rA   rC   r   r   )r   r@   partr   Zprivate_duplicater   r#   r#   r$   test_ignore_custom_prefixes  s&     rC  c                 C   sF   | d }|   tj||d}| }|jdks4t|jdksBtd S )NrT   rV   r   )rA   r   rN   r   r}   r   r  )r   r@   Z	empty_dirrT   r"   r#   r#   r$   test_empty_directory  s     rD  c                 C   s  dd l }dd lm} dd lm} |tdtdttdtj	gd tj
ddddd	d
}|j }	ddg}
tjj||ddd}|j|| |
||d tjt| d}|d k	r||d}||j| W 5 Q R X n||j| |j| |d|d}|r.tjtdd t|j j}W 5 Q R X nt|jj}|t|jjksPt |! }|" }|j }|
|dt#|
 d  kst ||	 }|
D ]}|| d||< q|r|$dj%& }|d ||d< |'|| d S )Nr   
aaabbbbccc
eefeffgeeer   
2017-01-01
2017-01-11datetime64[D]rb   datetime64[ns]group1group2numnanr   rL  rM  F)r   safeZpreserve_indexr>   r   r   T)r?   validate_schemar@   'ParquetDataset.schema'rK   r   r   )(r   pandas.testingtestingr   r   rq   listr   rr   rO  r   r   rY   tolistr   r   r   rA  r   r    r   r0   r   r   r   rN   r   r   r   r   Zto_arrow_schemar  r   r   rx   rO   Zfield_by_nametypeZto_pandas_dtyper   )rR   r@   r?   r   
index_namerp   r   r   	output_dfcolspartition_byoutput_tabler   r   rT   Zdataset_colsinput_tableinput_dfZinput_df_colscolZexpected_date_typer#   r#   r$   &_test_write_to_dataset_with_partitions  s^    




ra  c              
   C   s   dd l }dd lm} |tdtdttdtjddddd	d
}|j	
 }tj|}|d krpt }d}t|D ]}	|j|| ||d q|dd |t| D }
t|
|kst|j| ||d }| }| }|| }t|| d S )Nr   rE  rF  r   rG  rH  rI  rb   rJ  )rL  rM  rN  r   rd   )r@   r?   c                 S   s   g | ]}| d r|qS )z.parquet)endswith)r   filer#   r#   r$   r   H  s    
z8_test_write_to_dataset_no_partitions.<locals>.<listcomp>r>   )r   r   r   rq   rV  r   rr   r   r   rY   rW  r   r   r   r   rG   rA  Zlsr0   rO   r   rN   r   rx   Zdrop_duplicatesr   r   )rR   r@   r?   rp   r   rZ  r[  r]  nr   Zoutput_filesr^  r_  r#   r#   r$   $_test_write_to_dataset_no_partitions-  s>    

 
re  c                 C   s   t t| | d S rF   ra  r0   r   r@   r#   r#   r$   %test_write_to_dataset_with_partitionsX  s    rh  c                 C   st   t t jdt  dt jdt  dt jdt  dt jdt  dt jdt jdddg}tt| ||d	 d S )
NrL  )rX  rM  rN  rO  r   us)unitr  )	r   r   r  r_   r  int32	timestampra  r0   )r   r@   r   r#   r#   r$   0test_write_to_dataset_with_partitions_and_schema^  s      rm  c                 C   s   t t| |dd d S )NrY  )rY  rf  rg  r#   r#   r$   4test_write_to_dataset_with_partitions_and_index_namel  s
      rn  c                 C   s   t t| | d S rF   )re  r0   rg  r#   r#   r$   #test_write_to_dataset_no_partitionsu  s    ro  c                 C   s    t | d | t| d | d S )Ntest1test2)ra  re  rg  r#   r#   r$   test_write_to_dataset_pathlib{  s      rr  c              	   C   sd   |\}}t jtdd t| d ||d W 5 Q R X t jtdd t| d ||d W 5 Q R X d S )Nz"path-like objects are only allowedrK   rp  r   rq  )r   r6   r7   ra  re  )r   r   r@   r   r1  r#   r#   r$   &test_write_to_dataset_pathlib_nonlocal  s        rs  c                 C   s   | \}}t |||d d S Nr   )ra  r   r#   r#   r$   *test_write_to_dataset_with_partitions_s3fs  s      ru  c                 C   s   | \}}t |||d d S rt  )re  r   r#   r#   r$   (test_write_to_dataset_no_partitions_s3fs  s      rv  z,ignore:'partition_filename_cb':FutureWarningc           
      C   s   t tdtdttdtjgd tjddddd}d	d
g}tj	|}t
| }dd }tj|||||d tj||d}ddddddg}dd |jD }	t|t|	kstd S )NrE  rF  r   rG  rH  rI  rb   rK  rL  rM  c                 S   s
   dj |  S )Nz{}-{}.parquet)r   )keysr#   r#   r$   partition_filename_callback  s    z_test_write_to_dataset_with_partitions_and_custom_filenames.<locals>.partition_filename_callbackrV   za-e.parquetza-f.parquetzb-e.parquetzb-f.parquetzb-g.parquetzc-e.parquetc                 S   s   g | ]}t j|jqS r#   )r   r    basename)r   pr#   r#   r$   r     s     zNtest_write_to_dataset_with_partitions_and_custom_filenames.<locals>.<listcomp>)rp   rq   rV  r   rr   rO  r   r   r   r   r0   r   rA  rN   rP   sortedr   )
r   r@   rZ  r\  r]  r    rx  rT   Zexpected_basenamesZoutput_basenamesr#   r#   r$   :test_write_to_dataset_with_partitions_and_custom_filenames  s6    


    r|  c                 C   sX   t ddddgi}tj|}t| }tj||t	 d t
|}||sTtd S )Nr  r,   r:   r;   r   )rp   rq   r   r   r   r0   r   rA  r   r   rC   r   r   )r   r   r   r    r"   r#   r#   r$    test_write_to_dataset_filesystem  s    
r}  Fr&   c              	   C   s   | d }t  }tjt|tj|dddgd}tj	
|}d}t||j}t|D ]}	|| q^W 5 Q R X t|}
|
jj|kst| d }||d}t|j| W 5 Q R X tj| ||d	}|rtt |jt|kstW 5 Q R X |S )
Nr=   r   r   rW   rX   r;   r   r   r>   )r   rG   rp   rq   rr   r   r   r   r   r   r   r   ZParquetWriterr   r   rB   ZParquetFiler  num_row_groupsr   r   r   rN   r   r   r   r   r0   )r   r@   r   r    r   r   r   Z
num_groupswriterr   readerr   r   rT   r#   r#   r$   _make_dataset_for_pickling  s6    

  r  c              	      s    fdd}|| st |rtt | j}W 5 Q R X ||sDt ||jsRt t|js`t |jD ]}||sft qf| jD ]D}||st | }|j	st t
|j	D ]}|||st qq~d S )Nc                    s   |    | kS rF   )loadsdumps)objpicklerr#   r$   is_pickleable  s    z3_assert_dataset_is_picklable.<locals>.is_pickleable)r   r   r   r   r  r   rO   r-  r(   r~  r   r-   )rT   r  r@   r  r  r   r)   r   r#   r  r$   _assert_dataset_is_picklable   s     


r  c                 C   s$   dd l }t| |}t|||d d S )Nr   r  r@   )pickler  r  )r   datadirr@   r  rT   r#   r#   r$   test_builtin_pickle_dataset  s    
  r  c                 C   s&   t d}t| |}t|||d d S )NZcloudpickler  )r   importorskipr  r  )r   r  r@   cprT   r#   r#   r$   test_cloudpickle_dataset  s    

  r  c                 C   s   | d }t dddddddgddd	dddd
gdddddddgd}tj|}tj|t|ddg|d tj||d	 }t
||d  d S )Nz
ARROW-3208rS  r   g      @r&   r   r,   g333333=@r:   r   r   )onetwothreer  r  )	root_pathr?  r@   rV   zoutput.parquet)rp   rq   r   r   r   r   rA  r0   rN   r   rB   )r   r@   r    r   r   r#   r#   r$   test_partitioned_dataset(  s      
r  c           	      C   s4  | d }t jdd tdD d gdgd}t jdd tdD d gdgd}tj|t||d	 tj|t||d	 tj|dg|d
 }|d d	 |d d	 g}|d j
dkst|d d|d d }}||d r||d s0tn(||d st||d s0td S )NzARROW-3325-datasetc                 S   s   g | ]}t d qS r   r   Zrandsr   r#   r#   r$   r   @  s     z0test_dataset_read_dictionary.<locals>.<listcomp>rd   r   Zf0r>  c                 S   s   g | ]}t d qS r  r  r   r#   r#   r$   r   A  s     )r  r@   )read_dictionaryr@   r   r:   r,   )r   r   r   r   rA  r0   rN   r   chunkr@  Z
num_chunksr   r   )	r   r@   r    t1t2r"   Z	ex_chunksZc0c1r#   r#   r$   test_dataset_read_dictionary<  s.    $$ 
r  z(ignore:Passing 'use_legacy:FutureWarningc              	   C   s2  t dt dddgt  i}t|| d  t|| d  t dg}tj| d |d}t jddddgi|d}||st	tj| |d}t jdddddddgi|d}||st	t
jtd	d
 tj| d |dd W 5 Q R X tj| |dd}t jdddddddgi|d}| |s.t	d S )Nr9   r,   r:   r;   zdata1.parquetzdata2.parquet)r9   r  r  z'The 'schema' argument is only supportedrK   r=   Tr  F)r   r   rs   rk  r   rB   r   rC   r   r   r   r6   r   rN   r   )r   r   r   r"   r   r#   r#   r$   test_read_table_schemaX  s0        r  c                	   C   s   t jtdd tjddtg d W 5 Q R X t jtdd tjdddd W 5 Q R X t jtdd tjdddd W 5 Q R X t jtdd tjddd	d
 W 5 Q R X t jtdd tjddtg d W 5 Q R X d S )Nznot yet supported with the newrK    F)r@   r  )r@   rQ  T)r@   Zsplit_row_groupsr   )r@   rJ   r  )r   r6   r   r   rN   r   r   rC   r#   r#   r#   r$   !test_dataset_unsupported_keywords|  s     r  c              	   C   s"  dd l m} | d }|d d d jdd tdd	d
dgi}t|t|d d d d  |jdddgd}tj	t||dd}|j
ddddgksttjt||dd }|j
ddddgksttt tj	t||dd W 5 Q R X tt tjt||dd W 5 Q R X d S )Nr   Ztest_partitioningZ201210Z01Tr;  r9   r,   r:   r;   r=   yearmonthday)field_namesF)partitioningr@   )pyarrow.datasetrT   rA   r   r   r   rB   r0   r  rC   rZ   r   rN   r   r   r6   r   )r   dsr  r   rB  r"   r#   r#   r$   test_dataset_partitioning  sB         
    r  c                 C   s`   t ddddgi}t|| d  tt| t }tjd|d}|	 }|
|s\td S )Nr9   r,   r:   r;   r=   r2  r   )r   r   r   rB   r   ZSubTreeFileSystemr0   r   rN   r   r   r   )r   r   r?   rT   r"   r#   r#   r$   #test_parquet_dataset_new_filesystem  s    r  c                 C   sx   t d}|d}tddddgi}t|| d  t| dd	}tj	|||d
}|d }|j
d j|ksttd S )Nfsspecrc  r9   r,   r:   r;   r=   \r   r>   z/data.parquetr   )r   r  r?   r   r   r   rB   r0   replacerN   rP   r    r   )r   r@   r  r?   r   r    rT   r   r#   r#   r$   6test_parquet_dataset_partitions_piece_path_with_fsspec  s    

  r  c              	   C   s  t ddddgi}| d }t|| tj|dd}tjtdd	 |j W 5 Q R X tjtd
d	 |j	 W 5 Q R X tjtdd	 |j
 W 5 Q R X tjtdd	 |j W 5 Q R X tjtdd	 |j W 5 Q R X tjtdd	 |j W 5 Q R X tjtdd	 |j W 5 Q R X tjtdd	 |j W 5 Q R X tjtdd	 |j W 5 Q R X tjtdd	 |j W 5 Q R X tjtdd	 |j W 5 Q R X tj|dd}tjtdd	 |j W 5 Q R X d S )Nr9   r,   r:   r;   r=   TrV   z'ParquetDataset.piecesrK   z'ParquetDataset.partitionsz'ParquetDataset.memory_mapz'ParquetDataset.read_dictioz'ParquetDataset.buffer_sizez'ParquetDataset.fsrR  z 'ParquetDataset.common_metadata'z'ParquetDataset.metadataz'ParquetDataset.metadata_pathz$'ParquetDataset.common_metadata_pathF)r   r   r   rB   rN   r   r   r   rP   rQ   r!  r  r#  r   r   Zcommon_metadatar  r   r   )r   r   r    rT   r   r#   r#   r$   *test_parquet_dataset_deprecated_properties  s>    r  c              	   C   sx   t ddddgi}| d }tjtdd tj||dd	 W 5 Q R X tjtdd tj||d
d d W 5 Q R X d S )Nr9   r,   r:   r;   r=   z!Passing 'use_legacy_dataset=True'rK   TrV   c                 S   s   dS Nzfilename.parquetr#   r   r#   r#   r$   <lambda>      zEtest_parquet_write_to_dataset_deprecated_properties.<locals>.<lambda>)partition_filename_cb)r   r   r   r   r   r   rA  r   r   r    r#   r#   r$   3test_parquet_write_to_dataset_deprecated_properties  s    r  c              
   C   s>  t ddddgi}| d }tjtdd( tj||dt dt  fgd	 W 5 Q R X tjtd
d tj||ddgd W 5 Q R X tjtdd tj||ddd W 5 Q R X tjtdd tj||ddd d W 5 Q R X tjtdd tj||ddd W 5 Q R X tjtdd tj||ddd W 5 Q R X d S )Nr9   r,   r:   r;   r=   r   rK   T)r@   r   r  )r@   r  r  F)r@   r  file_visitorc                 S   s   | S rF   r#   r  r#   r#   r$   r  *  r  zNtest_parquet_write_to_dataset_unsupported_keywards_in_legacy.<locals>.<lambda>)r@   r  existing_data_behaviorerror)r@   r  basename_templatepart-{i}.parquet)r@   r  )	r   r   r   r6   r   r   rA  r   rk  r  r#   r#   r$   <test_parquet_write_to_dataset_unsupported_keywards_in_legacy  s8    






r  c                    s   t ddddgi}| d }g   fdd}d}tj||dg||d	d
 |d d |d d |d d h}tttj }||kstd S )Nr9   r,   r:   r;   r  c                    s     | j d S rF   )r   r    )Zwritten_fileZpaths_writtenr#   r$   r  <  s    zDtest_parquet_write_to_dataset_exposed_keywords.<locals>.file_visitorr  F)r  r  r  r@   1zpart-0.parquet23)	r   r   r   rA  r   r   pathlibPathr   )r   r   r    r  r  Zexpected_pathsZpaths_written_setr#   r  r$   .test_parquet_write_to_dataset_exposed_keywords5  s     


r  c              	   C   sL  t ddddgi}| d }tjtdd tj||dd	d
 dd W 5 Q R X tjtdd tj||ddd
 dd W 5 Q R X tjtdd tj||ddgdgd W 5 Q R X tjtdd tj||ddgdgd W 5 Q R X tjtdd tj||dg dd
 d W 5 Q R X tjtdd tj||dg dd
 d W 5 Q R X d S )Nr9   r,   r:   r;   r=   zH'basename_template' argument is not supported by use_legacy_dataset=TruerK   Tc                 S   s   dS r  r#   r  r#   r#   r$   r  X  r  z<test_write_to_dataset_conflicting_keywords.<locals>.<lambda>zfile-{i}.parquet)r@   r  r  zM'partition_filename_cb' argument is not supported by use_legacy_dataset=FalseFc                 S   s   dS r  r#   r  r#   r#   r$   r  ^  r  zC'partitioning' argument is not supported by use_legacy_dataset=True)r@   r?  r  zF'partition_cols' argument is not supported by use_legacy_dataset=FalsezC'file_visitor' argument is not supported by use_legacy_dataset=Truec                 S   s   | S rF   r#   r  r#   r#   r$   r  t  r  )r@   Zmetadata_collectorr  zJ'metadata_collector' argument is not supported by use_legacy_dataset=Falsec                 S   s   | S rF   r#   r  r#   r#   r$   r  z  r  )r   r   r   r6   r   r   rA  r  r#   r#   r$   *test_write_to_dataset_conflicting_keywordsO  sL    r  write_dataset_kwarg))
create_dirT)r  Fc              	   C   s   ddl m} tddddgi}| d }t|j}|\}}|ttjj	ksRt
||j	ks`t
tjj|dd	d
:}tj||f||i |jd \}	}
}|| |kst
W 5 Q R X dS )zEVerify kwargs in pq.write_to_dataset are passed onto ds.write_datasetr   Nr9   r,   r:   r;   zout.parquetwrite_datasetT)Zautospec)r  rT   r   r   inspect	signaturer  r   rA  
parametersr   mockpatchrv   Z
mock_calls)r   r  r  r   r    r  keyargZmock_write_dataset_name_argsr   r#   r#   r$   #test_write_to_dataset_kwargs_passed}  s    r  c                 C   s   t t jdddgdddgddddgd}t|}| d	 }tj|| d	 d
g|d dd | D }t|dksxt	d|kst	d S )Nr9   r\   r]   r   r,   r:   r;   )catr`  rT   r  )r?  r@   c                 S   s   g | ]}|  r|jqS r#   )is_dirr   )r   r   r#   r#   r$   r     s      z;test_write_to_dataset_category_observed.<locals>.<listcomp>zcat=c)
rp   rq   r   r   r   r   rA  iterdirrO   r   )r   r@   r   r   r    subdirsr#   r#   r$   'test_write_to_dataset_category_observed  s    
  r  )T)r   rd   )TNNN)TN)Fr&   )F)|r5   r  r   r  numpyrr   r   Zunittest.mockr  Zpyarrowr   Zpyarrow.computeZcomputer   r   r   r   Zpyarrow.testsr   Zpyarrow.tests.parquet.commonr   r   r   Zpyarrow.utilr   Zpyarrow.vendored.versionr	   r   r   r   r
   r   r   ImportErrorr   rp   rT  rU  r   markZ
pytestmarkr%   r+   filterwarningsr1   r8   rE   rI   rU   r[   r   r   Zxfailr7   r   r   rT   r   r   r   r   r   Zparametrizer  castr  r   r   Zs3r   r   r   r   r   rH   rw   r   r   r   r   r  r  r   r"  r%  r(  r*  r/  r5  r9  r:  r=  rC  rD  ra  re  rh  rm  rn  ro  rr  rs  ru  rv  r|  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r#   r#   r#   r$   <module>   s  






V"+"0:!
,(&

a%



    H  
+		"



!

!


.



-