U
    -e                    @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlZd dlZd dlZd dlmZ d dlZd dlZd dlZd dlmZ d dlmZmZmZmZmZ zd dl Z!W n e"k
r   dZ!Y nX zd dl#m$Z% W n e"k
r   dZ%Y nX zd dl&m'Z( W n e"k
rD   dZ(Y nX ej)j$Z*dd Z+dd Z,dd	 Z-ej.ej)j'd
d Z/ej.dd Z0ej.dddd Z1ej.ej)j'dd Z$ej.ddgddgddd Z2ej)j'dd Z3dd Z4ej)j'dd  Z5ej)j'd!d" Z6ej)j'd#d$ Z7ej)j'd%d& Z8ej)j'd'd( Z9ej)j'd)d* Z:ej)j'd+d, Z;d-d. Z<d/d0 Z=d1d2 Z>d3d4 Z?d5d6 Z@ej)j'd7d8 ZAej)j'd9d: ZBej)j'd;d< ZCd=d> ZDd?d@ ZEej)FdAejGdBddCdDdEggej)FdFddgej)j'dGdH ZHej)j'dIdJ ZIdKdL ZJdMdN ZKej)j'dOdP ZLej)j'ddQdRZMej)j ej)j'ej)NdSdTdU ZOej)j ej)j'dVdW ZPej)j ej)j'ej)NdSdXdY ZQej)j ej)j'ej)NdSdZd[ ZRej)j'd\d] ZSej)j ej)j'd^d_ ZTej)j ej)j'ej)NdSd`da ZUej)j ej)j'dbdc ZVddddeZWej)j ej)j'ej)NdSdfdg ZXej)j'dhdi ZYej)j ej)j'djdk ZZej)j ej)j'ej)NdSdldm Z[ej)j ej)j'ej)NdSdndo Z\ej)j ej)j'ej)NdSdpdq Z]ej)j ej)j'ej)NdSdrds Z^ej)j ej)j'ej)NdSdtdu Z_ej)j ej)j'dvdw Z`ej)j'ej)Fdxdydz d{dz gd|d} Zaej)j'ej)Fd~ddgej)Fdxddz ddz gdd Zbej)Fdxddz ddz gdd Zcej)Fdxddz ddz gdd Zddd Zedd Zfej)j'ej)j dd Zgdd Zhdd Zidd Zjdd Zkdd Zldd Zmdd Znej)j'dd Zodd ZpdddZqdd Zrdd Zsdd Ztej)j'dd Zuej)j'dd Zvej)j'dd Zwej)j'dd Zxej)j'dd Zyej)j'dd Zzej)j'dd Z{ej)j'dd Z|ej)j'dd Z}ej)j'dd Z~dd Zdd ZddĄ ZddƄ Zej)FdddgddɄ Zdd˄ Zej)j'dd̈́ Zej)j'ddτ Zej)j'ddф Zej)j'ddӄ ZddՄ Zddׄ Zej)j'ej)Fdddgej)Fdddgej)Fd~ddgej)Fddddgdddgfdddgdddgfdddgdddgfdddgdddgfdddgdddgfdddgdddgfdddgdddgfgdd Zej)j dd Zej.ej)j'dd Zej)j'ej)jdd Zej)j'ej)jdd Zej)j'ej)jdd Zej)j'dd Zej)j'dd Zej)j dd Zej)j'dd Zej)j'dd Zej)j'd d Zdd Zdd Zdd Zdd	 Zej)j'd
d Zej)j'dd Zdd Zej)jdd Zej)jdd Zdd Zej)jdd Zej)j dd Zej)j ej)Fdddddgdd  Zd!d" Zd#d$ Zd%d& Zej)j d'd( Zej)j d)d* Zej)j d+d, Zd-d. Zd/d0 Zd1d2 Zej)j ej)Fdddd3gd4d5 Zd6d7 Zej)j'ej)j d8d9 Zej)j'ej)j ej)jejd:kd;d<d=d> Zej)j'ej)j ej)Fd?ddgej)NdSd@dA Zej)j'dBdC Zej)j'ej)j dDdE ZdFdG ZdHdI Zej)j'ej)j dJdK Zej)j'ej)j dLdM Zej)j'ej)j dNdO Zej)j'ej)j dPdQ Zej)j'dRdS Zej)j'ej)j dTdU Zej)j'ej)j dVdW ZdXdY Zej)j ej)j'dZd[ Zej)j'ej)j d\d] Zej)j'ej)j ej)NdSd^d_ ZĐd`da ZŐdƐdbdcZej)j'ddde Zej)j'ej)j dfdg ZȐdhdi Zɐdjdk Zʐdldm Zej)j'dndo Z̐dpdq Zej)j drds ZΐdǐdudvZϐdwdx ZАdydz Zej)j'd{d| Zej)j'd}d~ Zej)j'dd Zej)j'dd Zej)j'ej)j dd Zej)j'ej)j dd Zאdd Zؐdd Zِdd Zڐdd Zېdd Zej)j'dd Zݐdd Zej)j'dd Zej)j'dd Zej)j'ej)j dd Zdd Zej)j'dd Zej)j'ej)jdd ZdZej)j'ej)jdd Zej)j'dd Zej)j$dd Zej)j$dd Zej)j$dd Zej)Fdddgdd Zej)Fdddgdd Zdd Zdd Zej)Fdddd Zdd Zej)j'dd Zdd Zej)Fdddgdd ZdS (      Nquote)
change_cwd_filesystem_uriFSProtocolClassProxyHandler_configure_s3_limited_userc                 C   s   dd l }dd l}| ddd}|jdd}|dddd	d
g}g }t| D ]&}|||t|t|f ||7 }qJtj	|ddddgdS )Nr   i        )daysgreenblueyellowredZorangedateindexvaluecolorcolumns)
datetime	itertools	timedeltacyclerangeappendfloatnextpd	DataFrame)nr   r   dayintervalcolorsdatai r&   [/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/pyarrow/tests/test_dataset.py_generate_dataA   s    
r(   c              
   C   s\   t t dt  t dt  t dt  t dt  g}t jj| |dd}|	 S )Nr   r   r   r   F)schemaZpreserve_index)
par)   fielddate32int64float64stringTableZfrom_pandasreplace_schema_metadata)dfr)   tabler&   r&   r'   _table_from_pandasQ   s    r4   c              
   C   sd   |   D ]V}| D}t|tjs&t|jr0t| s<t| sHt|	 rTtW 5 Q R X qd S N)
get_fragmentsopen
isinstancer*   Z
NativeFileAssertionErrorclosedseekablereadablewritable)datasetfragmentnfr&   r&   r'   +assert_dataset_fragment_convenience_methods\   s    

rA   c            
      C   s  t  } ddg}t|D ]\}}d||}| | | |}ttdttt	tdttt
td|gd dd tdD g}tdt fdt fd	t fd
t fdtt t dfg}tj||d}tj|g}	t|	| W 5 Q R X q| S )Nzsubdir/1/xxxzsubdir/2/yyyz{}/file{}.parquetr
   c                 S   s"   g | ]}|d  t |d  dqS    abstr).0jr&   r&   r'   
<listcomp>z   s     zmockfs.<locals>.<listcomp>i64f64rH   conststructrD   r)   )fs_MockFileSystem	enumerateformat
create_diropen_output_streamlistr   mapr   rH   r*   r)   r-   r.   r/   rO   record_batchr0   from_batchespqwrite_table)
mockfsdirectoriesr%   	directorypathoutr$   r)   batchr3   r&   r&   r'   r]   g   s2    





r]   c                    sx   ddl m}m} ddlm} |   fddt  fdd}| |d	| || }tjfd
d}||fS )Nr   )PyFileSystemLocalFileSystemr	   )r   c                    s    fdd| D S )Nc                    s   h | ]}  t|qS r&   )normalize_pathrH   rI   plocalfsr&   r'   	<setcomp>   s     z6open_logging_fs.<locals>.normalized.<locals>.<setcomp>r&   )pathsrh   r&   r'   
normalized   s    z#open_logging_fs.<locals>.normalizedc                    s$     t|}| | j|S r5   )re   rH   add_fsopen_input_file)selfr`   )ri   openedr&   r'   ro      s    
z(open_logging_fs.<locals>.open_input_filero   c              	   3   s.      z
d V  W 5   | ks(tX d S r5   )clearr9   )Zexpected_opened)rl   rq   r&   r'   assert_opens   s    
z%open_logging_fs.<locals>.assert_opens)	
pyarrow.fsrc   rd   Ztest_fsr   setsetattr
contextlibcontextmanager)Zmonkeypatchrc   rd   r   ro   rQ   rs   r&   )ri   rl   rq   r'   open_logging_fs   s    ry   module)scopec              
   C   s  | j jd | j jd td}t }t|d\}}}}|d t	t|dD ]8\}}d
|}	||	}
tt||
 W 5 Q R X qZ|d ||jjj|jgD ]L\}}d	j
| }d

|}	|| ||	}
tt||
 W 5 Q R X q|d ||jjj|jjjgD ]N\}}dj
| }d

|}	|| ||	}
tt||
 W 5 Q R X q&|d |dD ]N\}}d
|}d

|}	|| ||	}
tt||
 W 5 Q R X q|S )Npandasparquet     plain
   zplain/chunk-{}.parquetr)   zschema/{}/{}z{}/chunk.parquethivezhive/year={}/month={}Z
hive_colorr   zhive_color/color={})configpyarrowrequiresr(   rQ   rR   npZarray_splitrU   rS   rT   rV   r[   r\   r4   groupbyr   dtZ	dayofweekr   yearmonth)requestr2   r]   Zdf_aZdf_bZdf_cZdf_dr%   chunkr`   ra   partfolderr&   r&   r'   multisourcefs   s@    






"






r   c              
   C   sf   t  }tjddd}t d}t ttdt	 tdt
 g|_t | |||}| S )NsubdirT	recursivegroupkey)dsParquetFileFormatrQ   FileSelectorFileSystemFactoryOptionsDirectoryPartitioningr*   r)   r+   int32r/   partitioningFileSystemDatasetFactoryfinish)r]   rT   selectoroptionsfactoryr&   r&   r'   r>      s    
r>   TFZthreadedserial)paramsZidsc                    s   | j  G  fddd}| S )z]
    Fixture which allows dataset scanning operations to be
    run with/without threads
    c                       sT   e Zd Z fddZ fddZdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dS )zdataset_reader.<locals>.readerc                    s
    | _ d S r5   use_threads)rp   r   r&   r'   __init__   s    z'dataset_reader.<locals>.reader.__init__c                    s   d|krt d |d< d S )Nr   z9Invalid use of dataset_reader, do not specify use_threads)	Exception)rp   kwargsr   r&   r'   _patch_kwargs   s
    z,dataset_reader.<locals>.reader._patch_kwargsc                 [   s   |  | |jf |S r5   )r   to_tablerp   r>   r   r&   r&   r'   r     s    
z'dataset_reader.<locals>.reader.to_tablec                 [   s   |  | |jf |S r5   )r   
to_batchesr   r&   r&   r'   r     s    
z)dataset_reader.<locals>.reader.to_batchesc                 [   s   |  | |jf |S r5   )r   scannerr   r&   r&   r'   r     s    
z&dataset_reader.<locals>.reader.scannerc                 [   s   |  | |j|f|S r5   )r   head)rp   r>   num_rowsr   r&   r&   r'   r     s    
z#dataset_reader.<locals>.reader.headc                 [   s   |  | |j|f|S r5   )r   take)rp   r>   indicesr   r&   r&   r'   r     s    
z#dataset_reader.<locals>.reader.takec                 [   s   |  | |jf |S r5   )r   
count_rowsr   r&   r&   r'   r     s    
z)dataset_reader.<locals>.reader.count_rowsN)__name__
__module____qualname__r   r   r   r   r   r   r   r   r&   r   r&   r'   reader   s   r   )param)r   r   r&   r   r'   dataset_reader   s    	$r   c              	      s,  t t dt  g}t  ddg}dd tddD } fddt||D }td	td
k}tj	|| |d}tj	j
|| ||d}||fD ]}t|tj	stt|jtjst|j|stt|jt|kstt| }t|||D ]\}	}
}|	j|
s t|	j|ks0tt|	jtjsDtt|	tjsVt|	jdgksht|	jdksxtt|	 }|	jt|  krdksn tt|d tjst|d j|kst|d jdgkst|d jdkstqt|jtddkd}t|dkstqtj	|| d}|jtdsVttj	j
|| d}|jtdst| D ]}	|	jtdstqtjtdd t	| | W 5 Q R X tjtdd tj	|| dd W 5 Q R X tjtdd tj	j
| d W 5 Q R X d S )NrN   subdir/1/xxx/file0.parquetsubdir/2/yyy/file1.parquetc                 S   s   g | ]}t d |kqS )r   r   r+   rI   xr&   r&   r'   rK   &  s     z+test_filesystem_dataset.<locals>.<listcomp>r	   rC   c                    s   g | ]\}}  ||qS r&   )make_fragment)rI   r`   r   file_formatr]   r&   r'   rK   '  s   leveli9  )r)   rT   
filesystemroot_partition)r)   rT   r   
partitionsr   r   filter   r)   rT   r   Tzincorrect typematch)r)   rT   r   rT   )r*   r)   r+   r-   r   r   r   zipscalarFileSystemDataset
from_pathsr8   r9   rT   partition_expressionequalsru   filesrW   r6   r`   ParquetFileFragment
row_groupsnum_row_groupssplit_by_row_grouplenpytestraises	TypeError)r]   r)   rk   r   	fragmentsr   Zdataset_from_fragmentsZdataset_from_pathsr>   r?   	partitionr`   row_group_fragmentsr&   r   r'   test_filesystem_dataset  s           $       r   c              	   C   sj   t t dt  g}t }dg}tjj|||t	 d}|
  tt | | W 5 Q R X d S )Nf1znonexistingfile.arrowr   )r*   r)   r+   r-   r   IpcFileFormatr   r   rQ   rd   r6   r   r   FileNotFoundErrorr   )r   r)   r   rk   r>   r&   r&   r'   1test_filesystem_dataset_no_filesystem_interactiong  s      r   c           	      C   s  t | tjstt | jtjs"ttjdddddgt d}tjdddddgt	 d}|
| D ]<}t |tjsxt|d|st|d|sdtqd||  D ]&}t |tjstt |jtjstq|| }t |tjstt|dksttddk}| jd	|d
}|d }|d ddgksDt|d ddgksZtt|d ddgksttt|d ddgksttddk}| jd	|d
}|d }|d ddddgkst|d ddddgkst|d ddddgkst|d ddddgks ttdtdtddkd}| jd	|d}|d }t|dddgksvt|d ddddddddddg
kst|d ddddddddddg
kst|d ddd	d	ddddd	d	g
kstt|  d S )Nr   r	   r   rC   r   typer   rL   T)r   r   r   rM         ?r   xxxyyy)rO   rF   1g      @)rL   rM   new)r   r   r                  @      @F)r8   r   Datasetr9   r)   r*   Schemaarrayr-   r.   r   RecordBatchcolumnr   r   scan_batchesZTaggedRecordBatchr?   ZFragmentr   r0   r   r+   sort_by	to_pydictsortedrW   rA   )	r>   r   expected_i64expected_f64rb   r3   	conditionresultZ
projectionr&   r&   r'   test_dataset}  sf    
&        r   c                 C   s(   | j ddd}t|}|jdks$td S )N      )Zfragment_readaheadZbatch_readahead   )r   r   num_columnsr9   )r>   r   rb   r&   r&   r'   test_scanner_options  s    r   c           	   	   C   s@  |j | t d}t|tjs"tttj	 |j | dgd W 5 Q R X |j | dgt d}|j
| jkspt|jtdt fgkstt|tjst| }| D ]"}|j|jkst|jdkstq||  kst|j|jkstt|jD ],}t|g}||||ks tq ttj |t|jg W 5 Q R X |j| ksnt|j | ddd	d
gt d}| }ddd	d
g}|j|kst|d}|d  dgd dgd  kst|d  dgd dgd  kst|d	  dgd ks t|d
  dgd ks<td S )N)memory_poolunknownr   rL   )r   r   r	   Z
__filenameZ__fragment_indexZ__batch_indexZ__last_in_fragmentr   r
   r   r   r   T)r   r*   default_memory_poolr8   r   Scannerr9   r   r   ArrowInvalidZdataset_schemar)   projected_schemar-   r   r   r   	to_readerread_allr   r   r   r   ZArrowIndexErrorr   column_namesr   	to_pylist)	r>   r   r   r3   rb   r%   r   Zexpected_namesZsorted_tabler&   r&   r'   test_scanner  s^     
 


&r	  c              	   C   s\   t  }t  }t | z0| }tj| }| }| |ksHt	W 5 t | X d S r5   )
r*   r  Zsystem_memory_poolZset_memory_poolZbytes_allocatedr   r  Zfrom_datasetr   r9   )r>   old_poolpoolZallocated_beforer   _r&   r&   r'   test_scanner_memory_pool  s    
r  c                 C   s  | | d}|tjjg | jdks&t|j | ddgd }|ddgiksNt|j | ddgtddkd }|dddgikst|j | d	dgd }|dt	t
d
d ikstt|  }|j ddgd }|ddgikst|j d	dgd }|dt	t
d
ikstd S )Nr   rP   r	   rL   r   r   r   r   rC   i   r
   )r   r*   r0   rZ   r)   r9   r   r   r+   rW   r   r   r6   )r>   r   r   r?   r&   r&   r'   	test_head  s    
r  c              	   C   s   t |  }ddgtddgfD ](}|||}||||ks"tq"tt	 ||tdg W 5 Q R X ddgtddgfD ]$}|| ||| |kstqtt	 || tdg W 5 Q R X d S )Nr	   rC   r
   r   r   )
r   r6   r*   r   r   r   r9   r   r   
IndexError)r>   r   r?   r   expectedr&   r&   r'   	test_take  s     
r  c                 C   s   t |  }||dkst|j|tddkddks>t|| dksPt|j| tddkddkspt|j| tddkddkst|j| tdd	k dd	kstd S )
Nr
   rL   r   r   r	   r   r   rC   r   )r   r6   r   r9   r   r+   )r>   r   r?   r&   r&   r'   test_count_rows+  s"       r  c               
   C   s:   t jt jt jg} | D ] }tt |  W 5 Q R X qd S r5   )r   Z
FileFormatr  Partitioningr   r   r   )classesklassr&   r&   r'   test_abstract_classes;  s    r  c                  C   sZ  t t dt  t dt  g} tjtjtjfD ]8}|| }t	|tj
sRt||| ksbt|dks6tq6t t dt  t dt  g} t| }t|jdksttdd |jD st|d	}t	|tjsttdd
ktddk@ }||sttt j |d W 5 Q R X |d}tdd
k}||sZt|tj| ddksrtt t dt  t dt  g} tj| dd}t|jdksttdd |jD st|d}tdtdktdtd
k@ }||st|d}td tdtd
k@ }||sRtdD ](}tt j || W 5 Q R X qV|tj| ddkstt t dt  t dt  g} t| }t|jdksttdd |jD st|d}t	|tjsttdd
ktddk@ }||s>ttt j |d W 5 Q R X |tj| ddksxtt t dt  t dt t  t  g} tj| dt dddgid }|jd d kst|jd!  dddgkst|tj| d d ksttjt t dt  t dt t  t  gdt dddgid }|jd d ksrt|jd!  dddgkstt jt td"t tj d"t t!d#d$gd%gd&d'd(gd)}t d(t  fg}tjtjtjfD ]\}t"# H}||}tj$||d*|d+ tj%|d*|d+}	|	& }
|
|sHtW 5 Q R X qd S ),NrL   rM   zother objectr   r   r   c                 s   s   | ]}|d kV  qd S r5   r&   r   r&   r&   r'   	<genexpr>X  s     z$test_partitioning.<locals>.<genexpr>z/3/3.14/rC   gQ	@z/prefix/3/aaaz/3/nonesegment_encodingalphabetaxyz)null_fallbackc                 s   s   | ]}|d kV  qd S r5   r&   r   r&   r&   r'   r  n  s     z/alpha=0/beta=3/r   z/alpha=xyz/beta=3/)z/alpha=one/beta=2/z/alpha=one/z
/beta=two/otherc                 s   s   | ]}|d kV  qd S r5   r&   r   r&   r&   r'   r    s     z3_3.14_Zprefix_3_aaa_firstsecondthirddictionariesr	      rE   rF   r   r   f2r   namesipcrT   r   )'r*   r)   r+   r-   r.   r   r   HivePartitioningFilenamePartitioningr8   r  r9   r   r%  allparse
Expressionr   r   r   r  r   is_null
dictionaryint8r/   r   r  r3   r   r   randomrandnrepeattempfileTemporaryDirectorywrite_datasetr>   r   )r)   r  r   exprr  Z
shouldfailr3   partitioning_schematempdir	load_backload_back_tabler&   r&   r'   test_partitioningF  s    




 

       
 r?  c               
   C   s   t t dt  t dt  g} t| t| t| tj| ddtj| ddtj| dddg}|D ]}t	
t	||ksltqld S )NrL   rM   r  r  r  )r  r  )r*   r)   r+   r-   r.   r   r   r,  r-  pickleloadsdumpsr9   )r)   partsr   r&   r&   r'   test_partitioning_pickling  s    	rD  c                  C   s   t tdddgdddgd} t d}t d}| j|d || |d |d| dd	}tddd
gdddgdd
dgdddgd}||std S )Nr	   r   rC   rD   rE   rF   r.   )za+1zb-aza*2za/br   r   r            ?r   g      ?)	r   r>   r*   r3   r+   r   castr   r9   )r>   rE   rF   r   r  r&   r&   r'   $test_expression_arithmetic_operators  s     "


  rI  c                  C   s   dd dD \} }}t | ddiks*tt | t | ksBtt | |@ |@ dd dD ksftt ddk}t |i kstt | |@ ddikstt d }t |dd ikstd S )	Nc                 S   s   g | ]}t ||kqS r&   r   rI   fr&   r&   r'   rK     s     z'test_partition_keys.<locals>.<listcomp>abcrE   c                 S   s   i | ]
}||qS r&   r&   rJ  r&   r&   r'   
<dictcomp>  s      z'test_partition_keys.<locals>.<dictcomp>drC   )r   Zget_partition_keysr9   Z_get_partition_keysr+   r1  )rE   rF   cZnopenullr&   r&   r'   test_partition_keys  s    $rQ  c                  C   s   t  } t jddgd}t jdd}| jt ks4t|jddhksFt| jdksTt|jdksbt| | ksnt| |kszt| |kstd S )NrE   rF   dictionary_columnsmscoerce_int96_timestamp_unitns)r   ParquetReadOptionsrS  ru   r9   rV  )opts1opts2opts3r&   r&   r'   test_parquet_read_options  s    r\  c                  C   sf   t  } t jdhd}t jdd}| jt  ks4t|jt jdgdksLt|jt jddksbtd S )NrE   rR  srU  )r   r   read_optionsrX  r9   )Zpff1Zpff2Zpff3r&   r&   r'   %test_parquet_file_format_read_options  s    r_  c                  C   sn  t  } t jdd}t jddd}t jddd}t jddd	}| jd
ksLt| jdksZt| jd
ksht| jdksvt| jdkst|jd
kst|jdkst|jd
kst|jdkst|jdkst|jd
kst|jd
kst|jdkst|jdkst|jdkst|jdks$t| | ks2t| |ks@t||ksNt||ks\t|| ksjtd S )N   buffer_sizei    T)rb  use_buffered_stream)rb  
pre_bufferi@ i )thrift_string_size_limitthrift_container_size_limitFi @B )r   ParquetFragmentScanOptionsrc  r9   rb  rd  re  rf  )rY  rZ  r[  Zopts4Zopts5r&   r&   r'   test_parquet_scan_options  s@     ri  c                  C   s  t  t  t tjjdddt jtjjddgddt jtjjddd	dt  t jtjjdd
ddt jtjjddddg} z| 	t 
  W n tk
r   Y nX td k	r| t  t jdhdt jddt jdddddg | D ]}tt||kstqd S )N	T)	delimiterZignore_empty_linesrC   foo)	skip_rowsr  r^  i   )rm  
block_sizeignoreZnewlines_in_valuesZunexpected_field_behavior)Zparse_optionsF   r   ro  rE   rR  )rc  r`  {   i  )rc  rb  re  rf  )r   r   CsvFileFormatr*   csvParseOptionsReadOptionsJsonFileFormatjsonr   OrcFileFormatImportErrorr[   extendr   r@  rA  rB  r9   )formatsr   r&   r&   r'   test_file_format_pickling5  sT    
 
 
 
r  c               
   C   s   t  t jtjjdddt jtjjdddt  t tjjddd	t jtjjdd
ddg} t	d k	r| 
t jddt jddg | D ]}tt||kstqd S )NT)strings_can_be_nullconvert_options   ro  rn  Ferrorrq  i   rs  r`  ra  )rd  )r   CsvFragmentScanOptionsr*   rv  ConvertOptionsrx  JsonFragmentScanOptionsrz  rw  r[   r}  rh  r@  rA  rB  r9   )r   optionr&   r&   r'   #test_fragment_scan_options_pickling\  s0    

r  paths_or_selectorr   r   r   r   rd  c                 C   s  t jt jdhd|d}t d}t ttdt tdt	 g|_
|jdks^t|jddgkspt|jd	ks~tt | |||}| }| jttd
t tdt tdtt t	 tdt tdtt t	 dtdt tdt	 gd	ds2tt| tsFtt||t js^t|jt dsvt| }t|t jst| }tjdddddgt d}	tjdddddgt d}
tj tjdddddgt dtjd! t	 d}tdd t"dD }|# }t$|ddgddgD ]\\}}}}tj|gd t d}tj|gd t	 d}tj|d gd t d}|j%d k	st|j&dkst|d |	st|d |
st|d |st|d |st|d |st|d |s0t|d |s<tq<|' }t|tj(sbtt)|dkstt|j&dkstd S ) NrH   rR  )r^  rd  r   r   r   .r  FrL   rM   rN   rO   rD   Zcheck_metadataTr   r	   r   rC   r   r   z	0 1 2 3 4c                 S   s"   g | ]}|d  t |d  dqS rB   rG   rI   r%   r&   r&   r'   rK     s   z+test_filesystem_factory.<locals>.<listcomp>r
   r   r   r   rF  r   )*r   r   rX  r   r   r*   r)   r+   r   r/   r   Zpartition_base_dirr9   Zselector_ignore_prefixesZexclude_invalid_filesr   inspectr   r-   r.   r2  rO   r8   inspect_schemasrW   r   r   r   r   r   r   ZDictionaryArrayfrom_arrayssplitr   r   r   r   r   r   r0   r   )r]   r  rd  rT   r   r   inspected_schemar>   r   r   r   Zexpected_strZexpected_structiteratorrb   r?   r   r   Zexpected_groupZexpected_keyZexpected_constr3   r&   r&   r'   test_filesystem_factoryu  s    

   	


$r  c                 C   s   t  }t jd| |d}|jD ]}||| }|jdgks>t|j|| dgd}||fD ]6}t|t jslt|j	|ksztt|j
t| sXtqX|jdgkstqd S )N/plainr   rT   r   r   )r   r   r>   r   r   r   r9   r8   r   r`   r   r   )r   parquet_formatr>   r`   r?   Zrow_group_fragmentrK  r&   r&   r'   test_make_fragment  s    
r  c                 C   s   t d}t|d}t }||}t|	 tj
s@ttjdddgdddgd	d
dggdddgd}| ||sttt|}| || std S )NzT
        alpha,num,animal
        a,12,dog
        b,11,cat
        c,10,rabbit
    utf-8rE   rF   rO        r   dogcatrabbitr  numanimalr(  )textwrapdedentr*   	py_bufferencoder   ru  r   r8   r7   BufferReaderr9   r3   r   r   r@  rA  rB  )r   contentbuffer
csv_formatr?   r  pickledr&   r&   r'   "test_make_csv_fragment_from_buffer  s    

r  c                 C   s   d}t |d}t }||}t| t js:t	t j
dddgdddgd	d
dggdddgd}| ||szt	tt|}| || st	d S )Nz{"alpha" : "a", "num": 12, "animal" : "dog"}
{"alpha" : "b", "num": 11, "animal" : "cat"}
{"alpha" : "c", "num": 10, "animal" : "rabbit"}
r  rE   rF   rO  r  r  r   r  r  r  r  r  r  r(  )r*   r  r  r   ry  r   r8   r7   r  r9   r3   r   r   r@  rA  rB  )r   r  r  Zjson_formatr?   r  r  r&   r&   r'   #test_make_json_fragment_from_buffer  s    
r  c                 C   s   t dddgt dddgt ddd	gg}|d
  |d |d  g}tjtjddgdddd}|t f||fg}|D ]z\}}t j|dddgd}t  }t	|| |
 }||}	| |	|sttt|	}
| |
|stqd S )NrE   rF   rO  r  r  r   r  r  r  r   r	   r   r  r  rR  Tr`  )r^  rc  rb  r  r(  )r*   r   dictionary_encoder   r   rX  r3   ZBufferOutputStreamr[   r\   getvaluer   r   r   r9   r@  rA  rB  )r   arraysZdictionary_arraysZdictionary_formatcasesZformat_r3   ra   r  r?   r  r&   r&   r'   &test_make_parquet_fragment_from_buffer  s6    

	

r  c                 C   sn   t jtddgd dgd dgd  gdddgd	}t| d
 }tj||dg|d tj|dd|d}||fS )Nr   r	   rE   r   rF   r   r'  r   r(  test_parquet_dataset)partition_cols
chunk_sizer}   r   )rT   r   r   )r*   r3   r   rH   r[   write_to_datasetr   r>   )r<  r  r   r3   r`   r>   r&   r&   r'   _create_dataset_for_fragments.  s     "    r  z6ignore:Passing 'use_legacy_dataset=True':FutureWarningc                 C   s:  t | \}}t| }t|dks(t|d }ddg}|jj|ksHt|j|j	|j
|jksdt|jtddks~t||}|j|kst||dddst|j||jd}|jdddgkst||ddst|j|jdkst|j||jtddk d	}|jdddgks6td S )
Nr   r   r   r'  r   rE   r   rP   )r)   r   )r  rW   r6   r   r9   physical_schemar)  rT   r  r`   r   r   r   r   r+   r   r  Zremove_columnslicer)   remove)r<  r   r3   r>   r   rK  Zphysical_namesr   r&   r&   r'   test_fragmentsA  s*    
  r  c                 C   s   t jtddgd dgd  gddgd}t| d }tj||dgd	 tjt d
gdd}tj	|d|d}|j
tddkd}tt|dkstd S )Nr   r	   r   r   colr   r(  r  r  )r   r3  r   flavorr}   r+  r   )r*   r3   r   rH   r[   r  r   r   r)   r>   r6   r+   r   rW   r9   )r<  r3   r`   r   r>   r   r&   r&   r'   test_fragments_implicit_caste  s    *r  c           	   	      s  t | \ }d fdd	}t| d }|j}tt|}||||ksXt|j	|j
|j|jd}||||st||d |j	|j
|j|jd}||dtddk d	 |j	|j
|j|jd}||ddgtdd
k d |j	|j
|j|jd}||dtddkd	 d|jddd }tjt|d4 |j	|j
|j|jd}|j|tddkd	 W 5 Q R X d S )Nc                    sP   | j  j||d}|r|n j}|j|ks.t j| |}||sLtd S )Nr)   r   r   )r   r)   r  r9   r  selectr   )r?   Z	row_slicer   r   actualr  r  r3   r&   r'   assert_yields_projectedz  s      z;test_fragments_reconstruct.<locals>.assert_yields_projectedr   )r   )r   r   )r   r   r   r   r   r   r  r   rE   z&No match for FieldRef.Name\(part\) in Fr   )NN)r  rW   r6   rT   r@  rA  rB  r   r9   r   r`   r   r   r   r   r+   r  Z	to_stringr   r   
ValueError)	r<  r   r>   r  r?   r  pickled_fragmentnew_fragmentpatternr&   r  r'   test_fragments_reconstructs  sl       
 

     r  c                 C   sb  t | dd\}}t| d }t| }t||j  krFdksLn t|j|d |jd}|j	dddgksttt|dkst|
|ddst|d jd k	st|d jdkst|d jd jddd	ddd	d
kstt|jtddk dd }t|tddk }t|dks0t|j|d tddk d}t|dks^td S )Nr   r  r   rP   r   r'  r   r	   minmaxr   r'  r   )r  rW   r6   r   r   r   r9   r   r)   r  r   r  r   
statisticsr   r+   )r<  r   r3   r>   r?   r   r   r&   r&   r'   !test_fragments_parquet_row_groups  s2       r  c                 C   s   t dtdi}tj|| d dd tj| d dd}t| d }|j	j
|j|jd	d
gd}|jdkspt|  |jdkstt|jdkstd S )NrE   r   test.parquetr   row_group_sizer}   r   r   r	   rC   r  )r*   r3   r   r[   r\   r   r>   rW   r6   rT   r   r`   r   r   r9   ensure_complete_metadatar   r   )r<  r3   r>   Zoriginal_fragmentr?   r&   r&   r'   %test_fragments_parquet_num_row_groups  s     r  c                 C   s   dd l }|tddgddgd}|d d|d< tt|| d	  dd lm	} |	| d	 }|j
||ddkd
}|jd | k  std S )Nr   rE   rF   r	   r   )col1col2r  categoryztest_filter_dictionary.parquetr   )r|   r   dictastyper[   r\   r*   r3   pyarrow.datasetr>   r   r+   Ziloc	to_pandasr.  r9   )r<  r   r   r2   r   r>   r   r&   r&   r'   ,test_fragments_parquet_row_groups_dictionary  s    r  c           
   	   C   sZ  |\}}t | d|d\}}t| d }||jg |  W 5 Q R X |jddgks\t|g  |  W 5 Q R X t|jt	j
st|jj|j|jddgd}|j|jkst|  |jd }|jdkst|jdkst|jd k	sttt|}	||jgD |	jddgks"t|	jd }|jdks<t|jd k	sLtW 5 Q R X d S )Nr   r  r   r   r	   r  )r  rW   r6   r`   r  r   r9   r8   metadatar[   ZFileMetaDatarT   r   r   idr   r  r@  rA  rB  )
r<  ry   rQ   rs   r  r>   r?   r  	row_groupr  r&   r&   r'   &test_fragments_parquet_ensure_metadata  s<      

  

r  c           	   	   C   s|   |\}}t | |d\}}t| d }|g  tt|}W 5 Q R X ||jg |j}W 5 Q R X |dgksxtd S )Nr   r	   r   )	r  rW   r6   r@  rA  rB  r`   r   r9   )	r<  ry   rQ   rs   r  r>   r?   r  r   r&   r&   r'   )test_fragments_parquet_pickle_no_metadata#  s    
r  c                 C   s  t jt dd dgt  t dddgt  t dddgt  t dddgt  t dddgt  t dddgt  t dddgt 	 t dddgt 
 t dddgt  t dddgt  t dddgt  t d	d d
gt  t d	d d
gt  t dddgt dt dddgt dt dddgt dt dddgt  t dddgt  t dddgt dt dddgt dgdddddddddddddddddd d!d"gd#}t| d$ }tj||d|d% |tj|d&d'd(fS ))NTFr	   r   *   r   g      $@      E@rE   zr]  rT  usl    jt booleanr3  uint8int16uint16r   uint32r-   uint64r   doubleutf8binaryts[s]ts[ms]ts[us]r,   date64time32time64r(  Ztest_parquet_dataset_all_types)use_legacy_datasetr  r}   r   r+  )r*   r3   r   Zbool_r3  r  r  r  r   r  r-   r  Zfloat32r.   r  r  	timestampr,   r  r  r  rH   r[   r  r   r>   )r<  r  r3   r`   r&   r&   r'   _create_dataset_all_types6  sb    /
r  c                    s  t | \}}t| d }dd l  fdd} fdd} fdd} j} j}t| }	|	d jd k	srt|	d jd }
|
j	dkst|
j
d	kst|
jd
dddddddddddddddddddddddddddddddddddddd|d|dd|d|dd|d|dd|ddd|dddd|ddd|dddd|ddd|dddd|dddd|ddddddkstd S )Nr   c                    s     ddddd| S N  r	   r   r   r   r  r&   r'   dt_sy      z.test_parquet_fragment_statistics.<locals>.dt_sc              
      s     dddddd| d S )Nr  r	   r   r~   r  r  r  r&   r'   dt_msz  r  z/test_parquet_fragment_statistics.<locals>.dt_msc              	      s     dddddd| S r  r  r  r  r&   r'   dt_us{  r  z/test_parquet_fragment_statistics.<locals>.dt_usrC   r~   FTr  r	   r  r   r  rE   r     a   zr  r   r     )r  r3  r  r  r  r   r  r-   r  r   r  r  r  r  r  r  r,   r  r  r  )r  rW   r6   r   r   timer   r   r9   r   Ztotal_byte_sizer  )r<  r3   r>   r?   r   r  r  r   r  r   r  r&   r  r'    test_parquet_fragment_statisticso  sF    r  c                 C   sv   t ddd d gddd d gd}tj|| d dd tj| d d	d
}t| d  }|d j	d j
i ksrtd S )Nr   r	   rE   rF   rD   r  r   r  r}   r   )r*   r3   r[   r\   r   r>   rW   r6   r   r   r  r9   )r<  r3   r>   r   r&   r&   r'   &test_parquet_fragment_statistics_nulls  s
     r	  c                 C   sx   t dddgdddgdd d }|j| d d	d
 tj| d dd}t| d  }|d jd j	i kstt
d S )NrE   rF   r   r
   rF  rD   r   r  r   Zenginer}   r   )r   r   
to_parquetr   r>   rW   r6   r   r   r  r9   )r<  r2   r>   r   r&   r&   r'   'test_parquet_empty_row_group_statistics  s
    $r  c                 C   s   t | dd\}}t| d }|jtddks:tt|jtddk|j	d}t
|dkshtt|jtddk|j	d}t
|dkstd S )Nr   r  r   r   rE   r   r)   rF   )r  rW   r6   r   r   r   r+   r9   r   r)   r   )r<  r3   r>   r?   r   r&   r&   r'   +test_fragments_parquet_row_groups_predicate  s    r  c           
   	   C   s6  t | dd\}}t| d }|j}t| }tt|}||||ksZt	|j
|j|j|jdgd}||}	|	||d st	|j
|j|j|jdhd}|j||jddgtddk d	}	|	jddgkst	t|	dkst	|j
|j|j|jdhd}tjtd
d || W 5 Q R X d S )Nr   r  r   )r   r   r	   r   r   rC   r  zreferences row group 2r   )r  rW   r6   rT   r   r@  rA  rB  r   r9   r   r`   r   r   r   r)   r   r+   r  r   r   r   r  )
r<  r   r3   r>   r?   r  r   r  r  r   r&   r&   r'   -test_fragments_parquet_row_groups_reconstruct  sP     
    r  c           
   	   C   s  |\}}t | d|d\}}t| d }|jddgd}|g : |jdksRt|jddgksdt|jd jd k	sxtW 5 Q R X ||}	|		 ddgddgdkst|jg d}|jdkst|jg kst|j||j
d}	|	jdkst|	|d d std S )	Nr	   r  r   rC   Zrow_group_idsr   r  rP   )r  rW   r6   subsetr   r9   r   r  r   r   r)   r   r   
r<  ry   r   rQ   rs   r3   r>   r?   Zsubfragr   r&   r&   r'   !test_fragments_parquet_subset_ids  s$    


r  c           
   	   C   sH  |\}}t | d|d\}}t| d }|tddk}|g : |jdksVtt|j	dksht|j	d j
d k	s|tW 5 Q R X ||}	|	 dddgdddgdkst|tddk}|jdkst|j	g kst|j||jd	}	|	jdkst|	|d d st|jtd
dk|jd	}|jdksDtd S )Nr	   r  r   r   rC   r   r  r
   rP   r   rE   r   )r  rW   r6   r  r   r+   r   r9   r   r   r  r   r   r)   r   r   r  r&   r&   r'   $test_fragments_parquet_subset_filter  s(    


"r  c              	   C   st   t | dd\}}t| d }tt" |jtddkddgd W 5 Q R X tt |  W 5 Q R X d S )Nr	   r  r   r   r   r  )	r  rW   r6   r   r   r  r  r   r+   )r<  r  r>   r?   r&   r&   r'   %test_fragments_parquet_subset_invalid<  s    &r  c                 C   s   t | d }t|dks tt| \}}tj|dd}t | d }t|d|j	t
|kslt| d }tj|| tj|dd}t | d }t|d|j	t
|kstd S )	Nr   zb<pyarrow.dataset.ParquetFileFragment path=subdir/1/xxx/file0.parquet partition=[key=xxx, group=1]>r}   r   z-<pyarrow.dataset.ParquetFileFragment path={}>data.featherfeatherz/<pyarrow.dataset.FileFragment type=ipc path={}>)rW   r6   reprr9   _create_single_filer   r>   rT   r   re   rH   r*   r  write_feather)r<  r>   r?   r3   r`   r&   r&   r'   test_fragments_reprL  s0    r  r  c                 C   s   | S r5   r&   r  r&   r&   r'   <lambda>o  r  r  c                 C   s   t t | S r5   r@  rA  rB  r  r&   r&   r'   r  o  r  c           
      C   s   t jddd}t }td}tjddg}||}t|tjsHt	||_
t| |||}| }tdt fdt fdt fd	t fd
tt t dfdt fdt fg}||st	tj }	t|	tjst	d S )Nr   Tr   r   r   rL   rM   rH   rN   rO   rD   )rQ   r   r   r   r   r   discoverr8   PartitioningFactoryr9   partitioning_factoryr   r  r*   r)   r-   r.   r/   rO   r   r   r,  )
r]   r  r  rT   r   r   r   r  expected_schemaZhive_partitioning_factoryr&   r&   r'   test_partitioning_factorym  s4    
   





	
r"  infer_dictionaryc                 C   s   | S r5   r&   r  r&   r&   r'   r    r  c                 C   s   t t | S r5   r  r  r&   r&   r'   r    r  c                 C   s6  t jddd}t }td}tjjddg|d}|||_t| |||}|	 }|rt
t
 t
 }	|dj|	kst|   }
|
dd}t
dgd	 d
gd	   }||st| jtddkd}
|
dd}|dd	}||s2tn|djt
 ks2td S )Nr   Tr   r   r   r#  r   r   r
   r   r   )rQ   r   r   r   r   r   r  r   r   r  r*   r2  r   r/   r+   r   r9   r   r   combine_chunksr   r   r   r  r   r  )r]   r#  r  r  rT   r   r   r   inferred_schemaexpected_typer3   r  r  r&   r&   r'   $test_partitioning_factory_dictionary  s6    
 
   r(  c                 C   s   | S r5   r&   r  r&   r&   r'   r    r  c                 C   s   t t | S r5   r  r  r&   r&   r'   r    r  c                 C   sJ  t  }t }tdt fg}tjtt	dg|d}tdt
dfdt fg}tdt fdt fg}tt|t| }dD ]T}|| ||d 2}	tj|	|}
|
| |
  W 5 Q R X W 5 Q R X qt jd	d
d}td	}tjj|d}| ||_t||||}| }||ksBt| jdtdt id}|d d  dksttjjddgdd}| ||_t||||}t|   }|d j!"tddktddk@ sttj|dd}| ||_#t||||}t|   }|d j!"tddktddk@ sRttjj|dd}| ||_t||||}t$j%tj&dd | }W 5 Q R X t jdd
d}td}tj'j|d}| ||_t||||}| }||kst| jdtdt id}|d d  dks4ttj'jdd}| ||_t||||}t|   }|d j!"tddktddk@ sttj'|dd|_#t||||}t|   }|d j!"tddktddk@ sttj'j|dd}| ||_t||||}t$j%tj&dd | }W 5 Q R X d S )NrL   r   rP   r   r]  r/   )z%directory/2021-05-04 00%3A00%3A00/%24z,hive/date=2021-05-04 00%3A00%3A00/string=%24
/0.featherr_   Tr   date_intr   r   逎`r  r  2021-05-04 00%3A00%3A00%24r)   r  +Could not cast segments for partition fieldr   r   )(rQ   rR   r   r   r*   r)   r-   r3   r   r   r  r/   rW   rU   rV   r*  new_filer\   closer   r   r   r  r   r   r  r9   r   r   r+   rH  as_pyr6   r   r   r   r   r   r  r,  )r  r]   rT   r)   r3   partition_schemastring_partition_schemafull_schemar_   sinkwriterr   r   r   r   r&  r  r   r   r&   r&   r'   *test_partitioning_factory_segment_encoding  s    



  


 


 


 


 

 
r8  c                 C   s   | S r5   r&   r  r&   r&   r'   r    r  c                 C   s   t t | S r5   r  r  r&   r&   r'   r    r  c              
   C   s  t  }t }tdt fg}tjtt	dg|d}tdt
dfdt fg}tdt fdt fg}tt|t| }tdt
dfdt fg}tdt fdt fg}	d	}
||
 ||
d
 2}tj||}|| |  W 5 Q R X W 5 Q R X t jddd}td}tjj|d}| ||_t||||}| }||kszt| jdtdt id}|d d  dksttjjdd}| ||_t||||}t|   }|d j!"tddktddk@ sttj|dd}| ||_#t||||}t|   }|d j!"tddktddk@ sttjjdd}| ||_t||||}t|   }|d j!"tddktddk@ sttj|	dd}| ||_#t||||}t|   }|d j!"tddktddk@ sPttjj|dd}| ||_t||||}t$j%tj&dd | }W 5 Q R X d S )NrL   r   rP   ztest'; dater]  ztest';[ string'ztest%27%3B%20dateztest%27%3B%5B%20string%27zLhive/test%27%3B%20date=2021-05-04 00%3A00%3A00/test%27%3B%5B%20string%27=%24r)  r   Tr   r*  r   r   r+  urir  z2021-05-04 00:00:00$r  r,  r-  r.  r/  r   )'rQ   rR   r   r   r*   r)   r-   r3   r   r   r  r/   rW   rU   rV   r*  r0  r\   r1  r   r   r,  r  r   r   r  r9   r   r   r+   rH  r2  r6   r   r   r   r   r   r  )r  r]   rT   r)   r3   r3  r4  r5  Zpartition_schema_enZstring_partition_schema_enr_   r6  r7  r   r   r   r   r&  r  r   r   r&   r&   r'   ;test_partitioning_factory_hive_segment_encoding_key_encoded  s    






 


 





 


 
r;  c              
   C   sv   t ddd gdddgd}tt t dt  t dt  g}tt j	 tj
|| d|d W 5 Q R X d S )	Nr   yr  rD   rE   rF   r*  r+  )r*   r3   r   r   r)   r+   r/   r   r   r  r9  r<  r3   r   r&   r&   r'   /test_dictionary_partitioning_outer_nulls_raisesn  s    $r>  c              	   C   sD   t ddd gdddgd}tt t|| d W 5 Q R X d S )Nr   r<  r  rD   zbasename-{i}.arrow)r*   r3   r   r   r   r   r9  )r<  r3   r&   r&   r'   test_positional_keywords_raisesv  s    r?  c                 C   s   d}t t d|d t|d d}tj|d | | d dgd tj|d |d  | d dgd tj| d dgd	}|d jdkst	tj| d dd
gd	}|d jdkst	tj| d dgd	}|d jdkst	d S )Ni   r   r	   )r   r   oner   r  twor   r   r   )
r*   r3   r6  r   aranger[   r  
read_tableZ
num_chunksr9   )r<  Z
BATCH_SIZEr3   r&   r&   r'   test_read_partition_keys_only|  s*    
  rD  c                    s    t  }t fdd|D S )Nc                    s"   g | ]}t jt j |qS r&   )osr`   isdirjoin)rI   elbasedirr&   r'   rK     s     z _has_subdirs.<locals>.<listcomp>)rE  listdirany)rJ  elementsr&   rI  r'   _has_subdirs  s    
rN  c                 C   sZ   t | D ]J}t j| |}t j|r
t||}t|rJt||| q
|| q
d S r5   )	rE  rK  r`   rG  rF  	posixpathrN  _do_list_all_dirsr   )rJ  Zpath_so_farr   rK  Ztrue_nestedZnorm_nestedr&   r&   r'   rP    s    rP  c                 C   s   g }t | d| |S )N )rP  )rJ  r   r&   r&   r'   _list_all_dirs  s    rR  c                 C   s    t t| }|t |kstd S r5   )ru   rR  r9   )r<  Zexpected_directoriesZactual_directoriesr&   r&   r'   _check_dataset_directories  s    rS  c              
   C   sn   t dddgddd gd}tt t dt  t dt  g}tj|| d|d t| d	d
dg d S )Nr   r<  r  rD   rE   rF   r*  r+  zx/xzy/y)	r*   r3   r   r   r)   r+   r/   r9  rS  r=  r&   r&   r'   (test_dictionary_partitioning_inner_nulls  s    $rT  c              
   C   sr   t dd dgddd gd}tt t dt  t dt  gd d}tj|| d|d	 t| d
ddg d S )Nr   r  r<  rD   rE   rF   r  r*  r+  za=x/b=xz	a=xyz/b=yz	a=z/b=xyz)	r*   r3   r   r,  r)   r+   r/   r9  rS  r=  r&   r&   r'   test_hive_partitioning_nulls  s     rU  c               	   C   s  t dt  fdt  fg} ddg}t| }t|tjs@ttj| dd}t|tj	s^ttj|d}t|tj	sztt
t t  W 5 Q R X t
jtdd tj| d W 5 Q R X t
jtdd tj| | d W 5 Q R X tj| d	d
}t|tjsttj| dd	d}t|tj	s(ttjd	d
}t|tj	sFtt
t tj|d	d
 W 5 Q R X t
jtdd tj|d	d W 5 Q R X t
t tj| dd
 W 5 Q R X d S )Nr   r   Zinferr$  )field_nameszExpected listr   zCannot specify bothr   r  )r%  r  zCannot specify 'field_names')rV  r  unsupported)r*   r)   r  r3  r   r   r8   r   r9   r  r   r   r  r,  )r)   r)  r   r&   r&   r'   test_partitioning_function  s4    
rX  c                 C   s   t t dt t  t  t dt t  t  g}tjj	|d}tj
dd| |d}|jj|ksnt| }|dj|jd st|d dgd	 d
gd	  kst|dj|jd st|d dgd	 dgd	  kstd S )Nr   r   rP   r   r}   rT   r   r   r   r	   r
   r   r   r   )r*   r)   r+   r2  r3  r   r/   r   r   r  r>   r   r9   r   r   r   r   typesr  )r]   r)   r   r>   r3   r&   r&   r'   *test_directory_partitioning_dictionary_key  s"       &r[  c           	      C   s2  t t dt t  t  t dt t  t  g}tjj|d}tj	dd| |d}|j
j|ksnt| }ttdd}ttd	d
}|dj|jd st|djD ]"}|j }|  ||kstq|dj|jd	 st|djD ]&}|j }|  ||kstqd S )Nr   r   rP   r   r}   rY  i  i  r	      r   )r*   r)   r+   r2  r3  r  r   r,  r  r>   r   r9   r   rW   r   r   r   r   rZ  chunksr  sort)	r   r)   r   r>   r3   Zyear_dictionaryZmonth_dictionaryr   r  r&   r&   r'   %test_hive_partitioning_dictionary_key  s2       

r_  c                 C   sL   |d kr,t tddgd dgd  d}| d }tj|||d ||fS )	N	   r   r   r   r
   rD   r  r  r*   r3   r   r[   r\   )base_dirr3   r  r`   r&   r&   r'   r    s
    $r  c                 C   s   t tddgd dgd  d}| d }t|| t tdddgd dgd  d}| d	 }t|| ||f||ffS )
Nr`  r   r   r   r
   rD   ztest1.parquetr  ztest2.parquetra  )rb  Ztable1path1Ztable2path2r&   r&   r'   _create_directory_of_files   s    $&re  c                 C   sD   | t t | fD ]*}| j|js*t|| |stqd S r5   )r@  rA  rB  r)   r   r9   r   )r>   r3   r   rN  r&   r&   r'   _check_dataset*  s    rf  c              	   K   s   t | tjst| t| | gt| gfD ].}tj| f|}t |tjsJtt||| q(t	| j
2 tj| jf|}t |tjstt||| W 5 Q R X d S r5   )r8   pathlibPathr9   rH   r   r>   r   rf  r   parentname)r`   r3   r   r   rg   r>   r&   r&   r'   _check_dataset_from_path1  s    rk  c                 C   s   t | \}}t||| d S r5   r  rk  r<  r   r3   r`   r&   r&   r'   test_open_dataset_single_fileB  s    rn  c                 C   s    t | dd\}}t||| d S )Nr	   r  rl  rm  r&   r&   r'   test_deterministic_row_orderH  s    ro  c                 C   s&   t | \}}t|}t| || d S r5   )re  r*   concat_tablesrk  )r<  r   tablesr  r3   r&   r&   r'   test_open_dataset_directoryQ  s    
rr  c           	      C   s   t | \}\}}t|}t||gtt|t|gg}|dd |D 7 }|D ].}|j|jsjt|	|}||sTtqTd S )Nc                 S   s   g | ]}t t |qS r&   r  )rI   rN  r&   r&   r'   rK   a  s    z3test_open_dataset_list_of_files.<locals>.<listcomp>)
re  r*   rp  r   r>   rH   r)   r   r9   r   )	r<  r   rq  rc  rd  r3   Zdatasetsr>   r   r&   r&   r'   test_open_dataset_list_of_filesX  s    

rs  c              	   C   s   t | \}}t|}t|}|j|js0ttj|t d}|j|jsTtt	
t tj|t d W 5 Q R X d S )Nr  )r  r   r   r>   r)   r   r9   rQ   rd   r   r   r   rR   )r<  r3   r`   fspathdataset1dataset2r&   r&   r'   #test_open_dataset_filesystem_fspathk  s    
rw  c           
      C   s   | d }|   t|\}}||}t|}tj|t d}tjt|t|d}t	
t	|}	||||  kr||  kr||	ksn td S )Nsingle-filer  )mkdirr  relative_tor   r>   rQ   rd   rH   r   r@  rA  rB  r   r9   )
r<  r   r_   r3   r`   relative_pathd1d2d3d4r&   r&   r'   test_construct_from_single_file  s    


r  c                 C   s   | d }|   t|\}}t|}tj|t d}tj|jt| d}||}||}	||}
||	  kr~|
ksn t	|||fD ]&}t
t
|}|||kst	qd S )Nsingle-directoryr  )ry  re  r   r>   rQ   rd   rj  r   r   r9   r@  rA  rB  )r<  r   r_   rq  rk   r|  r}  r~  t1t2t3rN  restoredr&   r&   r'   $test_construct_from_single_directory  s    



r  c              	      s    d }|   t|\}} fdd|D }t 4 t|}||}t|ttt|ksft	W 5 Q R X tj|t
 d}||}	t|}
||
}tj|t d}||}||	  kr|  kr|ksn t	d S )Nzlist-of-filesc                    s   g | ]}|  qS r&   )rz  rf   r<  r&   r'   rK     s     z5test_construct_from_list_of_files.<locals>.<listcomp>r  )ry  re  r   r   r>   r   r   sumrX   r9   r   rQ   rd   )r<  r   r_   rq  rk   Zrelative_pathsr|  r  r}  r  r~  r  r  Zt4r&   r  r'   !test_construct_from_list_of_files  s    


$



r  c              	   C   s4   ddg}t jtdd tj|| d W 5 Q R X d S )Nr   z!subdir/1/xxx/doesnt-exist.parquetzdoesnt-existr   r  )r   r   r   r   r>   )r]   r   r&   r&   r'   -test_construct_from_list_of_mixed_paths_fails  s
    r  c                 C   s   t jddg| d}t jd| d}t ||g}t|t js>ttt| dksVt| }t|dksnt|j	dks|tt|j
dkst|j
D ]}|jddgkstqd S )	Nr   r   r  r   r   r&  r
   r   )r   r>   r8   UnionDatasetr9   r   rW   r6   r   r   childrenr   )r]   rE   rF   r>   r3   childr&   r&   r'   (test_construct_from_mixed_child_datasets  s"    
r  c                  C   s6   t jg dd} |  }|jdks$t|jdks2td S )Nr*  r   r   )r   r>   r   r   r9   r   )emptyr3   r&   r&   r'   test_construct_empty_dataset  s    r  c               	   C   sP   t jg dtdt fdt fgd} tjtdd | 	  W 5 Q R X d S )Nr*  rE   rT   r)   zMultiple matches for .*a.* in r   )
r   r>   r*   r)   r-   r/   r   r   r  r   )r  r&   r&   r'   *test_construct_dataset_with_invalid_schema  s    


r  c              	      s  t j| tdt  d}t j| tdt  d}tjjtt	dgdgd tjjtt	dgdgd}t
jtdd	 t ||g W 5 Q R X d
}t
jt|d	 t dddg W 5 Q R X d}t
jt|d	 t d  W 5 Q R X d}t
jt|d	" t  fddt	dD  W 5 Q R X d}t
jt|d	 t g  W 5 Q R X d}t
jt|d	 t  |g W 5 Q R X d}t
jt|d	 t  dg W 5 Q R X d}t
jt|d	 t  dg W 5 Q R X d S )Nr  r   /schemar   rE   r(  rF   z"Expected.*FileSystemDatasetFactoryr   zExpected a list of path-like or dataset objects, or a list of batches or tables. The given list contains the following types: intr	   r   rC   zbExpected a path-like, list of path-likes or a list of Datasets instead of the given type: NoneTypezcExpected a path-like, list of path-likes or a list of Datasets instead of the given type: generatorc                 3   s   | ]
} V  qd S r5   r&   rI   r  Zbatch1r&   r'   r  	  s     z<test_construct_from_invalid_sources_raise.<locals>.<genexpr>zEMust provide schema to construct in-memory dataset from an empty listzFItem has schema
b: int64
which does not match expected schema
a: int64z}Expected a list of path-like or dataset objects, or a list of batches or tables. The given list contains the following types:r   zCExpected a list of tables or batches. The given list contains a int)r   r   rQ   r   r   r*   r   r  r   r   r   r   r   r>   r  InMemoryDataset)r   child1child2Zbatch2r  r&   r  r'   )test_construct_from_invalid_sources_raise  sT    &r  c                 C   s   t jjt tdgdgd}t j|g}tjg dt 	g d
 }|t g ksXt|||g|gfD ]l}t|}| 
||ksttt| dkstt| 
 |kstt jt| |kshtqhd S )Nr   rE   r(  r*  r  r	   )r*   r   r  r   r   r0   rZ   r   r>   r)   r   r3   r9   r   rW   r6   r   r   )r   rb   r3   Zdataset_tablesourcer>   r&   r&   r'   test_construct_in_memory1	  s    
r  r   c              
      s   t jjt tdgdgd t j g}d} fddd f fdd jffD ]P\}}tj	j| || d}|
 |ks~ttjt j|d	 |
  W 5 Q R X qRd S )
Nr   rE   r(  z#OneShotFragment was already scannedc                      s   t j j gS r5   )r*   RecordBatchReaderrZ   r)   r&   rb   r&   r'   r  H	  s    z$test_scan_iterator.<locals>.<lambda>c                      s    fddt dD S )Nc                 3   s   | ]
} V  qd S r5   r&   r  r  r&   r'   r  J	  s     z7test_scan_iterator.<locals>.<lambda>.<locals>.<genexpr>r	   )r   r&   r  r&   r'   r  J	  r  r)   r   r   )r*   r   r  r   r   r0   rZ   r)   r   r  r   r9   r   r   r  )r   r3   r   r   r)   r   r&   r  r'   test_scan_iteratorA	  s     
  r  c              	   C   s   t tddgd dgd  d}| d }|  tdD ]6}|d	| }|  t|d| d|d
  q<|dt j	t
dddgdt  d}||fS )Nr`  r   r   r   r
   rD   zdataset-partitionedrC   zpart={}r  r   r   r	   r   r   )r*   r3   r   ry  rT   r[   r\   r  append_columnr   r   r6  r   )rJ  r3   r`   r%   r   
full_tabler&   r&   r'   _create_partitioned_datasetT	  s    $ r  c           	   	   C   sL  t | \}}|ddg}t||| tjt|tjddd}|j|jsRt	t
| . tjdtjddd}|j|jst	W 5 Q R X tjt|dd}|j|jst	tjt|tjtdt fgddd}|jtdt }|j|st	| }|dtjtdd	d
gdt d}||sHt	d S )NrE   rF   r   r  r   zdataset-partitioned/r   r   r	   r   rC   r   )r  r  rk  r   r>   rH   r   r)   r   r9   r   r*   r3  r   r+   r   r  r   r   r6  )	r<  r   r  r`   r3   r>   r!  r   r  r&   r&   r'   'test_open_dataset_partitioned_directorye	  s<     


  r  c              	   C   s   t | \}}tt|}|j|js,ttjt|t d}|j|jsTtt	|  tjdt d}W 5 Q R X |j|jstt
t tjt|t d W 5 Q R X d S )Nr  r  )r  r   r>   rH   r)   r   r9   rQ   rd   r   r   r   r   rR   )r<  r3   r`   ru  rv  dataset3r&   r&   r'   test_open_dataset_filesystem	  s    
r  c              	   C   s:   t | \}}tjtdd tj|gdd W 5 Q R X d S )Nz format 'blabla' is not supportedr   Zblablar   )r  r   r   r  r   r>   )r<  r  r`   r&   r&   r'   $test_open_dataset_unsupported_format	  s    r  c                 C   s`   t | \}}t|}t||g}t|tjs4ttt|}|	||	|ks\td S r5   )
r  r   r>   r8   r  r9   r@  rA  rB  r   )r<  r   r  r`   r>   unionr  r&   r&   r'   test_open_union_dataset	  s    
r  c              	   C   s>   t jd| dd}tjtdd t j|gdd W 5 Q R X d S )Nr  r}   r  zcannot pass any additionalr   r   )r   r>   r   r   r  )r   r  r&   r&   r'   .test_open_union_dataset_with_additional_kwargs	  s    r  c                	   C   sR   t t tjddd W 5 Q R X t jtjdd tjddd W 5 Q R X d S )Nzi-am-not-existing.arrowr*  r   zcannot be relativer   zfile:i-am-not-existing.arrow)r   r   r   r   r>   r*   r  r&   r&   r&   r'   #test_open_dataset_non_existing_file	  s    r  r   r_   r   r  r  partition_keysABCr	   r   rC   DEFr   r
   rF  c                    sn  t tddgd dgd  d}d |d kp:d |d k}|d	krL|rLd S |d	krrtjjd
dg d}d}d }n6|rtjj |d}ntjj d}d}|r|}nd}| d }	|	  |\}
}|
D ]B}|D ]8}|	||p||p| }|jdd t	
||d  qqtjt|	|d} fdd}|jt d
||
d t d||d }|j|sjtd S )Nr`  r   r   r   r
   rD   r   r	   r_   part1part2r$  z{0}/{1})r#  r  zpart1={0}/part2={1}Z__HIVE_DEFAULT_PARTITION__r>   T)parentsr  r  c                    sL    r.t | trt nt }tt |S t | tr@t S t S d S r5   )r8   rH   r*   r/   r   r2  )r   Z
value_typer$  r&   r'   r'  	  s    z/test_partition_discovery.<locals>.expected_type)r*   r3   r   r   r   r  r,  ry  rT   r[   r\   r>   rH   r)   r   r+   r   r9   )r<  r   r  r#  r  r3   Zhas_nullfmtZ
null_valueZbasepathZ
part_keys1Z
part_keys2r  r  r`   r>   r'  r!  r&   r$  r'   test_partition_discovery	  sR    $  r  c                 C   s\  t tddgdtdd}tj|dgjdd}tj	|| |d	d
 tj
| d	tjjddd}t |d |d  d}| |stt| d }|j|jd|d d st|j}tt|}| |sttt|}|j|jd|d d st|j|jd |d d  sFt|j|sXtd S )Nr  r  r
   r   r   r  r   r   r  r  r   rT   Tr$  r+  r  )r  r   r   rP   )r*   r3   r   r6  r   r   r   r  r)   r9  r>   r,  r  r  r   r   r9   rW   r6   r   r@  rA  rB  r  )r<  r3   r   r>   r  r?   Z	part_exprr  r&   r&   r'   4test_dataset_partitioned_dictionary_type_reconstruct

  s.       "
r  c              	   C   s   ddl m} | d \}}}}d||||}||\}}|d tdddd	gi}	|d
}
t	|	|
 W 5 Q R X |	|||||||fS )Nr   
FileSystem
connectionz_s3://{}:{}@mybucket/data.parquet?scheme=http&endpoint_override={}:{}&allow_bucket_creation=TruemybucketrE   r	   r   rC   zmybucket/data.parquet)
rt   r  rT   from_urirU   r*   r3   rV   r[   r\   )	s3_serverr  hostport
access_key
secret_keyr9  rQ   r`   r3   ra   r&   r&   r'   s3_example_simple)
  s       
r  c                 C   s^   | \}}}}}}}}t j|dd}|||s6tt j|d|d}|||sZtd S )Nr}   r   rT   r   )r   r>   r   r   r9   )r  r   r3   r`   rQ   r9  r  r>   r&   r&   r'   test_open_dataset_from_uri_s3?
  s
    r  c                 C   s   | \}}}}}}}}t d}ddlm}	m}
 |j||dd||id}tj|d|d}|	 
|slt|	|
|}tj|d|d}|	 
|std S )	Ns3fsr   )rc   FSSpecHandlerZendpoint_urlzhttp://{}:{})r   secretZclient_kwargsr}   r  )r   importorskiprt   rc   r  S3FileSystemrT   r   r>   r   r   r9   )r  r3   r`   r  r  r  r  r  r  rc   r  rQ   r>   r&   r&   r'   $test_open_dataset_from_uri_s3_fsspecN
  s    
 
	r  c              	   C   s  ddl m} | d \}}}}d}d}d||||||}||\}	}|dksRt|	| tdd	d
dgi}
|	|}t	
|
| W 5 Q R X tj|dd}| |
std||||}ddddddg}|D ]4\}}||}tj||dd}| |
stqtjtjdd |d}tjd|d W 5 Q R X d}d}||}tt}tjd|d W 5 Q R X t|j|d||kstd}||}tt}tjd|d W 5 Q R X t|j|d||kstd S ) Nr   r  r  theirbucketnested/folder/data.parquetzOs3://{}:{}@{}/{}?scheme=http&endpoint_override={}:{}&allow_bucket_creation=truez&theirbucket/nested/folder/data.parquetrE   r	   r   rC   r}   r   3s3://{}:{}@{{}}?scheme=http&endpoint_override={}:{})ztheirbucket/nested/folder/z/data.parquet)ztheirbucket/nested/folderdata.parquet)ztheirbucket/nested/folder/data.parquet)ztheirbucket/nestedr  )r  z/nested/folder/data.parquet)r  r  r  zMissing bucket namer   /z'/theirbucket/nested/folder/data.parquetr  zThe path component of the filesystem URI must point to a directory but it has a type: `{}`. The path component is `{}` and the given filesystem URI is `{}`ztheirbucket/doesnt/existr  ZNotFoundFile)rt   r  rT   r  r9   rU   r*   r3   rV   r[   r\   r   r>   r   r   r   r   r  r  rH   r   )r  r  r  r  r  r  Zbucketr`   r9  rQ   r3   ra   r>   templater  prefixr  excr&   r&   r'   -test_open_dataset_from_s3_with_filesystem_urih
  sj         
   



r  c                 C   sD   t | \}}td}|d}tj||d}|j|js@td S )Nfsspecfiler  )	r  r   r  r   r   r>   r)   r   r9   )r<  r3   r`   r  ri   r>   r&   r&   r'   test_open_dataset_from_fsspec
  s
    

r  c           	      C   s   t d}tddddgi}| d }t|| |d}|| d dsTt	t
 }tt|}|||}||jst	|||}|j|jst	d S )	Nr  rE   r	   r   rC   r  r  r   )r   r  r*   r3   r[   r\   r   Zlsendswithr9   r   r   rQ   rc   r  r  r   r)   r   r  )	r<  r  r3   r`   	fsspec_fsrT   r   r)   r?   r&   r&   r'   test_file_format_inspect_fsspec
  s    

r  c                 C   s  | d }t ddgd tdd}tj|dgjdd	}tj|||d
d tjt dt dfgdd	}tj	|d
|d}t
dtdk}|j||d}|d dddddgkstdd l}t
d|dddk}|j||d}|d dddddgkstd S )NZtest_partition_timestamps
2012-01-01z
2012-01-02r
   r   )datesr  r  r   r  r  r  r]  r+  r   r  r	   rC   r   r`  r   i  )r*   r3   r   r   r   r  r)   r9  r  r>   r+   r   	Timestampr   r   r  r9   r   )r<  r   r`   r3   r   r>   r   r   r&   r&   r'   test_filter_timestamp
  s$    
 r  c              
   C   sp   t dt jddddddgt  di}t| |\}}tt|}tddk}t	|j
||d	dksltd S )
NrE   r   r	   r   rC   r   r
   r   r   )r*   r3   r   r3  r  r   r>   rH   r+   r   r   r9   )r<  r   r3   r  r`   r>   filter_r&   r&   r'   test_filter_implicit_cast
  s
    (r  c                 C   s`   t dddd gi}t| |\}}tt|}|j|tdtd kd}|j	dks\t
d S )Nr  rE   rF   r   r   )r*   r3   r  r   r>   rH   r   r+   r   r   r9   )r<  r   r3   r  r`   r>   r&   r&   r'   test_filter_equal_null
  s     r  c           	      C   s  t ddd ddgdd tdD dd tdd	D d
}t| |\}}tt|}tt	dt 
ddg}|j||djdksttt	ddk}|j||djdksttt	dt	d}|j|d|id}|d  dddddgkstd S )NrE   rF   rO  c                 S   s   g | ]}t  d dd|qS i  r	   r  r  r&   r&   r'   rK   	  s     z2test_filter_compute_expression.<locals>.<listcomp>r
   c                 S   s   g | ]}t  d d|qS r  r  r  r&   r&   r'   rK   
  s     r	   rF  r  r  r  r  r   rC   r  r   r  r   r   r   r   )r*   r3   r   r  r   r>   rH   pcZis_inr+   r   r   r   r9   hourZdays_betweenr  )	r<  r   r3   r  r`   r>   r  r   r   r&   r&   r'   test_filter_compute_expression  s    r  c                 C   s   t j| tdt  d}t |g}t| dks:tt	dd | D sTt| d 
| snt| 
| stt| t jstd S )Nr  r   r	   c                 s   s   | ]}t |tjV  qd S r5   )r8   r*   r   )rI   r]  r&   r&   r'   r  #  s     z%test_dataset_union.<locals>.<genexpr>r   )r   r   rQ   r   r   ZUnionDatasetFactoryr   r  r9   r.  r   r  r8   r   r   )r   r  r   r&   r&   r'   test_dataset_union  s     r  c                 C   s  t jd|dd}t jd|dddgd}t jd|dd	d}|j|j  krR|jksXn tt |||g}t|t jsxtd
}tjt|d t j||g|d W 5 Q R X t	dt	
 fdt	 fdt	 fdt	 fdt	 fdt	 fdt	 fg}|j|s
t| j|s tt ||g}t	dt	
 fdt	 fdt	 fdt	 fdt	 fdt	 fg}|j|st| j|stt	dt	 fdt	 fdt	
 fg}t j||g|d}| j|stt	dt	 fdt	 fdt	 fg}t j||g|d}| j|s<tt	jtddgd dgd  dgdddgd}t| |d\}	}
t |
}tjt	jdd t ||g W 5 Q R X d S )Nr  r}   r  r  weekr   r   rT   r   /hiver   z$cannot pass any additional argumentsr   r  r   r   r   r   r   rP   r   r`  r   r   r   r
   Z	abcdefghjr(  r  zUnable to merge)r   r>   r)   r9   r8   r  r   r   r  r*   r,   r-   r.   r/   r   r   r   r3   r   r  r  )r<  r   r  r  Zchild3Z	assembledmsgr!  r3   r  r`   Zchild4r&   r&   r'   &test_union_dataset_from_other_datasets)  sp    

 






	











 
r  c              	   C   s6   d}t jt|d tjdddg| d W 5 Q R X d S )Nz8points to a directory, but only file paths are supportedr   r  r  r  r  )r   r   IsADirectoryErrorr   r>   )r   r  r&   r&   r'   4test_dataset_from_a_list_of_local_directories_raisesk  s    r  c              
   C   s   t t jd| dt jd| dt jd| dg}tdt fdt fdt fdt fg}|j|spt	t t jd| dt jd| dt jd| d	d
g}tdt fdt fdt fdt fdt
 fdt
 fg}|j|st	d S )Nr  r  r  r  r   r   r   r   r   )r   r   r   r   )r   r>   r*   r)   r,   r-   r.   r/   r   r9   r   )r   r>   r!  r&   r&   r'   &test_union_dataset_filesystem_datasetsq  s4    









r  c              	      s  t dddgdddgd}t|d  d fd	d
	}d }|}||||jd |j}|}||| t ddg}t jdddgdddggddgd}||| t dg}t jdddggdgd}||| t ddg}t jdddgt jd d d gddgddgd}||| t ddg}tjtd |d}t j|d 	d|d gddgd}||| t dt 
t  fdg}tjtd |d}|j|sttjtdd  | W 5 Q R X d S )Nr	   r   rC   皙?皙?333333?rD   r  c                    s\   t jtd | d}|d k	r0|j|s@tn|j| s@t |}||sXtd S )Nr  rP   )r   r>   rH   r)   r   r9   r   )r)   r  r!  r>   r   r   r<  r&   r'   rf    s    
z-test_specified_schema.<locals>._check_dataset)r!  )rF   r.   )rE   r-   rF   rE   r(  )rO  r   r   r   rO  )rE   r   rP   z#Unsupported cast from int64 to listr   )N)r*   r3   r[   r\   r)   r   r   r>   rH   rH  list_r   r   r9   r   r   NotImplementedErrorr   )r<  r   r3   rf  r)   r  r>   r&   r  r'   test_specified_schema  sJ    

"



r  c              	   C   s   | d }t ddddgi}t|| t dt  fg}tjt|gd |d}|j	|sdt
||}tjtdd	 | }|  W 5 Q R X d S )
Nr  rE   r	   r   rC   d   rP   z#Unsupported cast from int64 to nullr   )r*   r3   r[   r\   r)   rP  r   r>   rH   r   r9   r   r   r   r  r  r  )r<  r   fnr3   r)   r>   r   r   r&   r&   r'   test_incompatible_schema_hang  s    
r  c           	   	   C   s   t t jdddgddt jdddgd	dd
}t| d }t |.}t ||j}|| d  |	  W 5 Q R X t
j|t
 d}||}||stt| dD ]*}t
j||d}||}||stqd S )Nr	   r   rC   r3  r   r  r  r  r.   rD   z
test.arrowr   r   )r*  arrow)r*   r3   r   rH   Zoutput_streamZRecordBatchFileWriterr)   Zwrite_batchr   r1  r   r>   r   r   r   r9   rA   )	r<  r   r3   r`   r6  r7  r>   r   
format_strr&   r&   r'   test_ipc_format  s     

r   c              	   C   s  ddl m} ttjdddgddtjdd	d
gddd}t| d }||| tj|t	 d}t
| }t|d tjst||}|jdd ||stt| tj|dd}||}|jdd ||st|j|dgd}|jdd ||dgst|j|dtdd id}|jdd |tdtjd	ddgddisjt||dks~t|j|tddkddkstd S )Nr   orcr	   r   rC   r3  r   r  r  r  r.   rD   test.orcr   T)fullr  rF   r   b2g?g333333?rE   r   )r   r  r*   r3   r   rH   r\   r   r>   r{  rW   r6   r8   ZFileFragmentr9   r   validater   rA   r  r+   r   )r<  r   r  r3   r`   r>   r   r   r&   r&   r'   test_orc_format  s<    

 
r  c                 C   s   ddl m} ttjdddgddtjdd	d
gddd}t| d }||| tj|dd}t	|
|}t|dkst|d jdkst|d |
 d std S )Nr   r  r	   r   rC   r3  r   r  r  r  r.   rD   r  r  r   )r   r  r*   r3   r   rH   r\   r   r>   rW   r   r   r9   r   r   )r<  r   r  r3   r`   r>   r   r&   r&   r'   test_orc_scan_options  s    r  c                  C   sR   zddl m}  W n< tk
rL   tjtdd tjddd W 5 Q R X Y nX d S )Nr   r{  z'not built with support for the ORC filer   r  r  r   )r  r{  r|  r   r   r  r   r>   r	  r&   r&   r'   test_orc_format_not_supported1  s     r
  c               	   C   sf   t jtdd$ tjtdtdiddd W 5 Q R X t } t jtdd | 	  W 5 Q R X d S )Nz9Writing datasets not yet implemented for this file formatr   rE   r   r  z/tmp)rT   rb  )
r   r   r  r   r9  r*   r3   r   r{  make_write_options)Zofr&   r&   r'   +test_orc_writer_not_implemented_for_dataset<  s      r  c                 C   s   t t jdddgddt jdddgd	dd
}t| d }| j|dd tj|t d}|	|}|
|sxtt| tj|dd}|	|}|
|std S )Nr	   r   rC   r-   r   r  r  r  r.   rD   test.csvFr   r   rv  )r*   r3   r   rH   r  to_csvr   r>   ru  r   r   r9   rA   )r<  r   r3   r`   r>   r   r&   r&   r'   test_csv_formatN  s    

r  compressionbz2gzipZlz4Zzstdc              	   C   s   t j|std| ttjdddgddtjddd	gd
dd}t	
 }|dkr`|nd}t| d|  }|j||d&}| jdd}||d W 5 Q R X tj|t d}	||	}
|
|std S )Nz{} support is not builtr	   r   rC   r-   r   r  r  r  r.   rD   r  gzz	test.csv.r  Fr  r  r   )r   Codecis_availabler   skiprT   r*   r3   r   rQ   rd   rH   rV   r  r  writer  r   r>   ru  r   r   r9   )r<  r  r   r3   r   suffixr`   r6  Zcsv_strr>   r   r&   r&   r'   test_csv_format_compresseda  s    
r  c                 C   s  t | d }t|d}|d W 5 Q R X tj|dd}||}|tdt	ddd	gisht
tj|tjtjjd
ddd}||}|tdt	dd	gist
tj|tjtjjdgddd}||}|tdt	dddd	gist
d S )Nr  wzskipped
col0
foo
bar
rv  r   Zskippedcol0rl  barr	   )rm  rn  r  )rH   r7   r  r   r>   r   r   r*   r3   r   r9   ru  rv  rx  )r<  r   r`   r6  r>   r   r&   r&   r'   test_csv_format_options|  s(    



"


r   c              
   C   s   t | d }t|d}|d W 5 Q R X tj|tjtjjdddd}|	|}dd	d
dg}|j
|kspt|ttdgtdgtdgtdgdstd S )Nr  r  z1,a,true,1
T)Zautogenerate_column_namesrn  r   f0r   r'  f3r	   rE   )r!  r   r'  r"  )rH   r7   r  r   r>   ru  r*   rv  rx  r   r  r9   r   r3   r   )r<  r   r`   r6  r>   r   Zexpected_column_namesr&   r&   r'   (test_csv_format_options_generate_columns  s    





r#  c           	   
   C   s  t | d }t|d}|d W 5 Q R X tj|dd}tjjdgdd}tj|t	jj
d	d
d}|j||d}|t	dt	ddd gisttj|d}tj||d}||}|t	dt	ddd gistt }|j||d}|t	dt	dddgistd S )Nr  r  zcol0
foo
spam
MYNULL
rv  r   ZMYNULLT)Znull_valuesr  r  r  )r  r^  )Zfragment_scan_optionsr  rl  Zspamr  )rH   r7   r  r   r>   r   rv  r  r  r*   rx  r   r   r3   r   r9   ru  )	r<  r   r`   r6  r>   r  r   r   r  r&   r&   r'   test_csv_fragment_options  s,    
$
$r$  c              	   C   s   t t jdddgddt jdddgd	dd
}t| d }| jdddd dd}t|d}|| W 5 Q R X t	j
|t	 d}||}||stt| t	j
|dd}||}||std S )Nr	   r   rC   r-   r   r  r  r  r.   rD   	test.jsonrecordsZorientrE  },{}
{r  r   rz  )r*   r3   r   rH   r  to_jsonreplacer7   r  r   r>   ry  r   r   r9   rA   r<  r   r3   r`   ra   rK  r>   r   r&   r&   r'   test_json_format  s     

r-  c              	   C   s   t t jdddgddt jdddgd	dd
}t| d }| jdddd dd}t|d}|| W 5 Q R X t	j
tdd& tj|tjt jjdddd}W 5 Q R X tj|tjt jjdddd}||}||std S Nr	   r   rC   r-   r   r  r  r  r.   rD   r%  r&  r'  rE  r(  r)  r  ztry to increase block sizer   r   r  rn  r   @   )r*   r3   r   rH   r  r*  r+  r7   r  r   r   r  r   r>   ry  rz  rx  r   r   r9   r,  r&   r&   r'   test_json_format_options  s$     



r0  c           	   	   C   s  t t jdddgddt jdddgd	dd
}t| d }| jdddd dd}t|d}|| W 5 Q R X t	j
tdd0 tjt jjddd}tj|t|d}W 5 Q R X tjt jjddd}tj|t|d}||}||std S r.  )r*   r3   r   rH   r  r*  r+  r7   r  r   r   r  r   r  rz  rx  r>   ry  r   r   r9   )	r<  r   r3   r`   ra   rK  r   r>   r   r&   r&   r'   test_json_fragment_options  s(     
r1  c              
   C   s   t | d }dD ]\}}t|d}|| W 5 Q R X tdt fdt fg}tjdgdgd|d	}tjj|d
}t	j
|d}	t	j||	d}
|
j|st|
 |stqd S )Nr  ))latin-1s   a,b
un,lphant)utf16s    a , b 
 u n ,  l  p h a n t wbrE   rF   un
   éléphantrD   rP   encodingrn  r   )rH   r7   r  r*   r)   r/   r3   rv  rx  r   ru  r>   r   r9   r   )r<  r   r`   r8  Z
input_rowsr6  r!  expected_tabler^  r   dataset_transcodedr&   r&   r'   test_encoding  s    r;  c           
   	   C   s   t | d }t|d}|d W 5 Q R X tdt fdt fg}tjdgdgd|d	}tj|d
|d}t	j
tjjdd || W 5 Q R X tjjdd}tj|d}tj||d}	|	j|st|	 |std S )Nr  r4  s   ,b
un,lphant   érF   r5  r6  )r<  rF   rP   rv  r  zinvalid UTF8r   r2  r7  rn  r   )rH   r7   r  r*   r)   r/   r3   r   r>   r   r   r   libr  r   rv  rx  ru  r   r9   )
r<  r   r`   r6  r!  r9  r>   r^  r   r:  r&   r&   r'   test_column_names_encoding  s"    r>  c              	   C   sB  ddl m} ttjdddgddtjdd	d
gddd}| d }|  ||t|d  tj|t	 d}|
|}||stt| tj|dd}|
|}||st|j
|ddgd}|jddgkst|j
|ddgd}|jddgkst||t|d dd tt |
tj|dd W 5 Q R X d S )Nr   )r  r	   r   rC   r3  r   r  r  r  r.   rD   Zfeather_datasetr  r   r  rF   rE   r   zdata1.featherversion)pyarrow.featherr  r*   r3   r   ry  rH   r   r>   r   r   r   r9   rA   r  r   r   r  )r<  r   r  r3   rJ  r>   r   r&   r&   r'   test_feather_format+  s*    

rB  brotlic              	   C   s  t t jdgd ddt jdddgd d	dd
}t j|sHt  | d }|  t	 }| d }|  tj
|t|d ||jd dd |dkrtjtdd |j|d}W 5 Q R X tjtdd t |}|j|d}W 5 Q R X d S |j|d}tj
|t|d ||d tj|t	 d}	||	}
|
|sFt|d d }| j}|d d }| j}||k std S )Nr   i,  r3  r   r  r  r  r  r.   rD   Zfeather_dataset_compressedZfeather_dataset_uncompressedz
data.arrowr  rT   Zfile_optionsrC  zCompression typer   r   part-0.arrow)r*   r3   r   r  r  r   r  ry  r   r   r9  rH   r  r   r  r>   r   r   r9   statst_size)r<  r  r   r3   rJ  r   Zuncompressed_basedirZwrite_optionscodecr>   r   Zcompressed_fileZcompressed_sizeZuncompressed_fileuncompressed_sizer&   r&   r'   test_feather_format_compressedK  sP    






rJ  c                 C   sj   g }t dD ]6}t|gd tjdd}tj|t| |d qt| d }tj	|j
||d ||fS )zO
    Creates a simple (flat files, no nested partitioning) Parquet dataset
    r   r   r  metadata_collector	_metadata)r   r*   r3   r   r4  r5  r[   r  rH   write_metadatar)   )	root_pathrL  r%   r3   metadata_pathr&   r&   r'   _create_parquet_dataset_simple  s       rQ  c                 C   s\   | d }t |\}}t|}|j|js0tt|jdksBt| }|j	dksXtd S )Nr  r   (   )
rQ  r   parquet_datasetr)   r   r9   r   r   r   r   )r<  rO  rP  r3   r>   r   r&   r&   r'   test_parquet_dataset_factory  s    
rT  win32z'Results in FileNotFoundError on Windows)reasonc           	      C   s   t d}| d }t|\}}|d}tt|}tj||d}|j	
|j	sXtt|jdksjt| }|jdkstd S )Nr  r  r  r  r   rR  )r   r  rQ  r   rQ   rc   r  r   rS  r)   r   r9   r   r   r   r   )	r<  r  rO  rP  r3   r  r   r>   r   r&   r&   r'   #test_parquet_dataset_factory_fsspec  s    

rW  r  c                 C   s   | d }t dgd tjdd}g }tj|t|||d t|d }tj|j	||d t
|}|j	|j	szt| }|jdkstd S )Nr  r   r   r  )rL  r  rM  rK  )r*   r3   r   r4  r5  r[   r  rH   rN  r)   r   rS  r   r9   r   r   )r<  r  rO  r3   rL  rP  r>   r   r&   r&   r'   &test_parquet_dataset_factory_roundtrip  s&       
rX  c           	   	   C   s   g }t dD ]Z}tdtt |d |d d i}| | d }tj|||d |d | d qt| d }t|j	|| t
|}| }|d }|tt dd	kstd S )
Nr   r   r	   z.parquetrK  rE  rM  r   r  )r   r*   r3   rW   r[   r\   set_file_pathrH   rN  r)   r   rS  r   r   r  r9   )	r<  Z	metadatasr%   r3   Z
table_pathrP  r>   Zscanned_tableZscanned_colr&   r&   r'   "test_parquet_dataset_factory_order  s    
rZ  c              	   C   sz   | d }t |\}}t|dd   t|}|j|jsFtt	|j
dksXttt |  W 5 Q R X d S )NZtest_parquet_dataset_invalid	*.parquetr   r   )rQ  rW   globunlinkr   rS  r)   r   r9   r   r   r   r   r   r   )r<  rO  rP  r3   r>   r&   r&   r'   $test_parquet_dataset_factory_invalid  s    
r^  c                 C   sz   t t| d}t|d j }g }|D ].}t|j}|t	|
|  || q.| d }tj|||d |S )Nr[  r   rM  rK  )rW   r   rglobr[   ZParquetFiler)   Zto_arrow_schemar  rY  rH   rz  r   rN  )rO  Zparquet_pathsr)   rL  r`   r  rP  r&   r&   r'   _create_metadata_file  s      r`  c              	   C   st   t jt tdt tjdt tddgdgdddgd}|d	d
i}t	j
|t| dgd t| |fS )Nr&  rE   rF   r   r   r'  r   r(  r   r   r  )r*   r3   r   r   r   r4  r5  r6  r1   r[   r  rH   r`  )rO  r3   r&   r&   r'   #_create_parquet_dataset_partitioned  s     ra  c                 C   s   | d }t |\}}tjdd}tj||d}|j|js@tt|jdksRt|	 }|j
dksht| djdd	}| }tj|| d S )
N(test_parquet_dataset_factory_partitionedr   r  r  r   r&  r   TZdrop)ra  r   r   rS  r)   r   r9   r   r   r   r   r  sort_valuesreset_indexr   testingZassert_frame_equal)r<  rO  rP  r3   r   r>   r   r  r&   r&   r'   rb    s    rb  c                 C   sh   | d }t |\}}tj|dd}|j|js4td|jjksDtt| }d|d j	jksdtd S )N%test_parquet_dataset_factory_metadatar   r     keyr   )
ra  r   rS  r)   r   r9   r  rW   r6   r  )r<  rO  rP  r3   r>   r   r&   r&   r'   rg  .  s    rg  c           
   	   C   s   |\}}| d }t |\}}||g tj|tjdd|d}W 5 Q R X |g  t| }W 5 Q R X |g  t|tddk W 5 Q R X |g  |d tddk W 5 Q R X |g  |d  }	|	d   W 5 Q R X d S )N#test_parquet_dataset_lazy_filteringr   r  )r   r   r      r   )	rQ  r   rS  r   rW   r6   r+   r   r  )
r<  ry   rQ   rs   rO  rP  r  r>   r   Zrg_fragmentsr&   r&   r'   ri  =  s$    


"
"
ri  c                 C   sr   t ddddgi}| d }|| t|}||j}|j|dgdj}d|jks\t|j	|dd	sntd S )
NrE   r	   r   rC   r  r   s   pandasTr  )
r   r   r  r   r>   r   r)   r  r9   r   )r<  r   r2   r`   r>   r)   r  r&   r&   r'   test_dataset_schema_metadataf  s    

rk  c                 C   s   t dt jddddgddi}t|t| d  t dt  fg}tj	| d d	|d
}|j
|tddkd}|d |d ddstt| d }|j
|tddk|d}|d |d ddstd S )Nr  r	   r   rC   r   r   r   r  r}   r  r   r-   r   r  )r*   r3   r   r[   r\   rH   r)   r-   r   r>   r   r+   r   rH  r  r9   rW   r6   )r<  r   r3   r)   r>   filteredr?   r&   r&   r'   test_filter_mismatching_schemax  s"       "  rm  c                 C   s   t d ttdd}t| d }tj||dgd tj	|dd}|
|}|j
|dgd	}|d|ds|td S )
Nza a b br   r  r   r   r  r   r  r   )r*   r3   r  rW   r   rH   r[   r  r   r>   r   r   r   r9   )r<  r   r3   r`   r>   Zall_colsZ	part_onlyr&   r&   r'   +test_dataset_project_only_partition_columns  s    
rn  c                 C   s   dd l }|dtjd d d gddi}| d }|j|dd tj|dtdt	 fgd	}t
dtd d d gt	 i}|||std S )
Nr   r  objectZdtypez(test_dataset_project_null_column.parquetr   r
  r}   r  )r|   r   r   r   r  r   r>   r*   r)   r-   r3   r   r   r9   )r<  r   r   r2   rK  r>   r  r&   r&   r'    test_dataset_project_null_column  s     rq  c              	   C   s   ddl m} tdddgdddgd	d
dgd}||| d  tj| d dd}|j|tdtdj	dddtdd	kdd}tdddgtj
dddgdddddgd}||sttjtdd |j|ddid W 5 Q R X d S )Nr   r  r	   r   rC   r   r   r   rE   rF   rO  r  r  r  r   r  r  r   Fsafer  )Z	A_renamedZB_as_intZC_is_ar   r   TzExpected an Expressionr   )r   r  r*   r3   r  r   r>   r   r+   rH  r   r   r9   r   r   r   )r<  r   r  r3   r>   r   r  r&   r&   r'   test_dataset_project_columns  s"    $
ru  c           	      C   s  t | \}}t|}t|jtjs(tt| \}}t|}t|jtjsPttj|dd}|j}|d k	sptt|tjst|j	t
	dt
 fgkstt|jdkst|jd t
dddgt
 ksttjt
	dt
 fgdd}t|tjstt|jdksttdd	 |jD s2ttj||d}|j}t|tjsXt|j	t
	dt
 fgksxtt|jdksttd
d	 |jD sttj|dd}tjt| |j	|j|jd}|jd kst| d }t|\}}tj|dd}|j}|d k	stt|tjs,t|j	t
	dt
 fgksLtt|jdks`tt|jd  ddhkstd S )Nr   r  r   r	   r   r   r  c                 s   s   | ]}|d kV  qd S r5   r&   r   r&   r&   r'   r    s     z6test_dataset_preserved_partitioning.<locals>.<genexpr>c                 s   s   | ]}|d kV  qd S r5   r&   r   r&   r&   r'   r    s     r   zdata-partitioned-metadatarE   rF   )r  r   r>   r8   r   r   r9   r  r,  r)   r*   r   r   r%  r   r.  r   rW   r6   rT   r   ra  rS  r/   ru   r  )	r<  r  r`   r>   r  r   rv  rO  rP  r&   r&   r'   #test_dataset_preserved_partitioning  sP    

$ 
   rv  c                 C   s   t t dt  t dt t  t  g}t jd d ddgtt	dd|d}t
| d }tj||dgdd	 t| d }|d |d kst|d|dstd S )
Nr  r   rE   r   r  rP   r   Fr  r  )r*   r)   r+   r-   r2  r   r/   r3   rW   r   rH   r[   r  rC  r   r  r9   r   )r<  r)   r3   r`   Zactual_tabler&   r&   r'   +test_write_to_dataset_given_null_just_works  s(    
rx  c                 C   s   t t dt  t dt t  t  g}t jddd d gtt	dd|d}t jddgtt	dd|d}t
| d }tj||dgd	d
 t| d }||kstd S )Nr  r   rE   r   r  rP   r   r   Trw  )r*   r)   r+   r-   r2  r   r/   r3   rW   r   rH   r[   r  rC  r9   )r<  r)   r3   r  r`   r  r&   r&   r'   'test_legacy_write_to_dataset_drops_null  s*    
 ry  c                 C   s2   dd l m} |j| ||dfgd}|| |S )Nr   	ascending)r   )pyarrow.computecomputeZsort_indicesZSortOptionsr   )tabsort_colr  Zsorted_indicesr&   r&   r'   _sort_table0  s     r  c                 C   st   |p|}t j| |d|dd t|d}t|t|ks>tt j|d|d}t| |	t|  |sptd S )Nr  FrT   r   r   *r+  )
r   r9  rW   r_  ru   r9   r>   r  r   r   )r>   rb  expected_filesr~  Zbase_dir_pathr   
file_pathsrv  r&   r&   r'   _check_dataset_roundtrip7  s    
   r  c                 C   s   | d }|   t|}t|}| d }|d g}t|t||d| | d }|d g}t|||d| | d }|   t|}t|}| d }|d g}t|t||d| d S )Nrx  zsingle-file-targetrE  rE   zsingle-file-target2r  zsingle-directory-target)ry  r  r   r>   r  rH   re  )r<  r_   r  r>   targetr  r&   r&   r'   test_write_datasetJ  s"    





r  c                 C   s   | d }t |}tjdd}tj||d}| d }|d |d d |d |d d g}tjtd	t fgdd}t|t||d
||d | d }|d |d d |d |d d g}ttd	t fg}t|t||d
||d d S )Npartitionedr   r  r  zpartitioned-hive-targetpart=arE  part=br   r   partitioned-dir-targetrE   rF   )	ra  r   r   r>   r*   r)   r/   r  rH   )r<  r_   r  r   r>   r  expected_pathsr;  r&   r&   r'   test_write_dataset_partitionedn  sN     
 
      
 
    r  c                    s   t ddd gdddgd}tj| ddgd tj ddgd}|j} fdd	|D }|dddhkslt| }||std S )
Nr   r<  r  rD   r*  rF   r+  c                    s"   h | ]}t t| jqS r&   rH   rg  rh  rz  ri  rJ  r  r&   r'   rj     s    z6test_write_dataset_with_field_names.<locals>.<setcomp>	r*   r3   r   r9  r>   r   r9   r   r   r<  r3   r=  r   Zpartitioning_dirsr>  r&   r  r'   #test_write_dataset_with_field_names  s    

r  c                    s   t ddd gdddgd}tj| ddgdd tj ddd	}|j} fd
d|D }|dddhkslt| }||std S )Nr   r<  r  rD   r*  rF   r   )rT   r   partitioning_flavorr+  c                    s"   h | ]}t t| jqS r&   r  rJ  r  r&   r'   rj     s    z;test_write_dataset_with_field_names_hive.<locals>.<setcomp>zb=xzb=yzb=zr  r  r&   r  r'   (test_write_dataset_with_field_names_hive  s    
 
r  c              	   C   s   t ddd gdddgdddgd}tj|| dd	gd
 tj| dd	gd
}t ^}tj|jd	dgd|dd	gd
 tj|dd	gd
}| }t	|
 |d
 kstW 5 Q R X d S )Nr   r<  r  r	   r   rC   )rE   rF   rO  r*  rF   r+  rO  r   rE   )r*   r3   r   r9  r>   r7  r8  r   r   r  r   Zdrop_columnsr9   r<  r3   r>   Ztempdir2r=  r>  r&   r&   r'   test_write_dataset_with_scanner  s$    

  
r  c           	         s6  t  G fdddt}t|t ttdt	 g}tj
tttdg|d dd}dd	 fd
d}tjj| |d	dt jfddd}|  z`t fdd}d}d}| dk r|kr|krd	}q}td q|stW 5 d  |  X d S )Nc                       s   e Zd Z fddZdS )z6test_write_dataset_with_backpressure.<locals>.GatingFsc                    s       | jj||dS )Nr  )waitrn   rV   )rp   r`   r  consumer_gater&   r'   rV     s    zItest_write_dataset_with_backpressure.<locals>.GatingFs.open_output_streamN)r   r   r   rV   r&   r  r&   r'   GatingFs  s   r  r$   rg  rP   r          Tc                   3   s.   k r*sd S t d d7  V  q d S )Ng{Gz?r	   )r  sleepr&   )rb   batches_readend
keep_goingr&   r'   counting_generator  s    
z@test_write_dataset_with_backpressure.<locals>.counting_generatorr  c                      s   t jtd dS )Nr}   r  )r   r9  rH   r&   )	gating_fsr   r<  r&   r'   r    s
      z6test_write_dataset_with_backpressure.<locals>.<lambda>)r  Fc                      s   t     S r5   )r  r&   )startr&   r'   duration  s    z6test_write_dataset_with_backpressure.<locals>.durationr   rG  )	threadingEventr   rQ   rc   rd   r*   r)   r+   r   rY   r   rW   r   r   r  rZ   Threadr  ru   rG  r  r  r9   )	r<  r  r)   Zmin_backpressurer  Zwrite_threadr  
last_valueZbackpressure_probably_hitr&   )	rb   r  r  r  r  r  r   r  r<  r'   $test_write_dataset_with_backpressure  sF    	  
r  c              	   C   s   t dddgdddgd}tj|| dd	gd
 tj| dd	gd
}t L}tj||dd	gd
 tj|dd	gd
}| }t|	 |	 kst
W 5 Q R X d S )Nr   r<  r  r	   r   rC   rF   rO  r*  rF   r+  )r*   r3   r   r9  r>   r7  r8  r   r  r   r9   r  r&   r&   r'   test_write_dataset_with_dataset  s    

 r  c           	   	   C   s  | d }t dddgdddgd}tjt t d	t  gd
d}dd }tj|||dd t ddd	gdddgd}t	t j
 tj|||dd W 5 Q R X t ddgi}|d d }tj|| tj|||ddd t ddddd	gdddddgd}tj| d|d }||| | s4ttj|||ddd t dddd	gddddgd}tj| d|d }||| | rtd S )Nr   r   r<  r  r	   r   rC   r  rO  r   )r)   r  c                 S   s>   |   djdd}|  djdd}||s:td S )NrF   Trc  )r  rd  re  r   r9   )r  r  Zdf1Zdf2r&   r&   r'   compare_tables_ignoring_order(  s    zGtest_write_dataset_existing_data.<locals>.compare_tables_ignoring_orderr*  r  rE   rF   r   ezc=2z	foo.arrowoverwrite_or_ignore)r   rT   existing_data_behaviorr+  Zdelete_matching)r*   r3   r   r   r)   r+   r-   r9  r   r   r  r   r  r  r>   r   existsr9   )	r<  r_   r3   r   r  Zextra_tableZ
extra_fileZoverwrittenZreadbackr&   r&   r'    test_write_dataset_existing_data!  sP     



  

r  r   c                 C   s   t j||| S r5   )r   r4  randintsizer  r  r&   r&   r'   _generate_random_int_arrayR  s    r  c                 C   sN   g }g }t | D ]*}|t|d|d |dt|  qtj||d}|S )Nr	   r  rO  r$   r)  )r   r   r  rH   r*   rY   )num_of_columnsnum_of_recordsr$   r  r%   rY   r&   r&   r'   _generate_data_and_columnsV  s    r  c                 C   s   t tt| d| S )Nz**/*.)r   rW   rg  rh  r\  Zbase_directoryr   r&   r&   r'   _get_num_of_files_generatedb  s    r  c                    s   | d }d d}d}d}t ||}tj||d |d t|}|  d }t||ks\tg }t|D ]6\}	}
|t|
 }tj	|dd}|
| jd	  qh|t|kst|t|kstt fd
d|D std S )Nr   r   r   #   r}   )rT   max_rows_per_filemax_rows_per_groupr	   r   r   c                 3   s   | ]}| kV  qd S r5   r&   )rI   Zfile_rowcountr  r&   r'   r    s   z7test_write_dataset_max_rows_per_file.<locals>.<genexpr>)r  r   r9  rE  rK  r   r9   rS   rH   r>   r   r   shaper  r.  )r<  r_   r  r  r  rY   files_in_dirZexpected_partitionsZresult_row_combinationr  f_filef_pathr>   r&   r  r'   $test_write_dataset_max_rows_per_filef  s2    

r  c              
      s   | d }d}d}d ddddddddddg
} fdd|D }|d	 }t j||||d
d t|}t|D ]|\}}	|t|	 }
t j|
d
d}| }| }t|D ]@\}}|j	}|t
|d k r||kr||kstq||kstqqld S )Nr   rF  r   r   r
   r   c                    s   g | ]}t  |qS r&   )r  )rI   r  r  r&   r'   rK     s   z9test_write_dataset_min_rows_per_group.<locals>.<listcomp>Zmin_rows_groupr}   )r$   rb  min_rows_per_groupr  rT   r   r	   )r   r9  rE  rK  rS   rH   r>   r   r   r   r   r9   )r<  r_   r  r  Zrecord_sizesZrecord_batchesdata_sourcer  r  r  r  r>   r3   batchesr  rb   Zrows_per_batchr&   r  r'   %test_write_dataset_min_rows_per_group  s6    

r  c                 C   s   | d }d}d}d}t ||}|d }tj|||dd t|}g }|D ]D}	|t|	 }
tj|
dd}| }| }|D ]}|	|j
 q|qJ|dd	gkstd S )
Nr   r  r      Zmax_rows_groupr}   )r$   rb  r  rT   r   r  )r  r   r9  rE  rK  rH   r>   r   r   r   r   r9   )r<  r_   r  r  r  rY   r  r  Zbatched_datar  r  r>   r3   r  rb   r&   r&   r'   %test_write_dataset_max_rows_per_group  s,    
r  c                 C   s~  | d }d}d}ddg}t jddddd	d
gddddddgg|d}t jddddd	dgddddddgg|d}t jdd
ddd	dgddddddgg|d}t jddddd	dgddddddgg|d}t j||||g}	tjt || t  fgdd}
|d }tj|	||
|d d d! }|||||\}}||ks8t	|d" }d}tj|	||
||d#d$ |||||\}}||kszt	d S )%Nr   r}   r	   c1c2r   rC   r   r   r   rE   rF   rO  rN  r  r  r
   rF  r   r   r`  r  r  r\  rr  rj  r   r   r  default)r$   rb  r   rT   c                 S   s(   t | |d}ttj|| }||fS )Nr  )r  r   r*   r|  unique)r  rY   r   Zcol_idnum_of_files_generatednumber_of_partitionsr&   r&   r'   _get_compare_pair  s     z<test_write_dataset_max_open_files.<locals>._get_compare_pairZmax_1F)r$   rb  r   rT   max_open_filesr   )
r*   rY   r0   rZ   r   r   r)   r/   r9  r9   )r<  r_   r   Zpartition_column_idr  Zrecord_batch_1Zrecord_batch_2Zrecord_batch_3Zrecord_batch_4r3   r   Zdata_source_1r  r  r  Zdata_source_2r  r&   r&   r'   !test_write_dataset_max_open_files  sp    
    r  c                 C   s   | d }t |}tj|tjjddd}| d }|d |d d |d |d d g}tjt|jd	gd	t	ddgid
}t
|t||d||d d S )Nr  Tr$  r  r  rE   rE  rF   r   r$  r   )ra  r   r>   r,  r  r   r*   r)   r+   r   r  rH   )r<  r_   r  r>   r  r  r   r&   r&   r'   #test_write_dataset_partitioned_dict  s2     
 

    r  c                    s   | d }t |}tj|dd}tjtdt fgdd}| d }g   fdd}tj||d	|d
|d |d d |d d h}tt	t
j }||kst| d }	tj||	d	|dd tj|d	|d}
tj|	d	|d}|
 | std S )Nr  r   r  r   r  Zpartitioned1c                    s     | j d S r5   )r   r`   Zwritten_filepaths_writtenr&   r'   file_visitor8  s    z4test_write_dataset_use_threads.<locals>.file_visitorr  TrT   r   r   r  r  part-0.featherr  Zpartitioned2Fr  r+  )ra  r   r>   r   r*   r)   r/   r9  ru   rX   rg  rh  r9   r   r   )r<  r_   r  r>   r   Ztarget1r  r  paths_written_setZtarget2Zresult1Zresult2r&   r  r'   test_write_dataset_use_threads+  sD         

   r  c           
   	      s  t jt tdt tjdt tddgdgdddgd}| d	 }tj	||d
dd t
|d}|d g}t|t|ksttj|dd }||st| d }|d |d d |d |d d g}g  g  fdd}tjt dt  fgdd}tj	||dd
||d t
|d}t|t|ksDtdd  D }|ks`ttj|d|d}| |stt dkst D ]}	t|	|kstqd S )Nr&  rE   rF   r   r   r'  r   r(  singledat_{i}.arrowr  basename_templaterT   r  zdat_0.arrowr*  r   r  r  r  c                    s     | j  | j d S r5   )r   r`   r  r  Zvisited_pathsZvisited_sizesr&   r'   r  n  s    z&test_write_table.<locals>.file_visitorr   r  )rT   r  r   r  c                 S   s   g | ]}t j|qS r&   )rE  r`   getsizerI   r`   r&   r&   r'   rK   y  s     z$test_write_table.<locals>.<listcomp>r+  r   )r*   r3   r   r   r   r4  r5  r6  r   r9  rW   r_  ru   r9   r>   r   r   r   r)   r/   r   rg  rh  )
r<  r3   rb  r  r  r   r  r   Zactual_sizesZvisited_pathr&   r  r'   test_write_tableS  sX      
 
 
 
 r  c              	   C   s  t jt tdt tjdt tddgdgdddgd}t |gd	 }| d
 }t	j
||dd t|dt|d gkstt	j|dd |st| d }t	j
|g|dd t|dt|d gkstt	j|dd |st| d }t	j
| |dd t|dt|d gks<tt	j|dd |sZt| d }t	j
||g|dd t|dt|d gkstt	j|dd t |gd	 std S )Nr   rE   rF   r
   r   r'  r   r(  r   r  r  r   r  r  r*  zsingle-listZmultiplezmultiple-table)r*   r3   r   r   r   r4  r5  r6  rp  r   r9  ru   r_  r9   r>   r   r   r   )r<  r3   rb  r&   r&   r'   #test_write_table_multiple_fragments  s<       r  c              	   C   s   t jt tdt tjdt tddgdgdddgd}| d	 }tj	d
d |
 D ||jddd tj|dd }||st| d }t j|j|
 }tj	||ddd tj|dd }||std S )Nr&  rE   rF   r   r   r'  r   r(  Zinmemory_iterablec                 s   s   | ]
}|V  qd S r5   r&   )rI   rb   r&   r&   r'   r    s     z&test_write_iterable.<locals>.<genexpr>r  r  )r)   r  rT   r*  r   Zinmemory_readerr  )r*   r3   r   r   r   r4  r5  r6  r   r9  r   r)   r>   r   r   r9   r  rZ   )r<  r3   rb  r   r   r&   r&   r'   test_write_iterable  s2      
 r  c              	   C   s  t jt tdt tjdt tddgdgdddgd}t	|}| d	 }tj
|||d
d |tj	|dd}||st| d }tj
|j|dgd|d
d |tj	|dd}||dgsttjtdd  tj
||||jd
d W 5 Q R X d S )Nr&  rE   rF   r   r   r'  r   r(  Zdataset_from_scannerr  r   r*  Zdataset_from_scanner2r   zCannot specify a schemar   )r)   rT   )r*   r3   r   r   r   r4  r5  r6  r   r>   r9  r   r   r   r9   r  r   r   r  r)   )r<  r   r3   r>   rb  r   r&   r&   r'   test_write_scanner  s:     
   r  c                 C   s   t jt tdt tddgd gddgd}t|	dgj
}| d }tj||d	|d
 tjjdgdd}tj|d|d
 }||std S )Nr&  rE   rF   r   r  r   r(  r>   r  r+  Tr$  r*  )r*   r3   r   r   r   r6  r  r   r   r  r)   r9  r   r  r>   r   r   r9   )r<  r3   r   rb  Zpartitioning_readr   r&   r&   r'   !test_write_table_partitioned_dict  s0          
r  c              	   C   s  t jt jtdddt tjdddddt tdd	gd
gdddgd}| d }tj	||dd t
|d}|d g}t|t|ksttj|dd }||stdD ]}t }|j|d}dt|kst| d| }tj	||||d t|d }	|dkrdnd}
|	j|
ks2ttj|dd }|j}|dkrp|d|dt  }|dkr|d|dt d}||}||stqd S )Nr&  r  r   r  zdatetime64[D]rp  zdatetime64[ns]rE   rF   r   r   r'  r   r(  rS  r}   r   r  part-0.parquet)1.02.42.6r?  z(<pyarrow.dataset.ParquetFileWriteOptionszparquet_dataset_version{0}rD  r  r  r   )r  r  r	   r  )r*   r3   r   r   r   rB  r  r6  r   r9  rW   r_  ru   r9   r>   r   r   r   r  r  rT   r[   Zread_metadataformat_versionr)   r+   Z	with_typer-   r  rH  )r<  r3   rb  r  r  r   r@  rT   optsmetaZexpected_versionr)   r  r&   r&   r'   test_write_dataset_parquet  sB    	



r  c              	   C   s  t jt tdt tjdt tddgdgdddgd}| d	 }tj	||d
d t
|d}|d g}t|t|ksttj|d
d }||sttjtjj|jjdd}|jdd}| d }tj	||||d tj||d }||s
td S )Nr&  rE   rF   r   r   r'  Zchr1r(  Zcsv_datasetrv  r   r  z
part-0.csvr  rn  F)Zinclude_headerZcsv_dataset_noheaderrD  )r*   r3   r   r   r   r4  r5  r6  r   r9  rW   r_  ru   r9   r>   r   r   ru  r   rv  rx  r)   r)  r  )r<  r3   rb  r  r  r   rT   r  r&   r&   r'   test_write_dataset_csv  s,     


r  c              	      sz   t jt tdt tjdt tddgdgdddgd}d	  fd
d}| d }tj	||d|d  svt
d S )Nr&  rE   rF   r   r   r'  r   r(  Fc                    s   | j d k	r| j jdkrd d S )NrC   T)r  r   r  Zvisitor_calledr&   r'   r  =  s    

z=test_write_dataset_parquet_file_visitor.<locals>.file_visitorrS  r}   )rT   r  )r*   r3   r   r   r   r4  r5  r6  r   r9  r9   )r<  r3   r  rb  r&   r  r'   'test_write_dataset_parquet_file_visitor4  s     
r  c           	         s   dd t dD }dd t dD }t||tddgdd}| d	 }tjtd
t fgdd}g  d  fdd}tj	||d|d|d |d d |d d h}t
ttj }||kstd k	stjdkstd S )Nc                 S   s    g | ]}|gd  D ]}|qqS r   r&   rI   r   itemr&   r&   r'   rK   L  s       z?test_partition_dataset_parquet_file_visitor.<locals>.<listcomp>r   c                 S   s$   g | ]}|gd  D ]}|d  qqS r  r&   r  r&   r&   r'   rK   M  s       rE   rF   r&  )r   r'  r   r  r   r   r  c                    s   | j r| j  | j d S r5   )r  r   r`   r  r  Zsample_metadatar&   r'   r  Y  s    zAtest_partition_dataset_parquet_file_visitor.<locals>.file_visitorr}   Tr  r  r  r  r   )r   r*   r3   r   r6  r   r   r)   r/   r9  ru   rX   rg  rh  r9   r   )	r<  Zf1_valsZf2_valsr3   rO  r   r  r  r  r&   r  r'   +test_partition_dataset_parquet_file_visitorJ  s8         

r  c                 C   sd   t dtjdddgi}|d jjdks.ttj|| dd t	
| d }|d jjdks`td S )NrE   r  zEurope/Brussels)tzr}   r   r  )r*   r3   r   r  r   r  r9   r   r9  r[   rC  )r<  r3   r   r&   r&   r'   (test_write_dataset_arrow_schema_metadatan  s
    r  c                 C   sd   ddl m} tddddgi}|ddi}tj|| d	d
 || d j}|j	ddiks`t
d S )Nr   rr  rE   r	   r   rC   rh     valuer  r   r  )r   r  r*   r3   r1   r   r9  rC  r)   r  r9   )r<  r  r3   r)   r&   r&   r'   "test_write_dataset_schema_metadata{  s    r  c                 C   sX   t ddddgi}|ddi}tj|| dd t| d	 j}|jddiksTt	d S )
NrE   r	   r   rC   rh  r  r}   r   r  )
r*   r3   r1   r   r9  r[   rC  r)   r  r9   )r<  r3   r)   r&   r&   r'   *test_write_dataset_schema_metadata_parquet  s
    r  c              	   C   sF  | \}}}}}}}}d ||||}tjttdttjdttddgdgdddgd	}t	j
tdt fgd
d}	t	j|d|d|	d t	jd|dd
d }
|
|st| d}t	j||d|	d t	jd|dd
d }
|
|st| d}t	j|d|d|	d t	jd|dd
d }
|
|sBtd S )Nr  r&  rE   rF   r   r   r'  r   r(  r   r  zmybucket/datasetr  r  r*  zmybucket/dataset2r+  r  r  zmybucket/dataset3)rT   r*   r3   r   r   r   r4  r5  r6  r   r   r)   r/   r9  r>   r   r   r9   )r  r  rQ   r  r  r  r  Zuri_templater3   r   r   r9  r&   r&   r'   test_write_dataset_s3  sl              

   

       
r  aC  {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "s3:PutObject",
                "s3:ListBucket",
                "s3:GetObjectVersion"
            ],
            "Resource": [
                "arn:aws:s3:::*"
            ]
        }
    ]
}c           	   	   C   s  ddl m} | d \}}}}|ddd||dd}t| t tjttd	tt	j
d	tt	d
dgdgdddgd}tjtdt fgdd}tj|d|dd|dd tjd|ddd }||sttj|d|dd|dd tjd|ddd }||sttjtdd tj|d|dddd W 5 Q R X |ddd||ddd}tjtd d tj|d|dddd W 5 Q R X d S )!Nr   )r  r  ZlimitedZ
limited123z{}:{}http)r  r  endpoint_overrideschemer&  rE   rF   r   r   r'  r   r(  r   r  zexisting-bucketr  Fr  )r   rT   rU   r   r  r*  r  Tz&Bucket 'non-existing-bucket' not foundr   znon-existing-bucket)r   rT   rU   r  )r  r  r   r  Zallow_bucket_creationzAccess Denied)rt   r  rT   r   _minio_put_only_policyr*   r3   r   r   r   r4  r5  r6  r   r   r)   r/   r9  r>   r   r   r9   r   r   OSError)	r  r  r  r  r  rQ   r3   r   r   r&   r&   r'   test_write_dataset_s3_put_only  s    

        
       
   
   r  c              
   C   s   t dd d gi}t|| d  t t dt t  t  g}t	j
j| d g|t	 t d}||}|j|kstd S )NrE   r  )rk   r)   rT   r   )r*   r3   r[   r\   r)   r+   r2  r   r/   r   r   r   r   rQ   rd   r   r9   )r<  r   r3   r)   Zfsdsr&   r&   r'   $test_dataset_null_to_dictionary_cast!  s    
r  c              	   C   s  t dddgdddgd}tj|| d d	d
 tj| d d	d
}t dddgdddgd}tj|| d d	d
 tj| d d	d
}||dd}| t dddgdddgddd gdkst|j|dddd}| dt ddddgdddd gddd dgdkstd S )Nr	   r   rF  rE   rF   rK  colAr  r  r*  r   c   Zr  r  )colBcol3r  r  r
  r  r  r  
full outer)	join_type	r*   r3   r   r9  r>   rG  r   r9   r   r<  r  ds1r  ds2r   r&   r&   r'   test_dataset_join4  s0    



r  c              	   C   s  t dddgdddgd}tj|| d d	d
 tj| d d	d
}t dddgdddgd}tj|| d d	d
 tj| d d	d
}||d}| t dddgdddgddd gdkst|j|dddd}| dt ddddgdddd gddd dgdkstd S )Nr	   r   rF  rE   rF   rK  r  r  r*  r   r  r	  r  r  )r  r  r  r  r  r  _rr  Zright_suffixr  r  r&   r&   r'   test_dataset_join_unique_keyS  s0    



r  c              
   C   s  t dddgdddgddd	gd
}tj|| d dd tj| d dd}t dddgdddgdddgd
}tj|| d dd tj| d dd}|j|dddd}| dt jddddgdddd gddd	d gddd dgddd dggdddddgdkstd S )Nr	   r   rF  r   r&  <   rE   rF   rK  )r  r
  colValsr  r*  r   r  r	  r  r  r  r  r  r  r  r
  r  ZcolB_rZ	colVals_rr(  )	r*   r3   r   r9  r>   rG  r   r   r9   r  r&   r&   r'   test_dataset_join_collisionsr  s0    




r  dstyperQ   memc           
   	   C   s  t ddddgddddgd	}|d
krPtj|| d dd tj| d dd}n|dkrdt|}nt|tddk tddk}t	||d
krtj
ntjkst| t dgdgd	kst|dt dgdgd	kst|tddk tddkjtddkd}| t dgdgd	ksFttj|| d dd tj| d dd}| t dgdgd	kst|jtt ddgddgdddd}| dt dd gddgddgdksttt |d  W 5 Q R X tt |  W 5 Q R X |jd}|tddk |}	|	 t dddgiksnttt j ||  W 5 Q R X d S )Nr	   r   rF  r   rE   rF   rK  gr  rQ   r  r*  r   r  r  rC   r  r
   r   rl  r   r&  r
  r  zright outerkeysr  r
  )r  r
  r  )r*   r3   r   r9  r>   r  r   r  r+   r   r   r  r9   r   r   r   rG  r   r   r   r   r  r6   r)   r  Zreplace_schemar  )
r<  r  r  r  r   r2rl  joinedZschema_without_col2Z	newschemar&   r&   r'   test_dataset_filter  s    

$



 
 r"  c           
   	   C   s  t ddddgddddgd	}t d
ddgdddgd	}|dkrtj|| d dd tj| d dd}tj|| d dd tj| d dd}n"|dkrt|}t|}ntt||ftddk tdd
kB }|	 t ddd
gdddgd	ks
t
|jtt ddgddgdddd}|	 dt ddd
gdddgddd gdkslt
|tddk }|tddk }	tjtdd t||	f W 5 Q R X d S ) Nr	   r   rF  r   rE   rF   rK  r  r  r`  r   r  hr%   lrQ   r  r*  r   r  r  r  rC   r&  r  r  z
left outerr  )r  r  r
  zcurrently not supportedr   )r*   r3   r   r9  r>   r  r   r  r+   r   r9   rG  r   r   r   r  )
r<  r  r  r  r  r  Zfiltered_union_dsr!  Zfiltered_ds1Zfiltered_ds2r&   r&   r'   test_union_dataset_filter  sP    



 r%  c              	   C   s|   | d }t |\}}t|}| }|jdks4t|tddk }| jdksZtt	
t |  W 5 Q R X d S )Ntest_parquet_dataset_filterrR  r   r   r&  )rQ  r   rS  r   r   r9   r   r  r+   r   r   r  r6   )r<  rO  rP  r  r>   r   Zfiltered_dsr&   r&   r'   r&    s    
r&  c              	   C   s   t jt tdgdgd}t|}dtdi}|j|d}tj|| dgdd t	j
tdd	 tj|| dgdd W 5 Q R X d
S )z
    Ensure the projected schema is used to validate partitions for scanner

    https://issues.apache.org/jira/browse/ARROW-17228
    r&  Zoriginal_columnr(  Zrenamed_columnr   r*  r  z0'Column original_column does not exist in schemar   N)r*   r3   r   r   r   r>   r+   r   r9  r   r   KeyError)r<  r3   Ztable_datasetr   r   r&   r&   r'   4test_write_dataset_with_scanner_use_projected_schema  s.    
        r(  rT   )r*  r}   c              
   C   s   |dkrt d tddgddgd dddgdd	id gd
ddd ddgddigd
gd}tj|| d |d tj| d |d}|jdddddgd}| dd ddgd d	dd gddddd ddgdd dgddgkst	d S )Nr}   zpyarrow.parquetZabc123Zqrs456r	   r   Zbuttonrl  r  )r   elementvaluesstructsscrollZwindowrC   r   fizzZbuzz)user_ida.dotted.fieldZinteractionr3   r   r.  zinteraction.typezinteraction.valueszinteraction.structsr/  r   )r-  rl  )r.  r   r*  r+  r/  )
r   r  r*   r3   r   r9  r>   r   r  r9   )r<  rT   r3   r  r&   r&   r'   test_read_table_nested_columns7  s@    

 
  
 
 r0  c                 C   s   ddl m} | d }tjtdddddgt td	d
d	ddgt gddg}|j||ddgdd |j|ddt	t
dt t
dt gd  }||dkst|d }tt|}dd |D }tt|}||kstd S )Nr   )r>   zslash-writer-xr	   r   rC   r   r
   zexperiment/A/f.csvzexperiment/B/f.csvzexperiment/C/k.csvzexperiment/M/i.csvZexp_idexp_metar*  r   )r$   rb  rT   r   r  )r  rT   r   r)   c                 S   s   g | ]}d t |dd qS )z	exp_meta=rQ  rs  r   r  r&   r&   r'   rK   r  s     z5test_dataset_partition_with_slash.<locals>.<listcomp>)r   r>   r*   r0   r  r   r   r  r9  r)   r+   r   r%  r   r9   r   r  r   ru   rE  rK  )Ztmpdirr   r`   Zdt_tablerC  r1  Zencoded_pathsr  r&   r&   r'   !test_dataset_partition_with_slashS  sF     r2  c                 C   s  t t jdt  ddt jdt  ddg}dddgd d	d gg}t jj||d
}t|| d  tj	| d dd}|
 j|sttj|| d dd tj	| d dd}|
 j|sttj||g| d dd tj	| d dd}|
 j|std S )Nr   F)Znullabler<  Tr	   r   rC   r
   rP   Z	nulltest1r}   r   Z	nulltest2Z	nulltest3)r*   r)   r+   r-   r0   r  r[   r  r   r>   r   r   r9   r9  )r<  Zschema_nullabler  r3   r>   r&   r&   r'   'test_write_dataset_preserve_nullabilityx  s    r3  c                 C   sX  t t jdt  ddidt dt  g}t t dt  t dt  g}dddgd d	d gg}t jj||d
}t jj||d
}tj||g| d dd tj| d dd}|	 jj
|ddsttj||g| d dd tj| d dd}|	 jj
|ddsttj||g| d d|d tj| d dd}|	 jj
|ddsTtd S )Nr   s   foos   barr  r<  r	   r   rC   r
   rP   Ztest1r}   r   Tr  Ztest2Ztest3r  )r*   r)   r+   r-   r0   r  r   r9  r>   r   r   r9   )r<  Zschema_metadataZschema_no_metar  r3   Ztable_no_metar>   r&   r&   r'   *test_write_dataset_preserve_field_metadata  s,    r4  c              	   C   s  t jt dddddgt dddddggd	d
gd}|dkrftj|| d dd tj| d dd}n|dkrzt|}nt|d	 	 dddddgdddddgdkst
|dg 	 dddddgdddddgdkst
|td	dk d	 	 dddgdddgdks&t
t jjt jddddgt  dt ddddggddgd}t|}|dg}| 	 }|d ddddgkst
|d ddddgkst
|dg}| 	 }|d ddddgkst
|d ddddgkst
d S )NrC   r	   r   r   r
   rF   rE   rO  r*  r  r(  rQ   r  r*  r   r  )r  r*  )r*  
descendingr   r  r   rl  carr  Zfoobar)rE   r5  )rE   rz  )r*   r3   r   r   r9  r>   r  r   r   r   r9   r   r  r+   r0   r  r-   )r<  r  r3   r   Z
sorted_tabZsorted_tab_dictr&   r&   r'   test_dataset_sort_by  sT    


r7  )NN)N)NN)NN)r   r	   r   )rw   rE  rO  r   rg  r@  sysr  r7  r  r  urllib.parser   numpyr   r   r   r*   r{  r|  r  Zpyarrow.csvZpyarrow.jsonrA  rt   rQ   Zpyarrow.tests.utilr   r   r   r   r   r|   r   r|  r  r>   r   Zpyarrow.parquetr}   r[   markZ
pytestmarkr(   r4   rA   Zfixturer]   ry   r   r   r   r   r   r   r	  r  r  r  r  r  r?  rD  rI  rQ  r\  r_  ri  r  r  Zparametrizer   r  r  r  r  r  r  filterwarningsr  r  r  r  r  r  r  r  r  r  r	  r  r  r  r  r  r  r  r"  r(  r8  r;  r>  r?  rD  rN  rP  rR  rS  rT  rU  rX  r[  r_  r  re  rf  rk  rn  ro  rr  rs  rw  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  Zs3r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r
  r  r  r  r   r#  r$  r-  r0  r1  r;  r>  rB  rJ  rQ  rT  ZskipifplatformrW  rX  rZ  r^  r`  ra  rb  rg  ri  rk  rm  rn  rq  ru  rv  rx  ry  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r"  r%  r&  r(  r0  r2  r3  r4  r7  r&   r&   r&   r'   <module>   s  


"


1
.
G
;

0



y


&'H

% <
(9*


(   
d 
U%












	B

$



	9
B






B 9&
 
/
'=  #K0$#D&/$+#
/J
 N
 -%
 