U
    -es$                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZ d dlZd dl	mZ
 d dlZd dlmZ d dlmZ ejjeZeG dd dejZG dd	 d	ejZdS )
    N)	dataclass)Optional)
table_cast)readlinec                   @   s   e Zd ZU dZdZeej ed< dZ	e
ed< dZee
 ed< dZee
 ed< dZeed	< dZee ed
< dZeed< dZee ed< dS )
JsonConfigzBuilderConfig for JSON.Nfeaturesutf-8encodingencoding_errorsfieldTuse_threads
block_sizei   	chunksizenewlines_in_values)__name__
__module____qualname____doc__r   r   datasetsZFeatures__annotations__r	   strr
   r   r   boolr   intr   r    r   r   d/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/datasets/packaged_modules/json/json.pyr      s   
r   c                   @   s<   e Zd ZeZdd Zdd ZejejdddZ	dd	 Z
d
S )Jsonc                 C   s\   | j jd k	r"td | j j| j _| j jdk	r8td | j jd k	rLtdtj	| j j
dS )NzTThe JSON loader parameter `block_size` is deprecated. Please use `chunksize` insteadTzZThe JSON loader parameter `use_threads` is deprecated and doesn't have any effect anymore.zEThe JSON loader parameter `newlines_in_values` is no longer supported)r   )configr   loggerwarningr   r   r   
ValueErrorr   ZDatasetInfor   )selfr   r   r   _info#   s    
z
Json._infoc                    s   | j jstd| j j  | j j}t|tttfrv|}t|trL|g} fdd|D }tj	tj
jd|idgS g }| D ]B\}}t|tr|g} fdd|D }|tj	|d|id q|S )z-We handle string, list and dicts in datafilesz=At least one data file must be specified, but got data_files=c                    s   g | ]}  |qS r   Z
iter_files.0file
dl_managerr   r   
<listcomp>8   s     z*Json._split_generators.<locals>.<listcomp>files)nameZ
gen_kwargsc                    s   g | ]}  |qS r   r"   r#   r&   r   r   r(   >   s     )r   
data_filesr   Zdownload_and_extract
isinstancer   listtupler   ZSplitGeneratorZSplitZTRAINitemsappend)r    r'   r+   r)   ZsplitsZ
split_namer   r&   r   _split_generators/   s     

zJson._split_generators)pa_tablereturnc                 C   sn   | j jd k	rjt| j jt|j D ]6}| j jj|j}||tj	d gt
| |d}q"t|| j jj}|S )N)type)r   r   setZcolumn_namesZarrow_schemar   r4   append_columnpaarraylenr   )r    r2   Zcolumn_namer4   r   r   r   _cast_tableB   s    "zJson._cast_tablec                 #   s  t tj|D ]\}}| jjd k	rt|| jj| jjd}t	
| W 5 Q R X  | jj  t ttfrt jdd  D  } fdd|D }n }tj|}|| |fV  qt|d}d}t| jjd d	}	| jjd k	r| jjnd
}
|| jj}|sqz|| 7 }W n( ttjfk
rD   |t|7 }Y nX | jjdkrl|j| jj|
dd}zz&tjt|tj |	dd}W q"W n tj!tj"fk
r } z`t|tj!rdt#|ks|	t$|krނ n.t%&dt$| d|	 d|	d  d |	d9 }	W 5 d }~X Y nX qnW n tj!k
r } ztz0t|| jj| jjd}t	
| W 5 Q R X W n< t	j'k
r   t%(d| dt)| d|  |Y nX t trjz8t jdd  D  } fdd|D }tj|}W n\ tj!tfk
rL } z6t%(d| dt)| d|  t*d| dd W 5 d }~X Y nX || |fV  W Y NqnDt%(d| dt)| d|  t*d| dt#t +  dd W 5 d }~X Y nX ||f| |fV  |d7 }qW 5 Q R X qd S )N)r	   errorsc                 S   s   g | ]}|  qS r   keysr$   rowr   r   r   r(   Y   s     z)Json._generate_tables.<locals>.<listcomp>c                    s    i | ]   fd dD qS )c                    s   g | ]}|  qS r   getr>   colr   r   r(   Z   s     4Json._generate_tables.<locals>.<dictcomp>.<listcomp>r   r$   ZdatasetrB   r   
<dictcomp>Z   s      z)Json._generate_tables.<locals>.<dictcomp>rbr       i @  strictr   )r;   )r   )Zread_optionsZ
straddlingz	Batch of z* bytes couldn't be parsed with block_size=z. Retrying with block_size=   .zFailed to read file 'z' with error z: c                 S   s   g | ]}|  qS r   r<   r>   r   r   r   r(      s     c                    s    i | ]   fd dD qS )c                    s   g | ]}|  qS r   r@   r>   rB   r   r   r(      s     rD   r   rE   rF   rB   r   rG      s      z-Not able to read records in the JSON file at z. You should probably indicate the field of the JSON file containing your records. This JSON file contain the following fields: zX. Select the correct one and provide it as `field='XXX'` to the dataset loading method.    ),	enumerate	itertoolschainfrom_iterabler   r   openr	   r
   jsonloadr,   r-   r.   r5   unionr7   TableZfrom_pydictr:   maxr   readr   AttributeErrorioUnsupportedOperationdecodeencodepaj	read_jsonBytesIOZReadOptionsZArrowInvalidZArrowNotImplementedErrorr   r9   r   debugJSONDecodeErrorerrorr4   r   r=   )r    r)   Zfile_idxr%   fr=   mappingr2   Z	batch_idxr   r
   batcher   rF   r   _generate_tablesM   s     




$   
 $ 	zJson._generate_tablesN)r   r   r   r   ZBUILDER_CONFIG_CLASSr!   r1   r7   rV   r:   rh   r   r   r   r   r       s
   r   )rZ   rO   rS   dataclassesr   typingr   Zpyarrowr7   Zpyarrow.jsonr^   r   Zdatasets.tabler   Zdatasets.utils.file_utilsr   utilsloggingZ
get_loggerr   r   ZBuilderConfigr   ZArrowBasedBuilderr   r   r   r   r   <module>   s   