U
    -et                     @   s   d dl Z d dlZd dlmZmZ d dlmZ d dlmZ d dl	Z
d dlZd dlmZ d dlmZ ejjeZeG dd dejZG d	d
 d
ejZdS )    N)InitVar	dataclass)StringIO)Optionalrequire_storage_cast)
table_castc                   @   s   e Zd ZU dZdZeej ed< dZ	e
ed< dZeee
  ed< dZee
 ed< d	Zeed
< dZeed< dZe
ed< dd ZdS )
TextConfigzBuilderConfig for text files.Nfeatureszutf-8encoding
deprecatederrorsencoding_errorsi   	chunksizeFkeep_linebreaksline	sample_byc                 C   s&   |dkr"t d| dt || _d S )Nr   z'errors' was deprecated in favor of 'encoding_errors' in version 2.14.0 and will be removed in 3.0.0.
You can remove this warning by passing 'encoding_errors=z
' instead.)warningswarnFutureWarningr   )selfr    r   d/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/datasets/packaged_modules/text/text.py__post_init__   s    
zTextConfig.__post_init__)__name__
__module____qualname____doc__r
   r   datasetsZFeatures__annotations__r   strr   r   r   r   intr   boolr   r   r   r   r   r   r	      s   
r	   c                   @   s<   e Zd ZeZdd Zdd ZejejdddZ	dd	 Z
d
S )Textc                 C   s   t j| jjdS )N)r
   )r   ZDatasetInfoconfigr
   )r   r   r   r   _info*   s    z
Text._infoc                    s   | j jstd| j j  | j j}t|tttfrv|}t|trL|g} fdd|D }tj	tj
jd|idgS g }| D ]B\}}t|tr|g} fdd|D }|tj	|d|id q|S )a  The `data_files` kwarg in load_dataset() can be a str, List[str], Dict[str,str], or Dict[str,List[str]].

        If str or List[str], then the dataset returns only the 'train' split.
        If dict, then keys should be from the `datasets.Split` enum.
        z=At least one data file must be specified, but got data_files=c                    s   g | ]}  |qS r   Z
iter_files.0file
dl_managerr   r   
<listcomp>:   s     z*Text._split_generators.<locals>.<listcomp>files)nameZ
gen_kwargsc                    s   g | ]}  |qS r   r&   r'   r*   r   r   r,   @   s     )r$   
data_files
ValueErrorZdownload_and_extract
isinstancer    listtupler   ZSplitGeneratorZSplitZTRAINitemsappend)r   r+   r/   r-   ZsplitsZ
split_namer   r*   r   _split_generators-   s     

zText._split_generators)pa_tablereturnc                 C   sf   | j jd k	rJ| j jj}tdd | j j D r<||}n
t||}|S |tdt	 iS d S )Nc                 s   s   | ]}t | V  qd S )Nr   )r(   featurer   r   r   	<genexpr>G   s     z#Text._cast_table.<locals>.<genexpr>text)
r$   r
   Zarrow_schemaallvaluescastr   paschemastring)r   r7   r@   r   r   r   _cast_tableD   s    

zText._cast_tablec              
   c   s   | j jd k	rt| j jndg}ttj|D ]\}}t|| j j| j j	d}| j j
dkrd}|| j j}|svq|| 7 }t| }| j jsdd |D }tjjt|g|d}||f| |fV  |d7 }qbn| j j
d	krd}d
}|| j j}	|	sqz||	7 }|| 7 }|d}tjjtdd |d d D g|d}||f| |fV  |d7 }|d }q|rtjjt|gg|d}||f| |fV  n@| j j
dkr| }
tjjt|
gg|d}|| |fV  W 5 Q R X q.d S )Nr;   )r   r   r   r   c                 S   s   g | ]}| d qS )
)rstrip)r(   r   r   r   r   r,   `   s     z)Text._generate_tables.<locals>.<listcomp>)names   Z	paragraph z

c                 S   s   g | ]}|r|qS r   r   )r(   Zexampler   r   r   r,   r   s      Zdocument)r$   r
   r2   	enumerate	itertoolschainfrom_iterableopenr   r   r   readr   readliner   	readlinesr   r?   TableZfrom_arraysarrayrB   split)r   r-   Zpa_table_namesZfile_idxr)   fZ	batch_idxbatchr7   Z	new_batchr;   r   r   r   _generate_tablesQ   sL    
 
zText._generate_tablesN)r   r   r   r	   ZBUILDER_CONFIG_CLASSr%   r6   r?   rQ   rB   rV   r   r   r   r   r#   '   s
   r#   )rJ   r   dataclassesr   r   ior   typingr   Zpyarrowr?   r   Zdatasets.features.featuresr   Zdatasets.tabler   utilsloggingZ
get_loggerr   loggerZBuilderConfigr	   ZArrowBasedBuilderr#   r   r   r   r   <module>   s   