U
    -e(                  &   @   s  d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZ d dlZd dlmZ ddlmZ ddlmZ d	d
lmZ eeZG dd dejZee
e	e ef dddZedG dd deZG dd deeeeef f Zg g g g g g g g g g g g g g g g g g g g g g g g g g g g g g g g g g g g g d%Zedkrd dlm Z  e ddZ!e!"d e!# Z$ee$j%Z%e&e%Z'e(e' e')e% dS )    N)Counter)Path)AnyClassVarDictOptionalTupleUnion)DatasetCardData   )METADATA_CONFIGS_FIELD)
get_logger   )
deprecatedc                       s&   e Zd Zdd Zd fdd	Z  ZS )_NoDuplicateSafeLoaderc                    sR   fdd|j D }dd |D }t|  fdd D }|rNtd| d S )Nc                    s   g | ]\}} j | qS  )Zconstructed_objects).0Zkey_node_selfr   X/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/datasets/utils/metadata.py
<listcomp>   s     zS_NoDuplicateSafeLoader._check_no_duplicates_on_constructed_node.<locals>.<listcomp>c                 S   s"   g | ]}t |trt|n|qS r   )
isinstancelisttupler   keyr   r   r   r      s     c                    s   g | ]} | d kr|qS )r   r   r   )counterr   r   r      s      zGot duplicate yaml keys: )valuer   	TypeError)r   nodekeysZduplicate_keysr   )r   r   r   (_check_no_duplicates_on_constructed_node   s    z?_NoDuplicateSafeLoader._check_no_duplicates_on_constructed_nodeFc                    s   t  j||d}| | |S )N)deep)superconstruct_mappingr"   )r   r    r#   mapping	__class__r   r   r%      s    
z(_NoDuplicateSafeLoader.construct_mapping)F)__name__
__module____qualname__r"   r%   __classcell__r   r   r'   r   r      s   r   readme_contentreturnc                 C   s|   t |  }|rn|d dkrnd|dd  krn|dd  dd }d|d| }|d||d d  fS d d|fS )Nr   z---r   
)r   
splitlinesindexjoin)r.   full_contentZsep_idxZ	yamlblockr   r   r   _split_yaml_from_readme    s     r5   z.Use `huggingface_hub.DatasetCardData` instead.c                   @   st   e Zd ZdhZeeeef d dddZedddZ	de
e ed	d
dZeed dddZedddZdS )DatasetMetadataZtrain_eval_index)pathr/   c              	   C   sD   t |dd}t| \}}W 5 Q R X |dk	r:| |S |  S dS )aS  Loads and validates the dataset metadata from its dataset card (README.md)

        Args:
            path (:obj:`Path`): Path to the dataset card (its README.md file)

        Returns:
            :class:`DatasetMetadata`: The dataset's metadata

        Raises:
            :obj:`TypeError`: If the dataset's metadata is invalid
        utf-8encodingN)openr5   readfrom_yaml_string)clsr7   readme_fileZyaml_stringr   r   r   r   from_readme/   s
    
zDatasetMetadata.from_readme)r7   c              	   C   s`   |  r*t|dd}| }W 5 Q R X nd }| |}t|ddd}|| W 5 Q R X d S )Nr8   r9   w)existsr;   r<   
_to_readmewrite)r   r7   r?   r.   Zupdated_readme_contentr   r   r   	to_readmeC   s    
zDatasetMetadata.to_readmeNr-   c                 C   s>   |d k	r*t |\}}d|   d | }nd|   d }|S )Nz---
)r5   to_yaml_string)r   r.   r   contentr4   r   r   r   rC   M   s
    zDatasetMetadata._to_readme)stringr/   c                    s2   t j|tdpi } fdd| D } f |S )a'  Loads and validates the dataset metadata from a YAML string

        Args:
            string (:obj:`str`): The YAML string

        Returns:
            :class:`DatasetMetadata`: The dataset's metadata

        Raises:
            :obj:`TypeError`: If the dataset's metadata is invalid
        )Loaderc                    s4   i | ],\}}| d d jkr*| d dn||qS )-r   )replace_FIELDS_WITH_DASHESr   r   r   r>   r   r   
<dictcomp>e   s     z4DatasetMetadata.from_yaml_string.<locals>.<dictcomp>)yamlloadr   items)r>   rH   metadata_dictr   rN   r   r=   U   s
    
z DatasetMetadata.from_yaml_stringr/   c                    s*   t j fdd  D dddddS )Nc                    s,   i | ]$\}}| j kr"|d dn||qS )r   rJ   )rL   rK   rM   r   r   r   rO   m   s    z2DatasetMetadata.to_yaml_string.<locals>.<dictcomp>FTr8   )	sort_keysZallow_unicoder:   )rP   Z	safe_dumprR   decoder   r   r   r   rF   k   s    
zDatasetMetadata.to_yaml_string)N)r)   r*   r+   rL   classmethodr	   r   strr@   rE   r   rC   r=   rF   r   r   r   r   r6   *   s   
r6   c                   @   sj   e Zd ZU dZeZee ed< e	e
dddZeed dddZed	dd
dZee dddZd	S )MetadataConfigsz5Should be in format {config_name: {**config_params}}.
FIELD_NAME)metadata_configc                 C   s   |  d}|d k	rtd| d}t|ttfs:t|t|tr|D ]L}t|ttfrt|trHt|dkrd|krt| dttfsHt|qHd S )N
data_filesz
                Expected data_files in YAML to be either a string or a list of strings
                or a list of dicts with two keys: 'split' and 'path', but got a  
                Examples of data_files in YAML:

                   data_files: data.csv

                   data_files: data/*.png

                   data_files:
                    - part0/*
                    - part1/*

                   data_files:
                    - split: train
                      path: train/*
                    - split: test
                      path: test/*

                   data_files:
                    - split: train
                      path:
                      - train/part1/*
                      - train/part2/*
                    - split: test
                      path: test/*
                r   splitr7   )	gettextwrapdedentr   r   rX   
ValueErrordictlen)r[   Zyaml_data_filesZyaml_error_messageZyaml_data_files_itemr   r   r   $_raise_if_data_files_field_not_valid|   s,    


	z4MetadataConfigs._raise_if_data_files_field_not_valid)dataset_card_datar/   c                 C   s|   | | jrv|| j }t|ts8td| j d| d|D ]&}d|krXtd| d| | q<| dd |D S |  S )	Nz	Expected z to be a list, but got ''config_namezUEach config must include `config_name` field with a string name of a config, but got z. c                 S   s$   i | ]}|d  dd |  D qS )rg   c                 S   s   i | ]\}}|d kr||qS rg   r   )r   paramr   r   r   r   rO      s       zEMetadataConfigs.from_dataset_card_data.<locals>.<dictcomp>.<dictcomp>)rR   )r   configr   r   r   rO      s    z:MetadataConfigs.from_dataset_card_data.<locals>.<dictcomp>)r^   rZ   r   r   ra   rd   )r>   re   Zmetadata_configsr[   r   r   r   from_dataset_card_data   s     


z&MetadataConfigs.from_dataset_card_dataNc                 C   st   | rp|   D ]}| | q| |}tt||  }| D ]\}}|dd  qBdd | D || j< d S )Nrg   c                 S   s   g | ]\}}d |i|qS rh   r   )r   rg   config_metadatar   r   r   r      s   z8MetadataConfigs.to_dataset_card_data.<locals>.<listcomp>)valuesrd   rk   rb   sortedrR   poprZ   )r   re   r[   Zcurrent_metadata_configsZtotal_metadata_configsrg   rl   r   r   r   to_dataset_card_data   s    
z$MetadataConfigs.to_dataset_card_datarT   c                 C   sP   d }|   D ]>\}}|dks&|dr|d kr4|}qtd| d| dq|S )Ndefaultz&Dataset has several default configs: 'z' and 'z'.)rR   r^   ra   )r   Zdefault_config_namerg   r[   r   r   r   get_default_config_name   s    z'MetadataConfigs.get_default_config_name)r)   r*   r+   __doc__r   rZ   r   rX   __annotations__staticmethodrb   rd   rW   r
   rk   rp   r   rr   r   r   r   r   rY   w   s   
/rY   )%zimage-classificationtranslationzimage-segmentationz	fill-maskzautomatic-speech-recognitionztoken-classificationzsentence-similarityzaudio-classificationzquestion-answeringZsummarizationzzero-shot-classificationztable-to-textzfeature-extractionotherzmultiple-choiceztext-classificationztext-to-imageztext2text-generationzzero-shot-image-classificationztabular-classificationztabular-regressionzimage-to-imageztabular-to-textzunconditional-image-generationztext-retrievalztext-to-speechzobject-detectionzaudio-to-audioztext-generationZconversationalztable-question-answeringzvisual-question-answeringzimage-to-textzreinforcement-learningzvoice-activity-detectionztime-series-forecastingzdocument-question-answering__main__)ArgumentParserz5Validate the yaml metadata block of a README.md file.)usagereadme_filepath)*r_   collectionsr   pathlibr   typingr   r   r   r   r   r	   rP   Zhuggingface_hubr
   rj   r   Zutils.loggingr   Zdeprecation_utilsr   r)   loggerZ
SafeLoaderr   rX   r5   rb   r6   rY   Zknown_task_idsargparsery   Zapadd_argument
parse_argsargsr{   r@   Zdataset_metadataprintrE   r   r   r   r   <module>   sz    
L h)




