U
    -e[                     @   s  d Z ddlZddlZddlZddlZddlmZ ddlmZm	Z	m
Z
mZmZmZ ddlZddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZmZmZ ddlmZ ddlm Z  ddl!m"Z" ddl#m$Z$ e"e%Z&G dd de'Z(edd-ddZ)edd.ddZ*edd/e+e+ee dddZ,edd0e+e+ee dddZ-d1e+eeee	e+f  ee eeee+f  eee+e$f  eee.e+f  d!d"d#Z/d2e+eee+e$f  ee eeee+f  ee+ eeee	e+f  d$d%d&Z0d3e+ee+ eee+ee+ e
e+ee+ee+ f f f  ee eeee+f  eee+e$f  eee.e+f  ed'd(d)Z1d4e+ee+ eee+ee+ e
e+ee+ee+ f f f  ee eeee+f  eee+e$f  eee.e+f  d*d+d,Z2dS )5z List and inspect datasets.    N)PurePath)DictListMappingOptionalSequenceUnion   )DownloadConfig)DownloadMode)StreamingDownloadManager)DatasetInfo)dataset_module_factoryget_dataset_builder_classimport_main_classload_dataset_buildermetric_module_factory)
deprecated)relative_to_absolute_path)
get_logger)Versionc                   @   s   e Zd ZdS )SplitsNotFoundErrorN)__name__
__module____qualname__ r   r   Q/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/datasets/inspect.pyr   /   s   r   z,Use 'huggingface_hub.list_datasets' instead.TFc                 C   s8   t j|d}| sdd |D }|s0dd |D }t|S )a]  List all the datasets scripts available on the Hugging Face Hub.

    Args:
        with_community_datasets (`bool`, *optional*, defaults to `True`):
            Include the community provided datasets.
        with_details (`bool`, *optional*, defaults to `False`):
            Return the full details on the datasets instead of only the short name.

    Example:

    ```py
    >>> from datasets import list_datasets
    >>> list_datasets()
    ['acronym_identification',
     'ade_corpus_v2',
     'adversarial_qa',
     'aeslc',
     'afrikaans_ner_corpus',
     'ag_news',
     ...
    ]
    ```
    )fullc                 S   s   g | ]}d |j kr|qS /id.0datasetr   r   r   
<listcomp>N   s     
 z!list_datasets.<locals>.<listcomp>c                 S   s   g | ]
}|j qS r   r    r"   r   r   r   r%   P   s     )huggingface_hublist_datasetslist)Zwith_community_datasetswith_detailsZdatasetsr   r   r   r'   3   s    r'   ux   Use 'evaluate.list_evaluation_modules' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluatec                 C   s0   t  }| sdd |D }|s,dd |D }|S )u  List all the metrics script available on the Hugging Face Hub.

    <Deprecated version="2.5.0">

    Use `evaluate.list_evaluation_modules` instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate

    </Deprecated>

    Args:
        with_community_metrics (:obj:`bool`, optional, default ``True``): Include the community provided metrics.
        with_details (:obj:`bool`, optional, default ``False``): Return the full details on the metrics instead of only the short name.

    Example:

    ```py
    >>> from datasets import list_metrics
    >>> list_metrics()
    ['accuracy',
     'bertscore',
     'bleu',
     'bleurt',
     'cer',
     'chrf',
     ...
    ]
    ```
    c                 S   s   g | ]}d |j kr|qS r   r    r#   Zmetricr   r   r   r%   u   s     
 z list_metrics.<locals>.<listcomp>c                 S   s   g | ]
}|j qS r   r    r*   r   r   r   r%   w   s     )r&   list_metrics)Zwith_community_metricsr)   Zmetricsr   r   r   r+   T   s    r+   z?Clone the dataset repository from the Hugging Face Hub instead.)path
local_pathdownload_configc              
   K   s   t | fd|i|}t|}t|}tj|}t|D ]|\}}	}
tj|tj	||}tj
|dd dd |	D |	dd< |
D ]$}ttj||tj|| qt|| q<t|}td|  d| d	| d
t|  d	 dS )a  
    Allow inspection/modification of a dataset script by copying on local drive at local_path.

    Args:
        path (`str`): Path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name
                as the directory),
                e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`.
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`list_datasets`])
                e.g. `'squad'`, `'glue'` or `'openai/webtext'`.
        local_path (`str`):
            Path to the local folder to copy the dataset script to.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        **download_kwargs (additional keyword arguments):
            Optional arguments for [`DownloadConfig`] which will override
            the attributes of `download_config` if supplied.
    r.   Texist_okc                 S   s   g | ]}| d s|qS ).__
startswithr#   dirnamer   r   r   r%      s     
 z#inspect_dataset.<locals>.<listcomp>Nz"The processing script for dataset  can be inspected at . The main class is in zP. You can modify this processing script and use it with `datasets.load_dataset("")`.)r   r   inspectgetsourcefileosr,   r7   walkjoinrelpathmakedirsshutilcopy2copystatr   printr   as_posix)r,   r-   r.   download_kwargsdataset_modulebuilder_clsmodule_source_pathmodule_source_dirpathdirpathdirnames	filenamesdst_dirpathfilenamer   r   r   inspect_dataset{   s    
"$rQ   uz   Use 'evaluate.inspect_evaluation_module' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluatec              
   K   s   t | fd|i|}t|jdd}t|}tj|}t|D ]|\}}	}
tj	|tj
||}tj|dd dd |	D |	dd< |
D ]$}ttj	||tj	|| qt|| qBt|}td	|  d
| d| dt|  d	 dS )u  
    Allow inspection/modification of a metric script by copying it on local drive at local_path.

    <Deprecated version="2.5.0">

    Use `evaluate.inspect_evaluation_module` instead, from the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate

    </Deprecated>

    Args:
        path (``str``): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. ``'./dataset/squad'`` or ``'./dataset/squad/squad.py'``
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with ``datasets.list_datasets()``)
                e.g. ``'squad'``, ``'glue'`` or ``'openai/webtext'``
        local_path (``str``): path to the local folder to copy the datset script to.
        download_config (Optional ``datasets.DownloadConfig``): specific download configuration parameters.
        **download_kwargs (additional keyword arguments): optional attributes for DownloadConfig() which will override the attributes in download_config if supplied.
    r.   F)r$   Tr/   c                 S   s   g | ]}| d s|qS r1   r4   r6   r   r   r   r%      s     
 z"inspect_metric.<locals>.<listcomp>Nz"The processing scripts for metric r8   r9   zP. You can modify this processing scripts and use it with `datasets.load_metric("r:   )r   r   module_pathr;   r<   r=   r,   r7   r>   r?   r@   rA   rB   rC   rD   r   rE   r   rF   )r,   r-   r.   rG   Zmetric_moduleZ
metric_clsrJ   rK   rL   rM   rN   rO   rP   r   r   r   inspect_metric   s    
"$rS   r   )r,   
data_filesr.   download_moderevisiontokenc           	         sR   |dkr t d| dt |td} fdd|D S )a!
  Get the meta information about a dataset, returned as a dict mapping config name to DatasetInfoDict.

    Args:
        path (`str`): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`datasets.list_datasets`])
                e.g. `'squad'`, `'glue'` or``'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default:
            - it is set to the local version of the lib.
            - it will also try to load it from the main branch if it's not available at the local version of the lib.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.
        use_auth_token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.

            <Deprecated version="2.14.0">

            `use_auth_token` was deprecated in favor of `token` in version 2.14.0 and will be removed in 3.0.0.

            </Deprecated>

        **config_kwargs (additional keyword arguments):
            Optional attributes for builder class which will override the attributes if supplied.

    Example:

    ```py
    >>> from datasets import get_dataset_infos
    >>> get_dataset_infos('rotten_tomatoes')
    {'default': DatasetInfo(description="Movie Review Dataset.
This is a dataset of containing 5,331 positive and 5,331 negative processed
sentences from Rotten Tomatoes movie reviews...), ...}
    ```
    r   'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.
You can remove this warning by passing 'token=
' instead.)r,   rV   r.   rU   rT   rW   c                    s,   i | ]$}|t f |d  qS )r,   config_namerT   r.   rU   rV   rW   )get_dataset_config_info)r#   r[   config_kwargsrT   r.   rU   r,   rV   rW   r   r   
<dictcomp>  s    z%get_dataset_infos.<locals>.<dictcomp>)warningswarnFutureWarningget_dataset_config_names)	r,   rT   r.   rU   rV   rW   use_auth_tokenr^   Zconfig_namesr   r]   r   get_dataset_infos   s"    7
re   )r,   rV   r.   rU   dynamic_modules_pathrT   c           	      K   sN   t | f|||||d|}t|tj| d}t|j pL|j	ddgS )a  Get the list of available config names for a particular dataset.

    Args:
        path (`str`): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`datasets.list_datasets`])
                e.g. `'squad'`, `'glue'` or `'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default:
            - it is set to the local version of the lib.
            - it will also try to load it from the main branch if it's not available at the local version of the lib.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        dynamic_modules_path (`str`, defaults to `~/.cache/huggingface/modules/datasets_modules`):
            Optional path to the directory in which the dynamic modules are saved. It must have been initialized with `init_dynamic_modules`.
            By default the datasets and metrics are stored inside the `datasets_modules` module.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        **download_kwargs (additional keyword arguments):
            Optional attributes for [`DownloadConfig`] which will override the attributes in `download_config` if supplied,
            for example `token`.

    Example:

    ```py
    >>> from datasets import get_dataset_config_names
    >>> get_dataset_config_names("glue")
    ['cola',
     'sst2',
     'mrpc',
     'qqp',
     'stsb',
     'mnli',
     'mnli_mismatched',
     'mnli_matched',
     'qnli',
     'rte',
     'wnli',
     'ax']
    ```
    )rV   r.   rU   rf   rT   )dataset_namer[   default)
r   r   r=   r,   basenamer(   Zbuilder_configskeysZbuilder_kwargsget)	r,   rV   r.   rU   rf   rT   rG   rH   rI   r   r   r   rc   '  s    8	rc   )r,   r[   rT   r.   rU   rV   rW   returnc              
      s   |dkr t d| dt |}t f||||||d|}	|	j}
|
jdkr|rZ| nt }|dk	rn||_|		t
|	j|d z( fdd|	t
|	j|dD |
_W n, tk
r } ztd	|W 5 d}~X Y nX |
S )
a  Get the meta information (DatasetInfo) about a dataset for a particular config

    Args:
        path (``str``): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. ``'./dataset/squad'`` or ``'./dataset/squad/squad.py'``
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with ``datasets.list_datasets()``)
                e.g. ``'squad'``, ``'glue'`` or ``'openai/webtext'``
        config_name (:obj:`str`, optional): Defining the name of the dataset configuration.
        data_files (:obj:`str` or :obj:`Sequence` or :obj:`Mapping`, optional): Path(s) to source data file(s).
        download_config (:class:`~download.DownloadConfig`, optional): Specific download configuration parameters.
        download_mode (:class:`DownloadMode` or :obj:`str`, default ``REUSE_DATASET_IF_EXISTS``): Download/generate mode.
        revision (:class:`~utils.Version` or :obj:`str`, optional): Version of the dataset script to load.
            As datasets have their own git repository on the Datasets Hub, the default version "main" corresponds to their "main" branch.
            You can specify a different version than the default "main" by using a commit SHA or a git tag of the dataset repository.
        token (``str`` or :obj:`bool`, optional): Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If True, or not specified, will get token from `"~/.huggingface"`.
        use_auth_token (``str`` or :obj:`bool`, optional): Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If True, or not specified, will get token from `"~/.huggingface"`.

            <Deprecated version="2.14.0">

            `use_auth_token` was deprecated in favor of `token` in version 2.14.0 and will be removed in 3.0.0.

            </Deprecated>

        **config_kwargs (additional keyword arguments): optional attributes for builder class which will override the attributes if supplied.

    r   rX   rY   )namerT   r.   rU   rV   rW   N)	base_pathr.   c                    s   i | ]}|j |j  d qS ))rm   rg   )rm   )r#   Zsplit_generatorr,   r   r   r_     s    z+get_dataset_config_info.<locals>.<dictcomp>z<The split names could not be parsed from the dataset config.)r`   ra   rb   r   infosplitscopyr
   rW   Z_check_manual_downloadr   rn   Z_split_generators	Exceptionr   )r,   r[   rT   r.   rU   rV   rW   rd   r^   Zbuilderrp   errr   ro   r   r\   l  sF    )



r\   rZ   c           
   	   K   sL   |dkr t d| dt |}t| f||||||d|}	t|	j S )aW	  Get the list of available splits for a particular config and dataset.

    Args:
        path (`str`): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`datasets.list_datasets`])
                e.g. `'squad'`, `'glue'` or `'openai/webtext'`
        config_name (`str`, *optional*):
            Defining the name of the dataset configuration.
        data_files (`str` or `Sequence` or `Mapping`, *optional*):
            Path(s) to source data file(s).
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        revision ([`Version`] or `str`, *optional*):
            Version of the dataset script to load.
            As datasets have their own git repository on the Datasets Hub, the default version "main" corresponds to their "main" branch.
            You can specify a different version than the default "main" by using a commit SHA or a git tag of the dataset repository.
        token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.
        use_auth_token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.

            <Deprecated version="2.14.0">

            `use_auth_token` was deprecated in favor of `token` in version 2.14.0 and will be removed in 3.0.0.

            </Deprecated>

        **config_kwargs (additional keyword arguments):
            Optional attributes for builder class which will override the attributes if supplied.

    Example:

    ```py
    >>> from datasets import get_dataset_split_names
    >>> get_dataset_split_names('rotten_tomatoes')
    ['train', 'validation', 'test']
    ```
    r   rX   rY   )r[   rT   r.   rU   rV   rW   )r`   ra   rb   r\   r(   rq   rj   )
r,   r[   rT   r.   rU   rV   rW   rd   r^   rp   r   r   r   get_dataset_split_names  s&    8

ru   )TF)TF)N)N)NNNNNr   )NNNNN)NNNNNNr   )NNNNNNr   )3__doc__r;   r=   rB   r`   pathlibr   typingr   r   r   r   r   r   r&   Zdownload.download_configr
   Zdownload.download_managerr   Z#download.streaming_download_managerr   rp   r   loadr   r   r   r   r   Zutils.deprecation_utilsr   Zutils.file_utilsr   Zutils.loggingr   Zutils.versionr   r   logger
ValueErrorr   r'   r+   strrQ   rS   boolre   rc   r\   ru   r   r   r   r   <module>   s     $)+      X     G       (Q       (