U
    -efm                     @   s:  d Z ddlZddlZddlZddlmZmZmZmZm	Z	m
Z
 ddlZddlZddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZ ddl m!Z! ddl"m#Z# ddl$m%Z%m&Z&m'Z' ddl(m)Z) ddl*m+Z+m,Z, e)e-Z.G dd de%Z/dd Z0G dd dZ1G dd de1Z2dS )z Metrics base class.    N)AnyDictListOptionalTupleUnion   )config)Dataset)ArrowReader)ArrowWriter)DownloadConfig)DownloadManager)Features)DatasetInfo
MetricInfo)camelcase_to_snakecase)
deprecated)BaseFileLockFileLockTimeout)
get_logger)copyfunc	temp_seedc                       s0   e Zd ZdZ fddZdd Zdd Z  ZS )FileFreeLockz-Thread lock until a file **cannot** be lockedc                    s"   t || _t j|f|| d S N)r   filelocksuper__init__)self	lock_fileargskwargs	__class__ P/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/datasets/metric.pyr   .   s    
zFileFreeLock.__init__c                 C   sH   z| j jddd W n tk
r2   | j j| _Y nX | j   d | _d S )Ng{Gz?g{Gz?)timeoutZpoll_intervall)r   acquirer   r    _lock_file_fdreleaser   r%   r%   r&   _acquire2   s    
zFileFreeLock._acquirec                 C   s
   d | _ d S r   )r)   r+   r%   r%   r&   _release=   s    zFileFreeLock._release)__name__
__module____qualname____doc__r   r,   r-   __classcell__r%   r%   r#   r&   r   +   s   r   c                 C   sP   t | tkrt| dkr|  S dd }d|| d d  d|| dd   dS )	N   c                 S   s   d dd | D S )Nz, c                 s   s   | ]}t |V  qd S r   )repr).0xr%   r%   r&   	<genexpr>H   s     z?summarize_if_long_list.<locals>.format_chunk.<locals>.<genexpr>)join)chunkr%   r%   r&   format_chunkG   s    z,summarize_if_long_list.<locals>.format_chunk[   z, ..., ])typelistlen)objr:   r%   r%   r&   summarize_if_long_listC   s    rC   c                   @   s  e Zd ZdZedddZedd Zeeddd	Z	ee
e dd
dZeedddZeedddZeedddZeedddZee
e dddZeedddZee
ee  dddZee
ee  dddZeedddZee
e dddZd S )!MetricInfoMixinu   This base class exposes some attributes of MetricInfo
    at the base level of the Metric for easy access.

    <Deprecated version="2.5.0">

    Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate

    </Deprecated>

    )infoc                 C   s
   || _ d S r   _metric_info)r   rE   r%   r%   r&   r   Y   s    zMetricInfoMixin.__init__c                 C   s   | j S )zN:class:`datasets.MetricInfo` object containing all the metadata in the metric.rF   r+   r%   r%   r&   rE   \   s    zMetricInfoMixin.inforeturnc                 C   s   | j jS r   )rG   metric_namer+   r%   r%   r&   namea   s    zMetricInfoMixin.namec                 C   s   | j jS r   )rG   experiment_idr+   r%   r%   r&   rL   e   s    zMetricInfoMixin.experiment_idc                 C   s   | j jS r   )rG   descriptionr+   r%   r%   r&   rM   i   s    zMetricInfoMixin.descriptionc                 C   s   | j jS r   )rG   citationr+   r%   r%   r&   rN   m   s    zMetricInfoMixin.citationc                 C   s   | j jS r   )rG   featuresr+   r%   r%   r&   rO   q   s    zMetricInfoMixin.featuresc                 C   s   | j jS r   )rG   inputs_descriptionr+   r%   r%   r&   rP   u   s    z"MetricInfoMixin.inputs_descriptionc                 C   s   | j jS r   )rG   homepager+   r%   r%   r&   rQ   y   s    zMetricInfoMixin.homepagec                 C   s   | j jS r   )rG   licenser+   r%   r%   r&   rR   }   s    zMetricInfoMixin.licensec                 C   s   | j jS r   )rG   codebase_urlsr+   r%   r%   r&   rS      s    zMetricInfoMixin.codebase_urlsc                 C   s   | j jS r   )rG   reference_urlsr+   r%   r%   r&   rT      s    zMetricInfoMixin.reference_urlsc                 C   s   | j jS r   )rG   
streamabler+   r%   r%   r&   rU      s    zMetricInfoMixin.streamablec                 C   s   | j jS r   )rG   formatr+   r%   r%   r&   rV      s    zMetricInfoMixin.formatN)r.   r/   r0   r1   r   r   propertyrE   strrK   r   rL   rM   rN   r   rO   rP   rQ   rR   r   rS   rT   boolrU   rV   r%   r%   r%   r&   rD   M   s8   
rD   c                   @   sP  e Zd ZdZedd2ee eee eeee ee ee	ee
f d		d
dZdd Zdd Zdd Zd3eeef dddZeee ee f dddZdd Zdd Zdd Zdddee dddZdddd d!Zddd"d#d$Zd4d%d&Zedd'd(Zd5ee ee d)d*d+Z d,d- Z!ddde"ee#f dd.d/Z$d0d1 Z%dS )6Metricu  A Metric is the base class and common API for all metrics.

    <Deprecated version="2.5.0">

    Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate

    </Deprecated>

    Args:
        config_name (``str``): This is used to define a hash specific to a metrics computation script and prevents the metric's data
            to be overridden when the metric loading script is modified.
        keep_in_memory (:obj:`bool`): keep all predictions and references in memory. Not possible in distributed settings.
        cache_dir (``str``): Path to a directory in which temporary prediction/references data will be stored.
            The data directory should be located on a shared file-system in distributed setups.
        num_process (``int``): specify the total number of nodes in a distributed settings.
            This is useful to compute metrics in distributed setups (in particular non-additive metrics like F1).
        process_id (``int``): specify the id of the current process in a distributed setup (between 0 and num_process-1)
            This is useful to compute metrics in distributed setups (in particular non-additive metrics like F1).
        seed (:obj:`int`, optional): If specified, this will temporarily set numpy's random seed when :func:`datasets.Metric.compute` is run.
        experiment_id (``str``): A specific experiment id. This is used if several distributed evaluations share the same file system.
            This is useful to compute metrics in distributed setups (in particular non-additive metrics like F1).
        max_concurrent_cache_files (``int``): Max number of concurrent metrics cache files (default 10000).
        timeout (``Union[int, float]``): Timeout in second for distributed setting synchronization.
    uO   Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluateNFr   r   '  d   )	config_namekeep_in_memory	cache_dirnum_process
process_idseedrL   max_concurrent_cache_filesr'   c
                 K   s  |pd| _ |  }t| jj|_| j |_ |p.d|_t| | t	|t
rP|dk rXtdt	|t
rj||krrtd|r|dkrtd|| _|| _|| _|| _tj|ptj| _|  | _|d krtj ^}}}}|dk r|| n|d | _n|| _|	| _tt| j | | _ tt| j!| | _!tt| j"| | _"| j j# j$| j%j&7  _$| j!j# j$| j%j&7  _$| j"j# j$| j%j&7  _$d | _'d | _(d | _)d | _*d | _+d | _,d | _-d | _.d | _/d S )	NdefaultZdefault_experimentr   z.'process_id' should be a number greater than 0z8'num_process' should be a number greater than process_idr   zPUsing 'keep_in_memory' is not possible in distributed setting (num_process > 1).ip  )0r]   _infor   r$   r.   rJ   rL   rD   r   
isinstanceint
ValueErrorr`   ra   rc   r^   ospath
expanduserr	   ZHF_METRICS_CACHE_data_dir_root_build_data_dirdata_dirnprandomZ	get_staterb   r'   types
MethodTyper   compute	add_batchadd__func__r1   rE   rP   
buf_writerwriterwriter_batch_sizedatacache_file_namer   rendez_vous_lock
file_paths	filelocks)r   r]   r^   r_   r`   ra   rb   rL   rc   r'   r"   rE   _posr%   r%   r&   r      sL    


zMetric.__init__c                 C   s   | j dkrdS t| j S )zReturn the number of examples (predictions or predictions/references pair)
        currently stored in the metric's cache.
        Nr   )rx   rA   r+   r%   r%   r&   __len__   s    zMetric.__len__c              	   C   s(   d| j  d| j d| j dt|  d	S )NzMetric(name: "z", features: z, usage: """z""", stored examples: ))rK   rO   rP   rA   r+   r%   r%   r&   __repr__   s    &zMetric.__repr__c                 C   s,   | j }tj|| j| j}tj|dd |S )a  Path of this metric in cache_dir:
        Will be:
            self._data_dir_root/self.name/self.config_name/self.hash (if not none)/
        If any of these element is missing or if ``with_version=False`` the corresponding subfolders are dropped.
        T)exist_ok)rl   ri   rj   r8   rK   r]   makedirs)r   Zbuilder_data_dirr%   r%   r&   rm      s    zMetric._build_data_dirrH   c                 C   s   t j| j| j d| j d| j d}d}t| jD ]}t	|d }z|j
|d W n tk
r   | jdkrtd| d| j d	d|| jd krtd
| j ddtt }t j| j| j d| d| j d| j d}Y q6X  qq6||fS )zTCreate a new cache file. If the default cache file is used, we generated a new hash.-.arrowN.lockr'   r   z^Error in _create_cache_file: another metric instance is already using the local cache file at .. Please specify an experiment_id (currently: :) to avoid collision between distributed metric instances.zCannot acquire lock, too many metric instance are operating concurrently on this file system.You should set a larger value of max_concurrent_cache_files when creating the metric (current value is z).)ri   rj   r8   rn   rL   r`   ra   rangerc   r   r(   r   rh   rX   uuiduuid4)r   r'   	file_pathr   iZ	file_uuidr%   r%   r&   _create_cache_file	  s4    (
  
zMetric._create_cache_filec              
      s    j dkr& jdkrtd jg}n fddt j D }g }t|D ]t\}}|dkrh| j qJt|d }z|j j	d W n, t
k
r   td	| d
| ddY qJX || qJ||fS )zGet a lock on all the cache files in a distributed setup.
        We wait for timeout second to let all the distributed node finish their tasks (default is 100 seconds).
        r   Nz|Metric cache file doesn't exist. Please make sure that you call `add` or `add_batch` at least once before calling `compute`.c                    s2   g | ]*}t j j j d  j d | dqS )r   r   ri   rj   r8   rn   rL   r`   r5   ra   r+   r%   r&   
<listcomp>6  s   z/Metric._get_all_cache_files.<locals>.<listcomp>r   r   r   z#Cannot acquire lock on cached file z for process .)r`   r{   rh   r   	enumerateappendr   r   r(   r'   r   )r   r}   r~   ra   r   r   r%   r+   r&   _get_all_cache_files*  s0    



zMetric._get_all_cache_filesc              
      sx    fddt  jD }|D ]V}t|}z|j jd W n. tk
rh   td| d j dd Y qX |  qd S )Nc                    s2   g | ]*}t j j j d  j d | dqS )r   z.arrow.lockr   r   r+   r%   r&   r   N  s   z5Metric._check_all_processes_locks.<locals>.<listcomp>r   Expected to find locked file  from process  but it doesn't exist.)	r   r`   r   r(   r'   r   rh   ra   r*   )r   Zexpected_lock_file_namesexpected_lock_file_name
nofilelockr%   r+   r&   _check_all_processes_locksM  s    
z!Metric._check_all_processes_locksc              	   C   s   t j| j| j d| j d}t|}z|j| jd W n. t	k
rh   t
d| d| j dd Y n
X |  t j| j| j d| j d}t|}z|j| jd W n. t	k
r   t
d| d| j d	d Y n
X |  d S )
Nr   z-0.arrow.lockr   r   r   r   	-rdv.lockzCouldn't acquire lock on r   )ri   rj   r8   rn   rL   r`   r   r(   r'   r   rh   ra   r*   r   )r   r   r   Zlock_file_namer|   r%   r%   r&   _check_rendez_vous]  s$       zMetric._check_rendez_vousc                 C   s   | j dk	r| j   d| _ | jdk	r8| jdkr8| j  | jrht| jt| j	dd}t
| j | _nv| jdkr|  \}}z4tdt| j	dd}t
f |dd |D | _W n tk
r   tddY nX || _|| _dS )	zClose all the writing process and load/gather the data
        from all the nodes if main node or all_process is True.
        Nr   rO   )rj   rE    c                 S   s   g | ]}d |iqS )filenamer%   )r5   fr%   r%   r&   r     s     z$Metric._finalize.<locals>.<listcomp>zError in finalize: another metric instance is already using the local cache file. Please specify an experiment_id to avoid collision between distributed metric instances.)rx   finalizer   ra   r*   r^   r   rn   r   rO   r
   Zfrom_bufferrw   getvaluerz   r   Z
read_filesFileNotFoundErrorrh   r}   r~   )r   readerr}   r~   r%   r%   r&   	_finalizeq  s*    



 zMetric._finalizepredictions
referencesc             	      s  ||d |dkr>|dkr> fddj D } | n2 fddj D }|rptd| dtj   fd	dj D }fd
dD }tdd | D rjf |   d_d_	j
dkrjjjjd fddj D }tj jf ||}W 5 Q R X jdk	rDd_`d_nXtttjjD ]@\}	}
td|
  `d_`d_t|
 |	  qZ|S dS dS )ai  Compute the metrics.

        Usage of positional arguments is not allowed to prevent mistakes.

        Args:
            predictions (list/array/tensor, optional): Predictions.
            references (list/array/tensor, optional): References.
            **kwargs (optional): Keyword arguments that will be forwarded to the metrics :meth:`_compute`
                method (see details in the docstring).

        Return:
            dict or None

            - Dictionary with the metrics if this metric is run on the main process (``process_id == 0``).
            - None if the metric is not run on the main process (``process_id != 0``).

        Example:

        ```py
        >>> from datasets import load_metric
        >>> metric = load_metric("accuracy")
        >>> accuracy = metric.compute(predictions=model_prediction, references=labels)
        ```
        r   Nc                    s   i | ]}| kr|d qS r   r%   r5   k
all_kwargsr%   r&   
<dictcomp>  s       z"Metric.compute.<locals>.<dictcomp>c                    s   g | ]}| kr|qS r%   r%   r   r   r%   r&   r     s      z"Metric.compute.<locals>.<listcomp>zMetric inputs are missing: . All required inputs are c                    s   i | ]}| | qS r%   r%   r5   Z
input_namer   r%   r&   r     s      c                    s    i | ]}|j kr| | qS r%   r   r   )r"   r   r%   r&   r     s     
  c                 s   s   | ]}|d k	V  qd S r   r%   )r5   vr%   r%   r&   r7     s     z!Metric.compute.<locals>.<genexpr>r   )r?   c                    s   i | ]}| j | qS r%   )rz   r   r+   r%   r&   r     s      z	Removing )rO   updaterh   r@   anyvaluesrt   r   r{   r   ra   rz   Z
set_formatrE   rV   r   rb   _computerw   reversedzipr~   r}   loggerrx   ri   remover*   )r   r   r   r"   Zmissing_kwargsZmissing_inputsinputsZcompute_kwargsoutputr   r   r%   )r   r"   r   r&   rs     sF    
zMetric.computec                   s|  fdd|D }|r0t d| dtj ||d|  fddjD  jj  jdkrr  zj  W n tj	k
rv   t
 fd	d
 D rtt  fdd D d }d dt   d| dt |  d	}nftjddgkrHdj d}d fdd
jD }||7 }n dj dt| dt| }t |dY nX dS )a  Add a batch of predictions and references for the metric's stack.

        Args:
            predictions (list/array/tensor, optional): Predictions.
            references (list/array/tensor, optional): References.

        Example:

        ```py
        >>> from datasets import load_metric
        >>> metric = load_metric("accuracy")
        >>> metric.add_batch(predictions=model_prediction, references=labels)
        ```
        c                    s   g | ]}| j kr|qS r%   r   r   r+   r%   r&   r     s     
 z$Metric.add_batch.<locals>.<listcomp>Bad inputs for metric: r   r   c                    s   i | ]}| | qS r%   r%   r5   Zintput_namebatchr%   r&   r     s      z$Metric.add_batch.<locals>.<dictcomp>Nc                 3   s.   | ]&}t  | t tt  kV  qd S r   )rA   nextiterr   r5   cr   r%   r&   r7     s     z#Metric.add_batch.<locals>.<genexpr>c                    s(   g | ] }t  | t   kr|qS r%   )rA   r   )r   col0r%   r&   r     s      r   zMismatch in the number of z (z) and r   r   r   @Metric inputs don't match the expected format.
Expected format: ,
c                 3   s&   | ]}d | dt  |  V  qdS zInput z: NrC   r   r   r%   r&   r7     s    zPPredictions and/or references don't match the expected format.
Expected format: z,
Input predictions: z,
Input references: )rh   r@   rO   rE   Zencode_batchrx   _init_writerZwrite_batchpaArrowInvalidr   r   r   rA   sortedr8   rC   )r   r   r   r"   
bad_inputsZbad_col	error_msgerror_msg_inputsr%   )r   r   r   r&   rt     s2    
,
zMetric.add_batch)
prediction	referencec                   s   fdd|D }|r0t d| dtj ||d|  fddjD  jj  jdkrr  zj  W nP tj	k
r   d	j d
}d

 fddjD }||7 }t |dY nX dS )a  Add one prediction and reference for the metric's stack.

        Args:
            prediction (list/array/tensor, optional): Predictions.
            reference (list/array/tensor, optional): References.

        Example:

        ```py
        >>> from datasets import load_metric
        >>> metric = load_metric("accuracy")
        >>> metric.add(predictions=model_predictions, references=labels)
        ```
        c                    s   g | ]}| j kr|qS r%   r   r   r+   r%   r&   r     s     
 zMetric.add.<locals>.<listcomp>r   r   r   c                    s   i | ]}| | qS r%   r%   r   exampler%   r&   r     s      zMetric.add.<locals>.<dictcomp>Nr   r   c                 3   s&   | ]}d | dt  |  V  qdS r   r   r   r   r%   r&   r7   $  s    zMetric.add.<locals>.<genexpr>)rh   r@   rO   rE   Zencode_examplerx   r   writer   r   r8   )r   r   r   r"   r   r   r   r%   )r   r   r&   ru   	  s"    
z
Metric.addc              	   C   s,  | j dkr| jdkrtj| j| j d| j  d}t|| _z| jj	|d W n. t
k
r~   td| d| j dd Y nX | jrt | _t| jj| j| jd	| _nJd | _| jd ks| jd kr|  \}}|| _|| _t| jj| j| jd
| _| j dkr(| jdkr |   | j  n|   d S )Nr   r   r   r   r   zXError in _init_writer: another metric instance is already using the local cache file at r   r   )rO   streamry   )rO   rj   ry   )r`   ra   ri   rj   r8   rn   rL   r   r|   r(   TimeoutErrorrh   r^   r   ZBufferOutputStreamrw   r   rE   rO   ry   rx   r{   r   r   r   r*   r   )r   r'   r   r{   r   r%   r%   r&   r   *  sD    

 

  
  zMetric._init_writerc                 C   s   t dS )a  Construct the MetricInfo object. See `MetricInfo` for details.

        Warning: This function is only called once and the result is cached for all
        following .info() calls.

        Returns:
            info: (MetricInfo) The metrics information
        NNotImplementedErrorr+   r%   r%   r&   re   Q  s    	zMetric._info)download_config
dl_managerc                 C   sN   |dkr@|dkr.t  }tj| jd|_d|_t| j|| jd}| 	| dS )a  Downloads and prepares dataset for reading.

        Args:
            download_config (:class:`DownloadConfig`, optional): Specific download configuration parameters.
            dl_manager (:class:`DownloadManager`, optional): Specific download manager to use.
        NZ	downloadsF)Zdataset_namer   rn   )
r   ri   rj   r8   rn   r_   Zforce_downloadr   rK   _download_and_prepare)r   r   r   r%   r%   r&   download_and_prepare\  s      zMetric.download_and_preparec                 C   s   dS )aY  Downloads and prepares resources for the metric.

        This is the internal implementation to overwrite called when user calls
        `download_and_prepare`. It should download all required resources for the metric.

        Args:
            dl_manager (:class:`DownloadManager`): `DownloadManager` used to download and cache data.
        Nr%   )r   r   r%   r%   r&   r   s  s    	zMetric._download_and_preparec                K   s   t dS )zEThis method defines the common API for all the metrics in the libraryNr   )r   r   r   r"   r%   r%   r&   r   ~  s    zMetric._computec                 C   s\   t | dr| jd k	r| j  t | dr<| jd k	r<| j  t | drJ| `t | drX| `d S )Nr   r|   rx   rz   )hasattrr   r*   r|   rx   rz   r+   r%   r%   r&   __del__  s    



zMetric.__del__)	NFNr   r   NNr[   r\   )r   )r   )NN)&r.   r/   r0   r1   r   r   rX   rY   rg   r   floatr   r   r   rm   r   r   r   r   r   r   r   r   dictrs   rt   ru   r   r   re   r   r   r   r   r   r   r   r   r%   r%   r%   r&   rZ      sX            
D!#"G/!
'  rZ   )3r1   ri   rq   r   typingr   r   r   r   r   r   numpyro   Zpyarrowr   r   r	   Zarrow_datasetr
   Zarrow_readerr   Zarrow_writerr   Zdownload.download_configr   Zdownload.download_managerr   rO   r   rE   r   r   Znamingr   Zutils.deprecation_utilsr   Zutils.filelockr   r   r   Zutils.loggingr   Zutils.py_utilsr   r   r.   r   r   rC   rD   rZ   r%   r%   r%   r&   <module>   s0    
E