U
    -er7                     @   s~  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
mZ d dlmZ d dlmZmZmZmZmZ ddlmZ ddlmZ dd	lmZ eeZG d
d dZG dd de
ZG dd dee
ZG dd deZG dd deZ G dd deZ!G dd deZ"G dd deZ#G dd deZ$G dd deZ%G dd deZ&G d d! d!eZ'G d"d# d#Z(dS )$    N)ABCabstractmethod)Path)DictListOptionalTypeUnion   )config   )FileLock)
get_loggerc                   @   sV   e Zd Zdee dddZeedddZeeedd	d
ZdeeedddZ	dS )ExtractManagerN)	cache_dirc                 C   s&   |rt j|tjntj| _t| _d S N)	ospathjoinr   ZEXTRACTED_DATASETS_DIRZEXTRACTED_DATASETS_PATHextract_dir	Extractor	extractor)selfr    r   W/var/www/html/Darija-Ai-Train/env/lib/python3.8/site-packages/datasets/utils/extract.py__init__   s    zExtractManager.__init__r   returnc                 C   s,   ddl m} tj|}tj| j||S )Nr   )hash_url_to_filename)Z
file_utilsr   r   r   abspathr   r   )r   r   r   abs_pathr   r   r   _get_output_path   s    zExtractManager._get_output_path)output_pathforce_extractr   c                 C   s*   |p(t j| o(t j|o&t | S r   )r   r   isfileisdirlistdir)r   r"   r#   r   r   r   _do_extract%   s    $zExtractManager._do_extractF)
input_pathr#   r   c                 C   s>   | j |}|s|S | |}| ||r:| j ||| |S r   )r   infer_extractor_formatr!   r'   extract)r   r(   r#   extractor_formatr"   r   r   r   r*   *   s    
zExtractManager.extract)N)F)
__name__
__module____qualname__r   strr   r!   boolr'   r*   r   r   r   r   r      s   r   c                   @   sV   e Zd Zeeeeef edddZ	e
eeeef eeef ddddZdS )BaseExtractorr   c                 K   s   d S r   r   clsr   kwargsr   r   r   is_extractable5   s    zBaseExtractor.is_extractableNr(   r"   r   c                 C   s   d S r   r   )r(   r"   r   r   r   r*   :   s    zBaseExtractor.extract)r,   r-   r.   classmethodr   r	   r   r/   r0   r5   staticmethodr*   r   r   r   r   r1   4   s   r1   c                   @   sZ   e Zd ZU g Zee ed< eee	e
f edddZed
ee	e
f eedddZd	S )MagicNumberBaseExtractormagic_numbersr   magic_number_lengthc              
   C   s,   t | d}||W  5 Q R  S Q R X d S )Nrb)openread)r   r<   fr   r   r   read_magic_numberC   s    z*MagicNumberBaseExtractor.read_magic_number    r   magic_numberr   c                    sX    s@t dd | jD }z| || W n tk
r>   Y dS X t fdd| jD S )Nc                 s   s   | ]}t |V  qd S r   )len.0Zcls_magic_numberr   r   r   	<genexpr>K   s     z:MagicNumberBaseExtractor.is_extractable.<locals>.<genexpr>Fc                 3   s   | ]}  |V  qd S r   )
startswithrF   rD   r   r   rH   P   s     )maxr:   rA   OSErrorany)r3   r   rD   r<   r   rJ   r   r5   H   s    z'MagicNumberBaseExtractor.is_extractableN)rB   )r,   r-   r.   r:   r   bytes__annotations__r8   r	   r   r/   intrA   r7   r0   r5   r   r   r   r   r9   @   s
   
r9   c                   @   sZ   e Zd Zeeeef edddZe	dd Z
e	eeef eeef dddd	ZdS )
TarExtractorr   c                 K   s
   t |S r   )tarfile
is_tarfiler2   r   r   r   r5   T   s    zTarExtractor.is_extractablec                 #   s   t t dddt t tdfdd t td fdd	}|}| D ]} |j|rntd
|j d qH| r|||rtd
|j d|j  qH| r|||rtd
|j d|j  qH|V  qHdS )a  
        Fix for CVE-2007-4559
        Desc:
            Directory traversal vulnerability in the (1) extract and (2) extractall functions in the tarfile
            module in Python allows user-assisted remote attackers to overwrite arbitrary files via a .. (dot dot)
            sequence in filenames in a TAR archive, a related issue to CVE-2001-1267.
        See: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2007-4559
        From: https://stackoverflow.com/a/10077309
        r   c                 S   s   t jt j| S r   )r   r   realpathr   )r   r   r   r   resolvedd   s    z*TarExtractor.safemembers.<locals>.resolved)r   baser   c                    s    t j|| | S r   )r   r   r   rI   )r   rV   )rU   r   r   badpathg   s    z)TarExtractor.safemembers.<locals>.badpath)rV   r   c                    s*   t j|t j| j} | j|dS )N)rV   )r   r   r   dirnamenamelinkname)inforV   ZtiprW   rU   r   r   badlinkk   s    z)TarExtractor.safemembers.<locals>.badlinkzExtraction of z is blocked (illegal path)z is blocked: Symlink to z is blocked: Hard link to N)r/   r0   rY   loggererrorissymrZ   islnk)membersr"   r]   rV   Zfinfor   r\   r   safemembersX   s    zTarExtractor.safemembersNr6   c                 C   s:   t j|dd t| }|j|t||d |  d S )NTexist_ok)rb   )r   makedirsrR   r>   
extractallrQ   rc   close)r(   r"   Ztar_filer   r   r   r*   |   s    
zTarExtractor.extract)r,   r-   r.   r7   r	   r   r/   r0   r5   r8   rc   r*   r   r   r   r   rQ   S   s   
#rQ   c                   @   s8   e Zd ZdgZeeeef eeef ddddZdS )GzipExtractors   Nr6   c              
   C   s>   t | d(}t|d}t|| W 5 Q R X W 5 Q R X d S Nr=   wb)gzipr>   shutilcopyfileobj)r(   r"   Z	gzip_fileextracted_filer   r   r   r*      s    zGzipExtractor.extract	r,   r-   r.   r:   r8   r	   r   r/   r*   r   r   r   r   ri      s   ri   c                       sd   e Zd ZdddgZedeeef ee	d fddZ
eeeef eeef dd	d
dZ  ZS )ZipExtractors   PKs   PKs   PKrB   rC   c              	      sD  t  j||drdS zddlm}m}m}m}m}m}m	}	m
}
m}m} t|d}|	|}|r|| dkr|| dkr|| dkrW 5 Q R  W dS || || kr|||  | || kr|| |
kr||
}t||
krt||}|| |krW 5 Q R  W dS W 5 Q R X W dS  tk
r>   Y dS X d S )NrJ   Tr   )
_CD_SIGNATURE_ECD_DISK_NUMBER_ECD_DISK_START_ECD_ENTRIES_TOTAL_ECD_OFFSET	_ECD_SIZE_EndRecDatasizeCentralDirstringCentralDirstructCentralDirr=   F)superr5   zipfilerr   rs   rt   ru   rv   rw   rx   ry   rz   r{   r>   seektellr?   rE   structunpack	Exception)r3   r   rD   rr   rs   rt   ru   rv   rw   rx   ry   rz   r{   fpendrecdatacentdir	__class__r   r   r5      s(    0$ 
zZipExtractor.is_extractableNr6   c              	   C   s<   t j|dd t| d}|| |  W 5 Q R X d S )NTrd   r)r   rf   r}   ZipFilerg   rh   )r(   r"   zip_filer   r   r   r*      s    
zZipExtractor.extract)rB   )r,   r-   r.   r:   r7   r	   r   r/   rN   r0   r5   r8   r*   __classcell__r   r   r   r   rq      s   "$rq   c                   @   s8   e Zd ZdgZeeeef eeef ddddZdS )XzExtractors   7zXZ Nr6   c              
   C   s<   t | (}t|d}t|| W 5 Q R X W 5 Q R X d S )Nrk   )lzmar>   rm   rn   r(   r"   compressed_filero   r   r   r   r*      s    zXzExtractor.extractrp   r   r   r   r   r      s   r   c                   @   s:   e Zd ZddgZeeeef eeef ddddZdS )RarExtractors   Rar! s   Rar! Nr6   c                 C   sD   t jstddd l}tj|dd || }|| |  d S )NzPlease pip install rarfiler   Trd   )	r   ZRARFILE_AVAILABLEImportErrorrarfiler   rf   ZRarFilerg   rh   )r(   r"   r   rfr   r   r   r*      s    

zRarExtractor.extractrp   r   r   r   r   r      s   r   c                   @   s8   e Zd ZdgZeeeef eeef ddddZdS )ZstdExtractors   (/Nr6   c              
   C   sZ   t jstddd l}| }t| d(}t|d}||| W 5 Q R X W 5 Q R X d S )NzPlease pip install zstandardr   r=   rk   )r   ZZSTANDARD_AVAILABLEr   Z	zstandardZZstdDecompressorr>   copy_stream)r(   r"   zstdZdctxZifhZofhr   r   r   r*      s    zZstdExtractor.extractrp   r   r   r   r   r      s   r   c                   @   s8   e Zd ZdgZeeeef eeef ddddZdS )Bzip2Extractors   BZhNr6   c              
   C   s>   t | d(}t|d}t|| W 5 Q R X W 5 Q R X d S rj   )bz2r>   rm   rn   r   r   r   r   r*      s    zBzip2Extractor.extractrp   r   r   r   r   r      s   r   c                   @   s8   e Zd ZdgZeeeef eeef ddddZdS )SevenZipExtractors   7z'Nr6   c              	   C   sJ   t jstddd l}tj|dd || d}|| W 5 Q R X d S )NzPlease pip install py7zrr   Trd   r   )r   ZPY7ZR_AVAILABLEr   py7zrr   rf   ZSevenZipFilerg   )r(   r"   r   archiver   r   r   r*      s    zSevenZipExtractor.extractrp   r   r   r   r   r      s   r   c                   @   s8   e Zd ZdgZeeeef eeef ddddZdS )Lz4Extractors   "MNr6   c              
   C   sV   t jstddd l}|j| d(}t|d}t|| W 5 Q R X W 5 Q R X d S )NzPlease pip install lz4r   r=   rk   )r   ZLZ4_AVAILABLEr   Z	lz4.frameframer>   rm   rn   )r(   r"   lz4r   ro   r   r   r   r*     s    zLz4Extractor.extractrp   r   r   r   r   r     s   r   c                
   @   s   e Zd ZU eeeeeee	e
ed	Zeeee f ed< edd Zeeeef edddZedeeef eed	d
dZeeeef edddZedeeef eeef ee ee ddddZdS )r   )	tarrl   zipxzZrarr   r   Z7zr   
extractorsc                 C   s   t dd | j D S )Nc                 s   s,   | ]$}t |tr|jD ]}t|V  qqd S r   )
issubclassr9   r:   rE   )rG   r   Zextractor_magic_numberr   r   r   rH     s
   
 z9Extractor._get_magic_number_max_length.<locals>.<genexpr>)rK   r   values)r3   r   r   r   _get_magic_number_max_length  s    z&Extractor._get_magic_number_max_lengthr;   c                 C   s,   zt j| |dW S  tk
r&   Y dS X d S )N)r<   rB   )r9   rA   rL   r;   r   r   r   _read_magic_number&  s    zExtractor._read_magic_numberF)r   return_extractorr   c                 C   s>   t jdtd | |}|r2|s$dS d| j| fS |s:dS dS )Nz{Method 'is_extractable' was deprecated in version 2.4.0 and will be removed in 3.0.0. Use 'infer_extractor_format' instead.categoryTF)FN)warningswarnFutureWarningr)   r   )r3   r   r   r+   r   r   r   r5   -  s    
zExtractor.is_extractabler   c                 C   sB   |   }| ||}| j D ]\}}|j||dr|  S qd S )NrJ   )r   r   r   itemsr5   )r3   r   Zmagic_number_max_lengthrD   r+   r   r   r   r   r)   9  s
    z Extractor.infer_extractor_formatN
deprecated)r(   r"   r+   r   r   c              
   C   s   t jt j|dd tt|d}t| tj	|dd |sL|dkr|dks^t
|ts~tjdtd |dkrx|n|}n
| j| }|||W  5 Q R  S tjdtd | j D ]*}||r|||  W  5 Q R  S qW 5 Q R X d S )	NTrd   z.lock)ignore_errorsr   zsParameter 'extractor' was deprecated in version 2.4.0 and will be removed in 3.0.0. Use 'extractor_format' instead.r   ztParameter 'extractor_format' was made required in version 2.4.0 and not passing it will raise an exception in 3.0.0.)r   rf   r   rX   r/   r   with_suffixr   rm   rmtree
isinstancer   r   r   r   r*   r   r5   )r3   r(   r"   r+   r   Z	lock_pathr   r   r   r*   A  s(    


zExtractor.extract)F)Nr   )r,   r-   r.   rQ   ri   rq   r   r   r   r   r   r   r   r   r/   r   r1   rO   r7   r   r8   r	   r   rP   r   r0   r5   r)   r   r*   r   r   r   r   r     s8   

  

r   ))r   rl   r   r   rm   r   rR   r   r}   abcr   r   pathlibr   typingr   r   r   r   r	    r   Zfilelockr   loggingr   r,   r^   r   r1   r9   rQ   ri   rq   r   r   r   r   r   r   r   r   r   r   r   <module>   s8   1
4

