U
    ,:%ef                  8   @   s   d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZ dddZd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@h8Zee eee dAdBdCZG dDdE dEe
ZdS )F    N)Path)IterableListTupleUnion)Dataset)download_url_to_fileZ@209a8b4cd265013e96f4658632a9878103b0c5abf62b50d4ef3ae1be226b29e4Z@408ccaae803641c6d7b626b6299949320c2dbca96b2220fd3fb17887b023b027)Bhttp://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7bJhttp://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbolsz!EXCLAMATION-POINTz"CLOSE-QUOTEz"DOUBLE-QUOTEz"END-OF-QUOTEz
"END-QUOTEz
"IN-QUOTESz"QUOTEz"UNQUOTEz
#HASH-MARKz#POUND-SIGNz#SHARP-SIGNz%PERCENTz
&AMPERSANDz'END-INNER-QUOTEz
'END-QUOTEz'INNER-QUOTEz'QUOTEz'SINGLE-QUOTEz(BEGIN-PARENSz(IN-PARENTHESESz(LEFT-PARENz(OPEN-PARENTHESESz(PARENz(PARENSz(PARENTHESESz)CLOSE-PARENz)CLOSE-PARENTHESESz
)END-PARENz)END-PARENSz)END-PARENTHESESz)END-THE-PARENz)PARENz)PARENSz)RIGHT-PARENz)UN-PARENTHESESz+PLUSz,COMMAz--DASHz-DASHz-HYPHENz...ELLIPSISz.DECIMALz.DOTz
.FULL-STOPz.PERIODz.POINTz/SLASHz:COLONz;SEMI-COLONz;SEMI-COLON(1)z?QUESTION-MARKz{BRACEz{LEFT-BRACEz{OPEN-BRACEz}CLOSE-BRACEz}RIGHT-BRACE)linesexclude_punctuationsreturnc                 C   s   t d}g }| D ]}|r|dr&q| d\}}|tkrn|rFq|drVd}n|drfd}n|d }t |d|}|d}|||f q|S )	Nz
\([0-9]+\)z;;;z  z...z--r     )recompile
startswithstripsplit_PUNCTUATIONSsubappend)r   r   Z_alt_reZcmudictlinewordZphones r   Z/var/www/html/Darija-Ai-API/env/lib/python3.8/site-packages/torchaudio/datasets/cmudict.py_parse_dictionaryJ   s$    



r   c                	   @   s|   e Zd ZdZdddddeeef eeeeddd	d
Ze	e
eee f dddZe	dddZeee dddZdS )CMUDictaZ  *CMU Pronouncing Dictionary* :cite:`cmudict` (CMUDict) dataset.

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.
        exclude_punctuations (bool, optional):
            When enabled, exclude the pronounciation of punctuations, such as
            `!EXCLAMATION-POINT` and `#HASH-MARK`.
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
        url (str, optional):
            The URL to download the dictionary from.
            (default: ``"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b"``)
        url_symbols (str, optional):
            The URL to download the list of symbols from.
            (default: ``"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbols"``)
    TFr	   r
   )downloadurlurl_symbolsN)rootr   r   r   r    r   c          
   	   C   s  || _ t|| _tj| js,td| | jtj| }| jtj| }tj|s|sntd| t	
|d }t||| tj|s|std| t	
|d }t||| t|d}	dd |	 D | _W 5 Q R X t|ddd}	t|	 | j d	| _W 5 Q R X d S )
Nz#The root directory does not exist; z`The dictionary file is not found in the following location. Set `download=True` to download it. z\The symbol file is not found in the following location. Set `download=True` to download it. rc                 S   s   g | ]}|  qS r   )r   ).0r   r   r   r   
<listcomp>   s     z$CMUDict.__init__.<locals>.<listcomp>zlatin-1)encoding)r   )r   r   Z
_root_pathospathisdirRuntimeErrorbasenameexists
_CHECKSUMSgetr   open	readlines_symbolsr   _dictionary)
selfr!   r   r   r   r    Z	dict_fileZsymbol_fileZchecksumtextr   r   r   __init__{   s0    

zCMUDict.__init__)nr   c                 C   s
   | j | S )a  Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded.

        Returns:
            Tuple of a word and its phonemes

            str:
                Word
            List[str]:
                Phonemes
        )r1   )r2   r5   r   r   r   __getitem__   s    zCMUDict.__getitem__)r   c                 C   s
   t | jS )N)lenr1   r2   r   r   r   __len__   s    zCMUDict.__len__c                 C   s
   | j  S )zLlist[str]: A list of phonemes symbols, such as ``"AA"``, ``"AE"``, ``"AH"``.)r0   copyr8   r   r   r   symbols   s    zCMUDict.symbols)T)__name__
__module____qualname____doc__r   strr   boolr4   intr   r   r6   r9   propertyr;   r   r   r   r   r   i   s$    
)r   )r&   r   pathlibr   typingr   r   r   r   Ztorch.utils.datar   Ztorchaudio._internalr   r,   r   r@   rA   r   r   r   r   r   r   <module>   s   <