U
    }fR                     @   s  d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZ d dlmZmZ d dlZd dlZd dlZd dlmZ d dlmZ d dlZd dlmZ d dlZd dlZd	d
 Zdd Zdd Zdd Zdd Zdd Zd1ddZdd Z dd Z!dd Z"ee
e#e	f  ee
e#e	f  ee
e#e	f  ddd Z$ee
e#e	f  ee
e#e	f  ee
e#e	f  dd!d"Z%d#d$ Z&d%d& Z'd'd( Z(d)d* Z)d+d, Z*d-d. Z+d/d0 Z,dS )2    N)detect)Counter	stopwords)AnyDictList)medianstdev)
STOP_WORDS)tqdmc                 C   s   t | d}| }W 5 Q R X g }|D ]f}t|}|d|d|d|d|d|d|d|d	|d
d	}|| q&|S )Nrcidtextauthorchannelvotesphotoheartreplytime_parsed)	r   r   r   r   r   r   r   r   r   )open	readlinesjsonloadsgetappend)	file_pathfiledataresultline	json_dataZ
attributes r#   P/var/www/html/Presentation_Sanoa_Al_oula_2024_Ramdan/src/apps/dashboard/utils.pyread_json_file   s"    
r%   c           	      C   s   i }| D ]F}|d }t j |d }||kr||kr>d||< ||  d7  < qg }|D ],}|d}||d}|||gd qXdd |D S )	N	Timestampz%Y-%m-%d %H:%M:%Sr      z%A)namer   c                 S   s   g | ]}|d  d qS )r   r   r#   ).0itemr#   r#   r$   
<listcomp>E   s     z%count_ads_by_date.<locals>.<listcomp>)datetimestrptimedatestrftimer   r   )	ads_dataZ
week_datesZ
date_countadZdate_strdate_obj
chart_dataday_namecountr#   r#   r$   count_ads_by_date-   s"    

r6   c                    sV   t t| d\}}tj| d| dd  fddtdD }dd |D }|S )Nz-Wz-1z	%Y-W%W-%wc                    s   g | ]} t j|d  qS ))days)r,   	timedelta)r)   iZ	first_dayr#   r$   r+   J   s     z"get_week_dates.<locals>.<listcomp>   c                 S   s$   g | ]}t j |d d  qS )%Y-%m-%d)r,   r-   r/   r.   )r)   r.   r#   r#   r$   r+   K   s     )mapintsplitr,   r-   range)Zweek_stringyearZweek_numdatesZdate_objectsr#   r:   r$   get_week_datesG   s
    rC   c                 C   s*   | D ] }|d |kr| dg   S qg S )Nidbrands)r   )categories_datarD   categoryr#   r#   r$   get_brands_by_idN   s    rH   c                 C   sj   | d}dd |D }| D ]$}|d }||kr||  d7  < qdd | D }dd | D }|S )	N,c                 S   s   i | ]
}|d qS r   r#   )r)   brandr#   r#   r$   
<dictcomp>Y   s      z&count_ads_by_brand.<locals>.<dictcomp>Brandr'   c                 S   s   i | ]\}}|d kr||qS rJ   r#   r)   rK   r5   r#   r#   r$   rL   c   s       c                 S   s   g | ]\}}||d qS ))r(   yr#   rN   r#   r#   r$   r+   f   s     z&count_ads_by_brand.<locals>.<listcomp>)r?   items)r0   	brand_strrE   Zbrand_countr1   rK   r   r#   r#   r$   count_ads_by_brandT   s    
rR   c                 C   s   |  dd  }|dkr,tj| dgd}n$|dkrHtj| dgdd}ntd	|jd
dgdd}|jdd}|D ]}t|d |d< d|d< qrt	
|}t	|}|S )N.Zxlsxr&   )parse_datesZcsvz%m/%d/%Y %I:%M:%S %p)rU   date_formatzFile type not supported.Z	Ad_IframeZ
Day_Iframer'   )Zaxisrecords)Zorientr   r5   )r?   lowerpd
read_excelZread_csv
ValueErrordropZto_dictstrr   dumpsr   )r   Z	file_typedfr   datumr"   json_objectr#   r#   r$   get_excel_adsn   s    


rb   F   c              
      s  t | d}t|}W 5 Q R X g }d}tt|dd}|D ]}|d |d D ]}	|	d |krR|	d  |	d }
t fd	d
|D d }|d kr g g ddddd}|| |	d D ]&}|d |d kr|d |d  q|	d D ]&}|d |d kr|d |d  q|d  d7  < |d  |
7  < qRq<|D ]8}|d dkr8|d |d  |d< |d | |d< q8g }|D ]:}|d dkrzd|d krzd|d krz|| qz|  |S )Nr      zProcessing frames)totaldescr'   responseZ
ConfidenceNamec                 3   s   | ]}|d   kr|V  qdS )objectNr#   )r)   dr(   r#   r$   	<genexpr>   s      z'generate_static_json.<locals>.<genexpr>r   )ri   parents
categoriesr5   total_confidenceavg_confidencesecondsZParentsrm   Z
Categoriesrn   r5   ro   rp   rq      zEvents and AttractionszWeapons and Military)	r   r   loadr   lenupdatenextr   close)Zoriginal_json_file_pathZstatic_json_file_pathconfidence_valuefZoriginal_jsonresultsZfpsZpbarframeobjZ
confidenceobj_dictparentrG   Zfinal_resultr#   rk   r$   generate_static_json   sP    
	
*r   c                 C   s   t t| S N)setr   words)langr#   r#   r$   get_stopwords   s    r   c                 C   s   t j| |d}g }tt|D ]n}tj|d | ddtj|d | ddtj|d | dd|d | dd	}|| qt	|}t
|}|S )

    Reads an Excel file and converts its data to a JSON object.

    Args:
        filepath (str): The file path of the Excel file.

    Returns:
        A JSON object containing the data from the Excel file.
    )
sheet_nameZ	StartDater<   Z	StartTime%H:%M:%SZEndTimez
Event Namer   )r.   
time starttime endshowviews)rY   rZ   r@   rt   r,   r-   r/   r   r   r^   r   )filepathr   r_   r   r9   r`   r"   ra   r#   r#   r$   excel_to_json_epg   s    


r   c                 C   s   t | }g }tt|D ]`}dtj|d | ddtj|d | ddtt|d | d d	}|	| qt
|}t
|}|S )
r   zAl-Aouladayr<   minuter   %I:%M %pZpurcenti@  )r   r.   timeviewers)rY   rZ   r@   rt   r,   r-   r/   r>   roundr   r   r^   r   )r   r_   r   r9   r`   r"   ra   r#   r#   r$   excel_to_json_views   s    


r   )json1json2returnc           
      C   s   g }| D ]}|  }tj|d d }|D ]\}tj|d d }tj|d d }|d }	||  krz|kr.n q.|	|d<  qq.|| q|S )a^  
    Adds a 'show' key to each dictionary in json1 based on the time range in json2.

    Args:
        json1: A list of dictionaries representing the first JSON.
        json2: A list of dictionaries representing the second JSON.

    Returns:
        A list of dictionaries representing the updated JSON1, with a 'show' key added to each item.
    r   r   r   r   r   r   )copyr,   r-   r   r   )
r   r   updated_jsonitem1updated_itemr   item2
start_timeend_timer   r#   r#   r$   add_show_to_views  s    r   c                    s   g }|D ]}|  }tj|d d tj|d d  |d } fdd| D }t||d< |rtt|nd|d	< |rt|nd|d
< |rt|t| nd|d< |rt|nd|d< t|dkrt	|nd|d< |
| q|S )a  
    Calculates some statistics based on the views in json1 within the time range of each show in json2.
    Adds attributes 'total', 'max', 'min', 'mean', 'median', 'ecart-type views' to each show in json2.

    Args:
        json1: A list of dictionaries representing the first JSON.
        json2: A list of dictionaries representing the second JSON.

    Returns:
        A list of dictionaries representing the updated JSON2, with added attributes to each show.
    r   r   r   r   c                    s>   g | ]6}t j |d  d   kr. krn q|d qS )r   r   r   )r,   r-   r   )r)   r   r   r   r#   r$   r+   O  s
        z%add_calcul_to_epg.<locals>.<listcomp>re   r   maxminmeanr	   r'   z
ecart-type)r   r,   r-   r   sumr   r   rt   r	   r
   r   )r   r   r   r   r   r   Zviews_in_ranger#   r   r$   add_calcul_to_epg6  s    r   c                    s   t | }|dkr td}t n|dkr8td}t ng S |dsP|d || } fdd|D }t|}t|	 t
| dd	 d
d}fdd|D S )a  
    Returns a list of words in the input text, in descending order of frequency rank.

    Parameters:
        text (str): The input text to extract words from.

    Returns:
        list: A list of dictionaries containing each word in the text, its frequency rank, and count.
    enZen_core_web_smfrZfr_core_news_smZsentencizerc                    s,   g | ]$}|j s|js|j  kr|jqS r#   )Zis_stopZis_punctr   rX   )r)   tokenr   r#   r$   r+   z  s
        z speech_words.<locals>.<listcomp>c                 S   s   | d S Nr'   r#   xr#   r#   r$   <lambda>      zspeech_words.<locals>.<lambda>Tkeyreversec                    s    g | ]\}}||  |d qS )wordrankr5   r#   r)   r   r5   total_wordsr#   r$   r+     s     )r   spacyrs   r   FR_STOP_WORDSZhas_pipeZadd_piper   r   valuessortedrP   )r   r   Znlpdocr   word_countssorted_wordsr#   r   r   r$   speech_words_  s     



r   c                    s   t | }|dkrtd n|dkr,td ng  |  } t| } fdd|D }dd |D }t|}t| t|	 dd	 d
d}fdd|D }|S )Nr   englishr   frenchc                    s   g | ]}| kr|qS r#   r#   r)   r   r   r#   r$   r+     s      z#speech_words_v1.<locals>.<listcomp>c                 S   s"   g | ]}t d d |D s|qS )c                 s   s   | ]}|t jkV  qd S r   )stringpunctuation)r)   charr#   r#   r$   rl     s     z-speech_words_v1.<locals>.<listcomp>.<genexpr>)allr   r#   r#   r$   r+     s      c                 S   s   | d S r   r#   r   r#   r#   r$   r     r   z!speech_words_v1.<locals>.<lambda>Tr   c                    s    g | ]\}}||  |d qS r   r#   r   r   r#   r$   r+     s     )
r   r   rX   nltkword_tokenizer   r   r   r   rP   )r   r   r   filtered_wordsZcleaned_wordsr   r   ranked_wordsr#   r   r$   speech_words_v1  s    


r   c              -      sD  t | }|dkrjdddddddd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+g* nl|d,krd-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFddGdHdIdJdKdLdMdNdOdPdOdQdRdSdMdTdUdVg- ng  | tdWdWtj} |  } t| } fdXdY|D }t	|}t
| dZd[ d\d]}d^dY t|D }|S )_Nr   aantheandorbutifthenitisarewaswereofforonintothatwiththisatfrombybenotasyouyourtheytheirr9   weourusheshehimherhishersitsr   ununelelalesetoumaissialorsestsont   était   étaientdedudespoursurdans   àaveccecetcettecesparpascommevousvotreilsleurjenousnotreilelleluisonsases c                    s   g | ]}| kr|qS r#   r#   r   r   r#   r$   r+     s      z(speech_words_copy_v4.<locals>.<listcomp>c                 S   s   | d S r   r#   r   r#   r#   r$   r     r   z&speech_words_copy_v4.<locals>.<lambda>Tr   c                 S   s"   g | ]\}\}}||d  dqS )r'   )r   r   r#   )r)   r   r   r5   r#   r#   r$   r+     s    
 )r   	translater]   	maketransr   r   rX   r   r   r   r   rP   	enumerate)r   r   r   r   r   r   r   r#   r   r$   speech_words_copy_v4  s    Z`
r#  c              -      s<  t | }|dkrjdddddddd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+g* nl|d,krd-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFddGdHdIdJdKdLdMdNdOdPdOdQdRdSdMdTdUdVg- ng  | tdWdWtj} |  } t| } fdXdY|D }t	|}t
| dZd[ d\d]}d^dY |D S )_Nr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r9   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r  r	  r
  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  c                    s   g | ]}| kr|qS r#   r#   r   r   r#   r$   r+     s      z(speech_words_copy_v3.<locals>.<listcomp>c                 S   s   | d S r   r#   r   r#   r#   r$   r     r   z&speech_words_copy_v3.<locals>.<lambda>Tr   c                 S   s   g | ]\}}||d qS )r   r5   r#   r   r#   r#   r$   r+     s     )r   r   r]   r!  r   r   rX   r   r   r   r   rP   r   r   r   r   r   r   r#   r   r$   speech_words_copy_v3  s    Z`
r&  c                    sp   t d t| }t d tt| |  } fdd|D }t|}t|	 dd dd}dd |D S )	Nr   c                    s   g | ]}|   kr|qS r#   )rX   r   Z
stop_wordsr#   r$   r+     s      z%speech_words_copy.<locals>.<listcomp>c                 S   s   | d S r   r#   r   r#   r#   r$   r     r   z#speech_words_copy.<locals>.<lambda>Tr   c                 S   s   g | ]\}}||d qS r$  r#   r   r#   r#   r$   r+     s     )
r   Zdownloadr   r   r   r   r?   r   r   rP   r%  r#   r'  r$   speech_words_copy  s    

r(  c                 C   s8   |   }t|}dd | D }t|dd dd}|S )Nc                 S   s   g | ]\}}||d qS r$  r#   r   r#   r#   r$   r+     s     z(speech_words_copy_v1.<locals>.<listcomp>c                 S   s   | d S )Nr5   r#   r   r#   r#   r$   r     r   z&speech_words_copy_v1.<locals>.<lambda>Tr   )r?   r   rP   r   )r   r   r   r    r#   r#   r$   speech_words_copy_v1  s
    r)  c                 C   s`   |   }t|}t| dd dd}dd |D }g }|D ]\}}||kr<|||f q<|S )Nc                 S   s   | d S r   r#   r   r#   r#   r$   r   *  r   z&speech_words_copy_v2.<locals>.<lambda>Tr   c                 S   s   g | ]\}}||fqS r#   r#   r   r#   r#   r$   r+   -  s     z(speech_words_copy_v2.<locals>.<listcomp>)r?   r   r   rP   r   )r   r   r   r   Zunique_wordsr   r5   r#   r#   r$   speech_words_copy_v2"  s    r*  )rc   )-r   r   Z
langdetectr   collectionsr   Znltk.corpusr   typingr   r   r   
statisticsr	   r
   ZpandasrY   r   r   Zspacy.lang.en.stop_wordsr   Zspacy.lang.fr.stop_wordsr   r   r,   r%   r6   rC   rH   rR   rb   r   r   r   r   r]   r   r   r   r   r#  r&  r(  r)  r*  r#   r#   r#   r$   <module>   sD   
F"6$6)1%$!