U
    &f((                     @   sj   d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 dZeddede_G d	d
 d
ZdS )    N)config)build)	HttpErrorzX^(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/|youtube\.com/shorts/)([\w-]{11})ZOPENAPI_API_KEY defaultcastc                   @   s   e Zd ZeedddZeedddZedd Zed	d
 ZedddZ	edddZ
edddZedddZdS )UtilsZvideoUrlc                 C   s   t t| d k	S )N)rematchYOUTUBE_URL_PATTERNr
    r   </var/www/html/YouTubeCommentsAnalyzer/src/app/utils/utils.pyvalidate_youtube_url   s    zUtils.validate_youtube_url)	video_urlc                 C   s2   t t| }|r"d|dddS ddddS d S )NzVideo ID Extracted Successfully   TmessagedatastatuszInvalid YouTube URL formatr   F)r   r   r   group)r   r   r   r   r   extract_video_id   s    zUtils.extract_video_idc              
   C   s  z.t ddtd}tdd|d}g }d }t||k r$| jd| td||d	 }|d
 d }|dg D ]}|d d d |d d d d |d d d d |d d d d |d d d d |d d d d |d d d d |d d d d d}	|	|	 qn|d}|s(q$q(d|ddW S  t
k
rj }
 zd|
 ddd W Y S d }
~
X Y n: tk
r } zd| ddd W Y S d }~X Y nX d S )NZYOUTUBE_API_KEYr   r   youtubeZv3)ZdeveloperKeyzsnippet,repliesd   )partZvideoIdZ
maxResultsZ	pageTokenZpageInfoZtotalResultsitemsZsnippetZtopLevelCommentidZtextOriginalZauthorDisplayNameZauthorProfileImageUrlauthorChannelUrlZ	likeCountZpublishedAtZ	updatedAt)r   textZauthorZauthorAvatarURLr   zLikes CountZpublished_atZ
updated_atZnextPageTokenzComments Extracted SuccessfullyTr   zYouTube API error: FzError fetching comments: )r   strr   lenZcommentThreadslistminZexecutegetappendr   	Exception)video_idnbr_commentsapi_keyr   Zcomments_infoZnext_page_tokenresponseZtotal_commentsitemZcomment_infoZ
http_errorexr   r   r   get_youtube_comments   s@    


&zUtils.get_youtube_commentsc                 C   s   g }g }d}d}d}d}d}| D ]}	d|	d  d}
t |
 }||krT||	 q |t |  t |  | |kr||
7 }||7 }|d7 }q |||  | ||d	 |
}|}d}q |r|||  | ||d	 ||fS )
Nr   r   zYAnalyze the sentiments and overall themes in the following comments in Moroccan Darija:

  

Provide a report in English that includes the percentage of positive, neutral, and negative feedback, as well as a summary of what people are generally talking about in 50 words. Additionally, provide more data, a section on what people liked, and a section on what people didn't like.z- r   
   )promptZ
word_countZcomment_count)r!   splitr%   strip)commentsmax_words_promptresultsunwanted_commentsZcurrent_promptZcurrent_word_countZcurrent_comment_countZintro_phraseZending_phrasecommentZcomment_textZcomment_word_countr   r   r   generate_promptsI   s>    $
zUtils.generate_promptstext-embedding-ada-002c                    s   g }t j| dd\}}dd |D }fddtj   fdd|D }tj|D ]n}|| }z| }	||	 W q\ tk
r }
 z,d	| d
t	|
  W Y   W  5 Q R  S d }
~
X Y q\X q\W 5 Q R X |S )N   )r5   c                 S   s   g | ]}|d  qS )r1   r   ).0Zprompt_infor   r   r   
<listcomp>|   s     z.Utils.get_comments_reports.<locals>.<listcomp>c                    s(   t jj | dd ddd}|jd j S )Nr0   i  ffffff?)enginer1   nstop
max_tokenstemperaturer   )openai
Completioncreatechoicesr   r3   )r1   r*   )open_ai_modelr   r   process_prompt   s    z2Utils.get_comments_reports.<locals>.process_promptc                    s   i | ]}  ||qS r   )submit)r<   r1   )executorrI   r   r   
<dictcomp>   s     
 z.Utils.get_comments_reports.<locals>.<dictcomp>zError processing prompt: z, )
r	   r9   
concurrentfuturesThreadPoolExecutoras_completedresultr%   r&   r    )comment_listrH   reportsZprompts_infor7   ZpromptsZreport_futuresfuturer1   reporter   )rK   rH   rI   r   get_comments_reportsw   s    FzUtils.get_comments_reportsc                 C   s^   d}t | ddD ]\}}|| d| d7 }q|d7 }tjj||ddd dd	}|jd
 j S )NzFAnalyze the sentiments and overall themes in the following reports :

r0   )startz. r/   r.     r>   r?   r1   rB   r@   rA   rC   r   )	enumeraterD   rE   rF   rG   r   r3   )Zreports_chunkrH   r1   irU   r*   r   r   r   generate_summary   s    	zUtils.generate_summaryc                 C   s   d}g }d}d}| D ]h}t | }||kr@|| ||7 }qt||}|g}|| |t | 7 }|t | 7 }q|rt|}|S )Nr   x   r   )r!   r2   r%   r	   r]   )rS   rH   summaryZcurrent_chunkr5   Ztotal_words_in_chunkrU   Zreport_tokensr   r   r   summarize_reports   s"    



zUtils.summarize_reportsc                 C   sB   d}|d|  d7 }t jj||ddd dd}t|jd j S )	Naq  IMPORTANT THE RESPONSE SHOULD BE JSON FORMAT ONLY!! DO NOT RESPOND WITH ANYTHING ELSE OUTSIDE OF THE JSON FORMAT, THANKS, ... Please generate a sentiment analysis and theme report in JSON format for the following list of mini reports of the same video, Please include the following in the json report:
            - The average percentage of positive, neutral, and negative feedback in comma to respect json format, it's very urgent!
            - A summary of what people are generally talking about in 50 words minimum
            - More insightful data about what people "liked" and what people "didn't like"
            - Any additional details that may be useful
            Format the report in JSON and only json format! don't write anything outside of the json {} and make sure the json format is 100% correct and for percentage use quote so it's correct format in json :

z	Report :
z

rY   r0   r>   rZ   r   )rD   rE   rF   jsonloadsrG   r   r3   )rU   rH   r1   r*   r   r   r   define_summary   s    	zUtils.define_summaryN)r:   )r:   )r:   )r:   )__name__
__module____qualname__staticmethodr    r   r   r-   r9   rW   r]   r`   rc   r   r   r   r   r	      s    
+
- r	   )r   ra   rD   concurrent.futuresrM   Zdecoupler   Zgoogleapiclient.discoveryr   Zgoogleapiclient.errorsr   r   r    r)   r	   r   r   r   r   <module>   s   