import re   
import json
import openai 
import concurrent.futures
from decouple import config

from googleapiclient.discovery import build
from googleapiclient.errors import HttpError


YOUTUBE_URL_PATTERN = r'^(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/|youtube\.com/shorts/)([\w-]{11})'
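# Matches, e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ, https://youtu.be/dQw4w9WgXcQ,
# and https://www.youtube.com/shorts/dQw4w9WgXcQ; group 4 captures the 11-character video ID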

openai.api_key = config('OPENAI_API_KEY', default='', cast=str)

class Utils:
    @staticmethod
    def validate_youtube_url(video_url: str) -> bool:
        """Return True if the URL matches a supported YouTube video URL format."""
        return re.match(YOUTUBE_URL_PATTERN, video_url) is not None
    
    @staticmethod
    def extract_video_id(video_url: str):  
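        """Extract the 11-character video ID (capture group 4 of YOUTUBE_URL_PATTERN) from a YouTube URL."""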
        match = re.match(YOUTUBE_URL_PATTERN, video_url)
        if match:
            return {"message": "Video ID Extracted Successfully", "data": match.group(4), "status": True}
        else:
            return {"message": "Invalid YouTube URL format", "data": "", "status": False}
             
    @staticmethod
    def get_youtube_comments(video_id, nbr_comments): 
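        """Fetch up to nbr_comments top-level comments for a video via the YouTube Data API v3."""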
        try: 
            api_key = config('YOUTUBE_API_KEY', default='', cast=str) 
            youtube = build('youtube', 'v3', developerKey=api_key)
            comments_info = []  # List to store comment information

            next_page_token = None
            while len(comments_info) < nbr_comments:
                response = youtube.commentThreads().list(
                    part="snippet,replies",
                    videoId=video_id,
                    # Request only as many comments as are still needed (the API caps pages at 100)
                    maxResults=min(100, nbr_comments - len(comments_info)),
                    pageToken=next_page_token
                ).execute()

                for item in response.get("items", []):  
                    comment_info = {
                        "id"               : item["snippet"]["topLevelComment"]["id"],  
                        "text"             : item["snippet"]["topLevelComment"]["snippet"]["textOriginal"], 
                        "author"           : item["snippet"]["topLevelComment"]["snippet"]["authorDisplayName"], 
                        "authorAvatarURL"  : item["snippet"]["topLevelComment"]["snippet"]["authorProfileImageUrl"],
                        "authorChannelUrl" : item["snippet"]["topLevelComment"]["snippet"]["authorChannelUrl"],
                        "Likes Count"      : item["snippet"]["topLevelComment"]["snippet"]["likeCount"],
                        "published_at"     : item["snippet"]["topLevelComment"]["snippet"]["publishedAt"],
                        "updated_at"       : item["snippet"]["topLevelComment"]["snippet"]["updatedAt"] 
                    }
                    comments_info.append(comment_info)

                next_page_token = response.get("nextPageToken")
 
                if not next_page_token:
                    break 
            
            return {"message": "Comments Extracted Successfully", "data": comments_info, "status": True}
       
        except HttpError as http_error:
            return {"message": f"YouTube API error: {http_error}", "data": "", "status": False}
        except Exception as ex:
            return {"message": f"Error fetching comments: {ex}", "data": "", "status": False}
        
    @staticmethod
    def generate_prompts(comments, max_words_prompt):
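        """Pack comments into analysis prompts of at most max_words_prompt words each.

        Returns (results, unwanted_comments), where unwanted_comments holds comments
        too long to fit into any single prompt.
        """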
        results = []
        unwanted_comments = []

        current_prompt = ""
        current_word_count = 0
        current_comment_count = 0

        # Introductory and ending phrases wrapped around every prompt
        intro_phrase = "Analyze the sentiments and overall themes in the following comments in Moroccan Darija:\n\n"
        ending_phrase = "\n\nProvide a report in English that includes the percentage of positive, neutral, and negative feedback, as well as a summary of what people are generally talking about in 50 words. Additionally, provide more data, a section on what people liked, and a section on what people didn't like."

        # Words consumed by the intro and ending phrases in every prompt
        overhead_word_count = len(intro_phrase.split()) + len(ending_phrase.split())

        for comment in comments:
            comment_text = f"- {comment['text']}\n"
            comment_word_count = len(comment_text.split())

            if comment_word_count + overhead_word_count > max_words_prompt:
                # The comment cannot fit within any single prompt, so set it aside
                unwanted_comments.append(comment)
            elif current_word_count + overhead_word_count + comment_word_count <= max_words_prompt:
                # Add the comment to the current prompt
                current_prompt += comment_text
                current_word_count += comment_word_count
                current_comment_count += 1
            else:
                # Adding the comment would exceed the word limit, so close out the current prompt and start a new one
                results.append({
                    'prompt': intro_phrase + current_prompt.strip() + ending_phrase,
                    'word_count': current_word_count,
                    'comment_count': current_comment_count
                })
                current_prompt = comment_text
                current_word_count = comment_word_count
                current_comment_count = 1

        # Add the last prompt if it's not empty
        if current_prompt:
            results.append({
                'prompt': intro_phrase + current_prompt.strip() + ending_phrase,
                'word_count': current_word_count,
                'comment_count': current_comment_count
            })

        return results, unwanted_comments
 
    @staticmethod
    def get_comments_reports(comment_list, open_ai_model="text-davinci-003"):
        """Generate one analysis report per prompt, running the completion calls in parallel.

        A completion-capable model is assumed for open_ai_model (text-davinci-003 here);
        embeddings models such as text-embedding-ada-002 cannot serve completion requests.
        """
        reports = []
        prompts_info, unwanted_comments = Utils.generate_prompts(comment_list, max_words_prompt=200)
        
        prompts = [prompt_info['prompt'] for prompt_info in prompts_info]

        # Define a function to process a single prompt and make the API call
        def process_prompt(prompt):
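            # Uses the legacy (pre-1.0) openai SDK Completion interface, matching
            # the module-level openai.api_key configuration above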
            response = openai.Completion.create(
                engine=open_ai_model,
                prompt=prompt,
                n=1,
                stop=None,
                max_tokens=3000,
                temperature=0.7,
            )
            return response.choices[0].text.strip()

        # Use concurrent.futures to run the processing function on all prompts in parallel
        with concurrent.futures.ThreadPoolExecutor() as executor:
            report_futures = {executor.submit(process_prompt, prompt): prompt for prompt in prompts}

            for future in concurrent.futures.as_completed(report_futures):
                prompt = report_futures[future]
                try:
                    report = future.result()
                    reports.append(report)
                except Exception as e:
                    return f"Error processing prompt: {prompt}, {str(e)}"

        return reports
    
    @staticmethod
    def generate_summary(reports_chunk, open_ai_model="text-davinci-003"):
        """Summarize a chunk of reports with a single completion call (a completion-capable model is assumed)."""
        prompt = "Analyze the sentiments and overall themes in the following reports:\n\n"

        for i, report in enumerate(reports_chunk, start=1):
            prompt += f"{i}. {report}\n"

        prompt += "\n\nProvide a report in English that includes the percentage of positive, neutral, and negative feedback, as well as a summary of what people are generally talking about in 50 words. Additionally, provide more data, a section on what people liked, and a section on what people didn't like."

        response = openai.Completion.create(
            engine=open_ai_model,
            prompt=prompt,
            max_tokens=2000,
            n=1,
            stop=None,
            temperature=0.7
        )

        return response.choices[0].text.strip()

    @staticmethod
    def summarize_reports(reports, open_ai_model="text-davinci-003"):
        """Fold a list of reports into a single summary, keeping each summarization
        prompt under max_words_prompt words (a completion-capable model is assumed)."""
        summary = ""
        current_chunk = []
        max_words_prompt = 120
        total_words_in_chunk = 0

        for report in reports:
            report_words = len(report.split())

            if total_words_in_chunk + report_words <= max_words_prompt:
                # The report still fits in the current chunk
                current_chunk.append(report)
                total_words_in_chunk += report_words
            elif current_chunk:
                # Summarize the full chunk, then seed the next chunk with that
                # summary so earlier context carries forward
                summary = Utils.generate_summary(current_chunk, open_ai_model)
                current_chunk = [summary, report]
                total_words_in_chunk = len(summary.split()) + report_words
            else:
                # An oversized report starts its own chunk
                current_chunk = [report]
                total_words_in_chunk = report_words

        # Summarize the remaining chunk (if any)
        if current_chunk:
            summary = Utils.generate_summary(current_chunk, open_ai_model)
        return summary

    @staticmethod
    def define_summary(report, open_ai_model="text-davinci-003"):
        """Convert a final summary report into a structured JSON report (a completion-capable model is assumed)."""
        prompt = """Generate a sentiment analysis and theme report, in JSON format only, for the following list of mini reports about the same video. Do not write anything outside of the JSON object. Include:
            - The average percentage of positive, neutral, and negative feedback (quote the percentage values so the JSON stays valid)
            - A summary of what people are generally talking about, in at least 50 words
            - Insightful data about what people "liked" and what people "didn't like"
            - Any additional details that may be useful
            Make sure the JSON is 100% syntactically correct:\n\n"""

        prompt += f"Report:\n{report}\n\n"

        response = openai.Completion.create(
            engine=open_ai_model,
            prompt=prompt,
            max_tokens=2000,
            n=1,
            stop=None,
            temperature=0.7
        ) 

        # Raises json.JSONDecodeError if the model does not return valid JSON
        return json.loads(response.choices[0].text.strip())
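

# Minimal usage sketch of the full pipeline (assumes OPENAI_API_KEY and YOUTUBE_API_KEY
# are set in the environment / .env file; the URL below is only illustrative):
if __name__ == "__main__":
    url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
    if Utils.validate_youtube_url(url):
        extraction = Utils.extract_video_id(url)
        if extraction["status"]:
            comments = Utils.get_youtube_comments(extraction["data"], nbr_comments=50)
            if comments["status"]:
                reports = Utils.get_comments_reports(comments["data"])
                final_summary = Utils.summarize_reports(reports)
                print(Utils.define_summary(final_summary))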