import os
import sys
import openai
import pandas as pd
import requests
from openpyxl import load_workbook
from docx import Document
import json

# YouTube video URL passed as the first command-line argument
youtube_video0 = sys.argv[1]


def convert_youtube_url(url):
    """Expand a short youtu.be link into the full youtube.com/watch?v= form."""
    if 'youtu.be' in url:
        video_id = url.split('/')[-1].split('?')[0]
        return 'https://www.youtube.com/watch?v=' + video_id
    else:
        return url
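# Example (with a hypothetical id): convert_youtube_url("https://youtu.be/VIDEO_ID")
# returns "https://www.youtube.com/watch?v=VIDEO_ID"; full watch URLs pass through unchanged.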

youtube_video = convert_youtube_url(youtube_video0)

os.system("> /var/www/html/exportcomments/analysis_results.txt" )
os.system("> /var/www/html/exportcomments/final_report.txt" )
os.system("> /var/www/html/exportcomments/output_file.txt" )
os.system("> /var/www/html/exportcomments/temp.json" )
os.system("> /var/www/html/exportcomments/temp2.json" )

os.system("youtube-comment-downloader --url "+youtube_video+" --output /var/www/html/exportcomments/temp.json --limit 3000")



def read_comments_from_json_old(file_path):
    """Unused original reader: returns every comment text, with no length filter."""
    with open(file_path, "r") as f:
        comments = [json.loads(line)["text"] for line in f]
    return comments

def read_comments_from_json(file_path):
    """Read comment texts from the JSON-lines file, skipping comments longer than 100 words."""
    comments = []
    with open(file_path, "r") as f:
        for line in f:
            text = json.loads(line)["text"]
            if len(text.split()) <= 100:
                comments.append(text)
    return comments
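# Each line of temp.json is assumed to be one JSON object with at least a "text" field,
# e.g. (hypothetical record): {"cid": "...", "text": "nice video", "author": "..."}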

def chunks(lst, n):
    """Yield successive n-sized chunks of lst."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]
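# Example: list(chunks(["a", "b", "c", "d", "e"], 2)) -> [["a", "b"], ["c", "d"], ["e"]]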

def analyze_comments(comments):
    """Analyze comments in batches of 20 and append each batch report to analysis_results.txt."""
    analysis_results = []
    for comment_batch in chunks(comments, 20):
        batch_text = "\n".join(f"- {comment}" for comment in comment_batch)
        print(batch_text)

        prompt = (
            "Analyze the sentiments and overall themes in the following comments, "
            "which are written in Moroccan Darija:\n\n"
            f"{batch_text}\n\n"
            "Provide a report in English that includes the percentage of positive, neutral, "
            "and negative feedback, a summary of roughly 50 words of what people are generally "
            "talking about, additional insightful details, a section on what people liked, "
            "and a section on what people didn't like."
        )
        response = openai.Completion.create(engine=engine, prompt=prompt, max_tokens=500, n=1, stop=None, temperature=0.7)
        batch_report = response.choices[0].text.strip()
        analysis_results.append(batch_report)
        with open("/var/www/html/exportcomments/analysis_results.txt", "a") as outfile:
            outfile.write(batch_report + "\n")
    return analysis_results


def summarize_results(results):
    """Combine the per-batch reports into one averaged report, requested strictly as JSON."""
    prompt = (
        "Respond in JSON format only; do not write anything outside of the JSON object, and "
        "make sure the JSON is valid. Generate a combined sentiment and theme report for the "
        "following mini reports about the same video. Include:\n"
        "- The average percentage of positive, neutral, and negative feedback "
        "(quote the percentages so the JSON stays valid)\n"
        "- A summary of at least 50 words of what people are generally talking about\n"
        "- More insightful detail about what people liked and what people didn't like\n"
        "- Any additional details that may be useful\n\n"
    )
    for i, result in enumerate(results):
        prompt += f"Report {i + 1}:\n{result}\n\n"
    prompt += "Final Summary:"
    response = openai.Completion.create(engine=engine, prompt=prompt, max_tokens=2000, n=1, stop=None, temperature=0.7)
    final_summary = response.choices[0].text.strip()

    return final_summary



def summarize_results2(results):
    """Ask the model to repair the report so that the output is valid JSON and nothing else."""
    prompt = (
        "Take the following text and return the fixed JSON document, and only the JSON "
        "document, with nothing outside of it:\n\n"
    )
    prompt += results
    response = openai.Completion.create(engine=engine, prompt=prompt, max_tokens=3000, n=1, stop=None, temperature=0.7)
    final_summary = response.choices[0].text.strip()

    return final_summary


# Set up OpenAI API (read the key from the environment instead of hard-coding it)
openai.api_key = os.environ["OPENAI_API_KEY"]
engine = "text-davinci-003"
# engine = "gpt-3.5-turbo"

# Read comments from JSON
file_path = "/var/www/html/exportcomments/temp.json"
comments = read_comments_from_json(file_path)

# Analyze comments in batches
analysis_results = analyze_comments(comments)

# Summarize the analysis results, then run a second pass that repairs the JSON formatting
final_report = summarize_results(analysis_results)
final_report = summarize_results2(final_report)
print(final_report)
with open("/var/www/html/exportcomments/final_report.txt", "w") as outfile:
    outfile.write(final_report)

with open("/var/www/html/exportcomments/temp2.json", "w") as outfile:
    outfile.write(final_report)
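# Optional sanity check (sketch): confirm the model actually returned valid JSON
# before other tools read temp2.json.
#
#   try:
#       json.loads(final_report)
#   except json.JSONDecodeError:
#       print("Warning: final report is not valid JSON", file=sys.stderr)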
