import nltk
import string
from langdetect import detect
from collections import Counter
from nltk.corpus import stopwords
from typing import Any, Dict, List
from statistics import median, stdev
import pandas as pd
import json

import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from spacy.lang.fr.stop_words import STOP_WORDS as FR_STOP_WORDS


from tqdm import tqdm
import datetime

def count_ads_by_date(ads_data, week_dates):
    """
    Counts how many ads in ads_data fall on each date in week_dates.

    Returns a list of counts, one per date, in the same order as week_dates.
    """
    # Initialize the date count dictionary
    date_count = {}

    # Loop over the ads data and count the dates
    for ad in ads_data:
        # Get the date for this ad
        date_str = ad['Timestamp']
        date_obj = datetime.datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S').date()
        # Check if the date is in the week dates
        if date_obj in week_dates:
            if date_obj not in date_count:
                date_count[date_obj] = 0
            date_count[date_obj] += 1
    # Return one count per day, in the same order as week_dates
    return [date_count.get(date_obj, 0) for date_obj in week_dates]

def get_week_dates(week_string):
    # Parse a string like '2023-W12' into the 7 dates of that week.
    # Note: '%W' weeks are Monday-based but numbered slightly differently from
    # ISO weeks ('%G-W%V-%u'); adjust the format if the input follows the
    # ISO 8601 / HTML week-input convention.
    year, week_num = map(int, week_string.split('-W'))
    first_day = datetime.datetime.strptime(f'{year}-W{week_num}-1', '%Y-W%W-%w')
    return [(first_day + datetime.timedelta(days=i)).date() for i in range(7)]
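

def _example_weekly_ad_counts():
    # Usage sketch with hypothetical in-memory ads: each ad only needs a
    # 'Timestamp' string in '%Y-%m-%d %H:%M:%S' format for count_ads_by_date.
    ads_data = [
        {'Timestamp': '2023-03-20 10:15:00', 'Brand': 'BrandA'},
        {'Timestamp': '2023-03-20 18:40:00', 'Brand': 'BrandB'},
        {'Timestamp': '2023-03-22 09:05:00', 'Brand': 'BrandA'},
    ]
    week_dates = get_week_dates('2023-W12')  # Monday 2023-03-20 .. Sunday 2023-03-26
    # One count per day of the week, e.g. [2, 0, 1, 0, 0, 0, 0]
    return count_ads_by_date(ads_data, week_dates)
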

def get_brands_by_id(categories_data, category_id):
    # Return the 'brands' list of the category whose 'id' matches category_id
    for category in categories_data:
        if category['id'] == category_id:
            return category.get('brands', [])
    return []

def count_ads_by_brand(ads_data, brand_str):
    # brand_str is a comma-separated list of brand names; strip any
    # surrounding whitespace around each name
    brands = [brand.strip() for brand in brand_str.split(',')]

    # Initialize the brand count dictionary
    brand_count = {brand: 0 for brand in brands}

    # Loop over the ads data and count the brands
    for ad in ads_data:
        # Get the brand for this ad
        brand = ad['Brand']
        if brand in brands:
            brand_count[brand] += 1

    # Filter out the brands with count = 0
    brand_count = {brand: count for brand, count in brand_count.items() if count > 0}

    # Convert the brand counts to the desired format
    data = [{"name": brand, "y": count} for brand, count in brand_count.items()]

    return data
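

def _example_brand_breakdown():
    # Usage sketch with hypothetical categories and ads: look up a category's
    # brands, then count matching ads as {'name', 'y'} data points.
    categories_data = [
        {'id': 3, 'name': 'Beverages', 'brands': ['BrandA', 'BrandB', 'BrandC']},
    ]
    ads_data = [
        {'Timestamp': '2023-03-20 10:15:00', 'Brand': 'BrandA'},
        {'Timestamp': '2023-03-20 18:40:00', 'Brand': 'BrandB'},
        {'Timestamp': '2023-03-22 09:05:00', 'Brand': 'BrandA'},
    ]
    brand_filter = ','.join(get_brands_by_id(categories_data, 3))
    # Returns [{'name': 'BrandA', 'y': 2}, {'name': 'BrandB', 'y': 1}]
    return count_ads_by_brand(ads_data, brand_filter)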


def get_excel_ads(file_path):
    # Determine the file type based on the file extension
    file_type = file_path.split(".")[-1].lower()
    # Read the file using pandas and convert it to a list of dictionaries
    if file_type == "xlsx":
        df = pd.read_excel(file_path, parse_dates=["Timestamp"])
    elif file_type == "csv":
        # date_format requires pandas >= 2.0; on older versions use
        # date_parser=lambda x: pd.to_datetime(x, format="%m/%d/%Y %I:%M:%S %p")
        df = pd.read_csv(file_path, parse_dates=["Timestamp"], date_format="%m/%d/%Y %I:%M:%S %p")
    else:
        raise ValueError("File type not supported.")
    # Remove the "Ad_Iframe" and "Day_Iframe" columns from the dataframe
    df = df.drop(["Ad_Iframe", "Day_Iframe"], axis=1)
    # Convert the remaining columns to a list of dictionaries
    data = df.to_dict(orient="records")
    # Stringify the Timestamp and add a "count" key with a value of 0 to each dictionary
    for datum in data:
        datum["Timestamp"] = str(datum["Timestamp"])
        datum["count"] = 0
    # Round-trip through json.dumps/json.loads so the result is plain, JSON-serializable data
    json_data = json.dumps(data)
    json_object = json.loads(json_data)
    # Return the parsed JSON data
    return json_object
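

def _example_load_ads_file():
    # Usage sketch: 'ads_export.xlsx' is a placeholder path. The file is
    # expected to contain a 'Timestamp' column plus 'Ad_Iframe' and
    # 'Day_Iframe' columns (which get_excel_ads drops).
    ads = get_excel_ads('ads_export.xlsx')
    # Each record now has a string 'Timestamp' and a 'count' of 0
    return ads
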

def generate_static_json(original_json_file_path, static_json_file_path, confidence_value=70):
    # Load the original JSON content
    with open(original_json_file_path, 'r') as f:
        original_json = json.load(f)

    # Initialize a list to store the aggregated results
    results = []
    fps = 30  # frame rate used to convert detection counts to seconds

    # Initialize a progress bar
    pbar = tqdm(total=len(original_json), desc='Processing frames')

    # Iterate through all the frames in the original JSON content
    for frame in original_json:
        # Update the progress bar
        pbar.update(1)

        for obj in frame['response']:
            if obj['Confidence'] >= confidence_value:  # Only include objects at or above the confidence threshold
                # Extract the object name and confidence
                name = obj['Name']
                confidence = obj['Confidence']

                # Initialize a new entry for the object if it doesn't already exist
                obj_dict = next((d for d in results if d['object'] == name), None)
                if obj_dict is None:
                    obj_dict = {
                        'object': name,
                        'parents': [],
                        'categories': [],
                        'count': 0,
                        'total_confidence': 0,
                        'avg_confidence': 0,
                        'seconds': 0,
                    }
                    results.append(obj_dict)

                # Add the object's parents and categories to the corresponding lists
                for parent in obj['Parents']:
                    if parent['Name'] not in obj_dict['parents']:
                        obj_dict['parents'].append(parent['Name'])
                for category in obj['Categories']:
                    if category['Name'] not in obj_dict['categories']:
                        obj_dict['categories'].append(category['Name'])

                # Update the object's count and total confidence
                obj_dict['count'] += 1
                obj_dict['total_confidence'] += confidence

    # Calculate the average confidence and on-screen duration (in seconds) for each object
    for obj_dict in results:
        if obj_dict['count'] > 0 :
            obj_dict['avg_confidence'] = obj_dict['total_confidence'] / obj_dict['count']
            obj_dict['seconds'] = obj_dict['count'] / fps

    # Keep only objects detected in at least 180 frames (6 seconds at 30 fps)
    # and not in the excluded categories
    final_result = []
    for obj_dict in results:
        if obj_dict['count'] >= 180 and ('Events and Attractions' not in obj_dict['categories']) and ('Weapons and Military' not in obj_dict['categories']):
            final_result.append(obj_dict)

    # Close the progress bar
    pbar.close()

    # Write the full (unfiltered) aggregation to the static JSON file;
    # the filtered list is what gets returned to the caller
    with open(static_json_file_path, 'w') as f:
        json.dump(results, f, indent=4)

    return final_result
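

def _example_build_static_labels():
    # Usage sketch: both paths are placeholders. The input file is expected to
    # be a list of frames, each carrying a 'response' list of label objects
    # with 'Name', 'Confidence', 'Parents' and 'Categories' keys.
    return generate_static_json(
        'labels_raw.json',       # hypothetical per-frame detections
        'labels_static.json',    # aggregated output is written here
        confidence_value=80,     # keep only labels at >= 80% confidence
    )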


def get_stopwords(lang):
    return set(stopwords.words(lang))

def excel_to_json_epg(filepath, sheet_name):
    """
    Reads an Excel file and converts its data to a JSON object.

    Args:
        filepath (str): The file path of the Excel file.
        sheet_name (str): The name of the sheet to read.

    Returns:
        A JSON object containing the data from the Excel file.
    """
    # Load the Excel file into a Pandas dataframe
    df = pd.read_excel(filepath, sheet_name=sheet_name)
    # Create a list of dictionaries, one for each row of data
    data = []
    for i in range(len(df)):
        datum = {
            "date": datetime.datetime.strptime(df['StartDate'][i], '%Y-%m-%d').strftime('%Y-%m-%d'),
            "time start": datetime.datetime.strptime(df['StartTime'][i], '%H:%M:%S').strftime('%H:%M:%S'),
            "time end": datetime.datetime.strptime(df['EndTime'][i], '%H:%M:%S').strftime('%H:%M:%S'),
            "show": df['Event Name'][i],
            "views": 0
        }
        data.append(datum)
    # Convert the list to a JSON object
    json_data = json.dumps(data)
    # Parse the JSON string into a JSON object
    json_object = json.loads(json_data)
    # Return the JSON object
    return json_object

def excel_to_json_views(filepath):
    """
    Reads an Excel file and converts its data to a JSON object.

    Args:
        filepath (str): The file path of the Excel file.

    Returns:
        A JSON object containing the data from the Excel file.
    """
    # Load the Excel file into a Pandas dataframe
    df = pd.read_excel(filepath)

    # Create a list of dictionaries, one for each row of data
    data = []
    for i in range(len(df)):
        datum = {
            "channel": "2M Maroc",
            "date": datetime.datetime.strptime(df['day'][i], '%Y-%m-%d').strftime('%Y-%m-%d'),
            "time": datetime.datetime.strptime(df['time'][i], '%H:%M:%S').strftime('%I:%M %p'),
            # Multiply the '2M Maroc' value by 40,000 and round to an integer viewer count
            "viewers": int(round(df['2M Maroc'][i] * 40000))
        }
        data.append(datum)

    # Convert the list to a JSON object
    json_data = json.dumps(data)

    # Parse the JSON string into a JSON object
    json_object = json.loads(json_data)

    # Return the JSON object
    return json_object
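

def _example_load_epg_and_views():
    # Usage sketch: the file names and sheet name are placeholders. The EPG
    # sheet needs 'StartDate', 'StartTime', 'EndTime' and 'Event Name' columns;
    # the audience file needs 'day', 'time' and '2M Maroc' columns.
    epg = excel_to_json_epg('epg.xlsx', sheet_name='Sheet1')
    views = excel_to_json_views('audience.xlsx')
    return epg, views
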

def add_show_to_views(json1: List[Dict[str, Any]], json2: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Adds a 'show' key to each dictionary in json1 based on the time range in json2.

    Args:
        json1: A list of dictionaries representing the first JSON.
        json2: A list of dictionaries representing the second JSON.

    Returns:
        A list of dictionaries representing the updated json1, with a 'show' key added to each item whose time falls inside a show's time range.
    """
    # Create a new list to hold the updated JSON
    updated_json = []

    # Iterate over items in json1
    for item1 in json1:
        # Copy the dictionary to avoid modifying the original
        updated_item = item1.copy()
        time = datetime.datetime.strptime(item1['time'], '%I:%M %p').time()

        # Find the corresponding show in json2 based on the time
        for item2 in json2:
            start_time = datetime.datetime.strptime(item2['time start'], '%H:%M:%S').time()
            end_time = datetime.datetime.strptime(item2['time end'], '%H:%M:%S').time()
            show = item2['show']

            if start_time <= time <= end_time:
                # Update the 'show' key in the copied dictionary
                updated_item['show'] = show
                break

        # Add the updated item to the new list
        updated_json.append(updated_item)

    return updated_json

def add_calcul_to_epg(json1: List[Dict[str, Any]], json2: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Calculates some statistics based on the views in json1 within the time range of each show in json2.
    Adds 'total', 'max', 'min', 'mean', 'median' and 'ecart-type' (standard deviation) attributes to each show in json2.

    Args:
        json1: A list of dictionaries representing the first JSON.
        json2: A list of dictionaries representing the second JSON.

    Returns:
        A list of dictionaries representing the updated JSON2, with added attributes to each show.
    """
    # Create a new list to hold the updated JSON
    updated_json = []

    # Iterate over items in json2
    for item2 in json2:
        # Copy the dictionary to avoid modifying the original
        updated_item = item2.copy()

        start_time = datetime.datetime.strptime(item2['time start'], '%H:%M:%S').time()
        end_time = datetime.datetime.strptime(item2['time end'], '%H:%M:%S').time()

        # Filter json1 to get the views within the time range of this show
        views_in_range = [item1['viewers'] for item1 in json1 if start_time <= datetime.datetime.strptime(item1['time'], '%I:%M %p').time() <= end_time]

        # Calculate statistics for the views
        updated_item['total'] = sum(views_in_range)
        updated_item['max'] = max(views_in_range) if views_in_range else 0
        updated_item['min'] = min(views_in_range) if views_in_range else 0
        updated_item['mean'] = sum(views_in_range) / len(views_in_range) if views_in_range else 0
        updated_item['median'] = median(views_in_range) if views_in_range else 0
        updated_item['ecart-type'] = stdev(views_in_range) if len(views_in_range) > 1 else 0

        # Add the updated item to the new list
        updated_json.append(updated_item)

    return updated_json
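

def _example_enrich_epg_with_views():
    # Usage sketch with hypothetical in-memory records: attach a show to each
    # viewer data point, then compute per-show statistics.
    views = [
        {'channel': '2M Maroc', 'date': '2023-03-20', 'time': '08:15 PM', 'viewers': 120000},
        {'channel': '2M Maroc', 'date': '2023-03-20', 'time': '08:30 PM', 'viewers': 160000},
    ]
    epg = [
        {'date': '2023-03-20', 'time start': '20:00:00', 'time end': '21:00:00',
         'show': 'Evening News', 'views': 0},
    ]
    views_with_shows = add_show_to_views(views, epg)
    # Each show now carries 'total', 'max', 'min', 'mean', 'median' and 'ecart-type'
    epg_with_stats = add_calcul_to_epg(views, epg)
    return views_with_shows, epg_with_stats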


def speech_words(text):
    """
    Returns the words of the input text, ordered by descending frequency.

    Parameters:
        text (str): The input text to extract words from.

    Returns:
        list: A list of dictionaries, one per word, containing the word, its
        relative frequency ('rank', i.e. count / total words) and its raw count.
    """

    # Parse the input text using the appropriate language model
    lang = detect(text)
    if lang == 'en':
        nlp = spacy.load('en_core_web_sm')
        stopwords = STOP_WORDS
    elif lang == 'fr':
        nlp = spacy.load('fr_core_news_sm')
        stopwords = FR_STOP_WORDS
    else:
        # If language is not supported, return an empty list
        return []

    # Add sentencizer component and tokenize text into words
    if not nlp.has_pipe('sentencizer'):
        nlp.add_pipe('sentencizer')
    doc = nlp(text)
    words = [token.text for token in doc if not token.is_stop and not token.is_punct and token.text.lower() not in stopwords]

    # TODO: normalize plural forms to singular (not implemented)

    # Count the frequency of each word and calculate total number of non-stopwords
    word_counts = Counter(words)
    total_words = sum(word_counts.values())

    # Sort by count (descending) and return each word with its relative frequency ('rank') and raw count
    sorted_words = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)
    return [{'word': word, 'rank': count / total_words, 'count': count} for word, count in sorted_words]
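

def _example_speech_word_frequencies():
    # Usage sketch: requires the langdetect package plus the spaCy models
    # 'en_core_web_sm' / 'fr_core_news_sm' to be installed.
    text = (
        "Advertising builds brands, and strong brands keep investing in "
        "advertising to stay visible to their audience."
    )
    # Returns dictionaries like {'word': ..., 'rank': ..., 'count': ...},
    # most frequent words first
    return speech_words(text)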


def speech_words_v1(text):
    # Detect language of text
    lang = detect(text)

    # Define stop words based on detected language
    if lang == 'en':
        stopwords = get_stopwords('english')
    elif lang == 'fr':
        stopwords = get_stopwords('french')
    else:
        # If language is not English or French, return an empty list of stop words
        stopwords = []

    # Convert to lowercase; punctuation-only tokens are filtered out after tokenization
    text = text.lower()

    # Tokenize text into words
    words = nltk.word_tokenize(text)
    # Remove stop words
    filtered_words = [word for word in words if word not in stopwords]
    # Drop tokens that consist entirely of punctuation characters
    cleaned_words = [word for word in filtered_words if not all(char in string.punctuation for char in word)]

    # Count the frequency of each word
    word_counts = Counter(cleaned_words)

    # Calculate total number of non-stopwords in text
    total_words = sum(word_counts.values())

    # Calculate rank of each word based on its frequency
    sorted_words = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)
    ranked_words = [{'word': word, 'rank': count/total_words, 'count': count} for word, count in sorted_words]

    # Return a list of the words, in descending order of frequency rank
    return ranked_words

def speech_words_copy_v4(text):
    # Detect language of text
    lang = detect(text)

    # Define stop words based on detected language
    if lang == 'en':
        stopwords = ['a', 'an', 'the', 'and', 'or', 'but', 'if', 'then', 'it', 'is', 'are', 'was', 'were', 'of', 'for', 'on', 'in', 'to', 'that', 'with', 'this', 'at', 'from', 'by', 'be', 'not', 'as', 'you', 'your', 'they', 'their', 'i', 'we', 'our', 'us', 'he', 'she', 'him', 'her', 'his', 'hers', 'its']
    elif lang == 'fr':
        stopwords = ['un', 'une', 'le', 'la', 'les', 'et', 'ou', 'mais', 'si', 'alors', 'est', 'sont', 'était', 'étaient', 'de', 'du', 'des', 'pour', 'sur', 'dans', 'à', 'avec', 'ce', 'cet', 'cette', 'ces', 'en', 'par', 'pas', 'comme', 'vous', 'votre', 'ils', 'leur', 'je', 'nous', 'notre', 'nous', 'il', 'elle', 'lui', 'leur', 'son', 'sa', 'ses']
    else:
        # If language is not English or French, return an empty list of stop words
        stopwords = []

    # Remove punctuation and convert to lowercase
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = text.lower()

    # Tokenize text into words
    words = nltk.word_tokenize(text)
    # Remove stop words
    filtered_words = [word for word in words if word not in stopwords]

    # Count the frequency of each word
    word_counts = Counter(filtered_words)

    # Sort the words by their frequency, in descending order
    sorted_words = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)

    # Assign a rank to each word based on its frequency
    ranked_words = [{'word': word, 'rank': rank+1} for rank, (word, count) in enumerate(sorted_words)]

    # Return a list of the words, in descending order of frequency
    return ranked_words

def speech_words_copy_v3(text):
    # Detect language of text
    lang = detect(text)

    # Define stop words based on detected language
    if lang == 'en':
        stopwords = ['a', 'an', 'the', 'and', 'or', 'but', 'if', 'then', 'it', 'is', 'are', 'was', 'were', 'of', 'for', 'on', 'in', 'to', 'that', 'with', 'this', 'at', 'from', 'by', 'be', 'not', 'as', 'you', 'your', 'they', 'their', 'i', 'we', 'our', 'us', 'he', 'she', 'him', 'her', 'his', 'hers', 'its']
    elif lang == 'fr':
        stopwords = ['un', 'une', 'le', 'la', 'les', 'et', 'ou', 'mais', 'si', 'alors', 'est', 'sont', 'était', 'étaient', 'de', 'du', 'des', 'pour', 'sur', 'dans', 'à', 'avec', 'ce', 'cet', 'cette', 'ces', 'en', 'par', 'pas', 'comme', 'vous', 'votre', 'ils', 'leur', 'je', 'nous', 'notre', 'nous', 'il', 'elle', 'lui', 'leur', 'son', 'sa', 'ses']
    else:
        # If language is not English or French, return an empty list of stop words
        stopwords = []

    # Remove punctuation and convert to lowercase
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = text.lower()

    # Tokenize text into words
    words = nltk.word_tokenize(text)
    # Remove stop words
    filtered_words = [word for word in words if word not in stopwords]

    # Count the frequency of each word
    word_counts = Counter(filtered_words)

    # Sort the words by their frequency, in descending order
    sorted_words = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)

    # Return a list of the words, in descending order of frequency
    return [{'word': word, 'count': count} for word, count in sorted_words]

def speech_words_copy(text):
    # Detect the language of the text
    lang = detect(text)

    # Download the stop words corpus once and map the langdetect code to the
    # language name expected by nltk.corpus.stopwords (falls back to English)
    nltk.download('stopwords')
    lang_names = {'en': 'english', 'fr': 'french'}
    stop_words = set(stopwords.words(lang_names.get(lang, 'english')))

    # Split the text into words
    words = text.split()

    # Remove stop words
    filtered_words = [word for word in words if word.lower() not in stop_words]

    # Count the frequency of each word
    word_counts = Counter(filtered_words)

    # Sort the words by their frequency, in descending order
    sorted_words = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)

    # Return a list of the words, in descending order of frequency
    return [{'word': word, 'count': count} for word, count in sorted_words]

def speech_words_copy_v1(text):
    # Split the text into words
    words = text.split()

    # Count the frequency of each word
    word_counts = Counter(words)

    # Create a list of dictionaries with the word and its count
    result = [{'word': word, 'count': count} for word, count in word_counts.items()]

    # Sort the list by count in descending order
    result = sorted(result, key=lambda x: x['count'], reverse=True)

    return result

def speech_words_copy_v2(text):
    # Split the text into words
    words = text.split()

    # Count the frequency of each word
    word_counts = Counter(words)

    # Sort the words by their frequency, in descending order
    sorted_words = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)

    # Counter keys are already unique, so no extra de-duplication is needed.
    # Return the list of (word, count) tuples, most frequent first
    return sorted_words
