import nltk
import string
nltk.download('punkt')
nltk.download('stopwords')
from langdetect import detect
from collections import Counter
from nltk.corpus import stopwords  
from datetime import datetime
from typing import Any, Dict, List 
from statistics import median, stdev
import pandas as pd
import json

def get_stopwords(lang):
    """Return NLTK's stop-word list for *lang* (e.g. 'english') as a set."""
    word_list = stopwords.words(lang)
    return set(word_list)

def excel_to_json_epg(filepath):
    """
    Read the 'F1' sheet of an Excel EPG file into a JSON-style list of dicts.

    Args:
        filepath (str): The file path of the Excel file.

    Returns:
        list[dict]: One dict per row with keys "date" (dd-mm-YYYY),
        "time start"/"time end" (HH:MM:SS), "show" and a "views" counter
        initialised to 0.
    """
    # Load the Excel file into a Pandas dataframe
    # (assumes the sheet is named 'F1' and columns include
    # 'Start Date', 'Start Time', 'End Time', 'Event Name')
    df = pd.read_excel(filepath, sheet_name='F1')

    # Build one record per row. iterrows avoids the per-cell
    # df[col][i] column lookups of the range(len(df)) pattern.
    data = []
    for _, row in df.iterrows():
        data.append({
            "date": row['Start Date'].strftime('%d-%m-%Y'),
            "time start": row['Start Time'].strftime('%H:%M:%S'),
            "time end": row['End Time'].strftime('%H:%M:%S'),
            "show": row['Event Name'],
            "views": 0,
        })

    # The original serialised to a JSON string and immediately parsed it
    # back; every value here is already a plain str/int, so that round
    # trip was a no-op and the list is returned directly.
    return data

def excel_to_json_views(filepath):
    """
    Read an Excel viewership export into a JSON-style list of dicts.

    Args:
        filepath (str): The file path of the Excel file.

    Returns:
        list[dict]: One dict per row with keys "channel", "date"
        (YYYY-MM-DD), "time" (12-hour "HH:MM AM/PM") and "viewers".
    """
    # Load the Excel file into a Pandas dataframe
    # (assumes columns 'sfr_channel_name', 'day', 'minute', 'purcent')
    df = pd.read_excel(filepath)

    # Build one record per row. iterrows avoids the per-cell
    # df[col][i] column lookups of the range(len(df)) pattern.
    data = []
    for _, row in df.iterrows():
        data.append({
            "channel": row['sfr_channel_name'],
            "date": datetime.strptime(row['day'], '%Y-%m-%d').strftime('%Y-%m-%d'),
            "time": datetime.strptime(row['minute'], '%H:%M:%S').strftime('%I:%M %p'),
            # 'purcent' is an audience share scaled to an absolute viewer
            # count (assumes a 40,000-viewer base — TODO confirm).
            # float() keeps the value a plain Python float, matching what
            # the original's json.dumps/json.loads round trip produced.
            "viewers": float(row['purcent']) * 40000,
        })

    # The round trip through a JSON string was otherwise a no-op, so the
    # list is returned directly.
    return data


def add_show_to_views(json1: List[Dict[str, Any]], json2: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Adds a 'show' key to each dictionary in json1 based on the time range in json2.

    Records whose time falls in no show's [start, end] range are copied
    through unchanged (no 'show' key added). Input dicts are not mutated.

    Args:
        json1: View records, each with a 'time' key formatted '%I:%M %p'.
        json2: EPG entries with 'time start'/'time end' ('%H:%M:%S') and 'show'.

    Returns:
        A new list of json1 dicts, each annotated with the matching 'show'.
    """
    # Parse the schedule once up front; the original re-parsed every
    # json2 time string for every json1 record (O(n*m) strptime calls).
    schedule = [
        (
            datetime.strptime(entry['time start'], '%H:%M:%S').time(),
            datetime.strptime(entry['time end'], '%H:%M:%S').time(),
            entry['show'],
        )
        for entry in json2
    ]

    updated_json = []
    for record in json1:
        # Copy so the caller's dict is never mutated
        annotated = record.copy()
        view_time = datetime.strptime(record['time'], '%I:%M %p').time()

        # First schedule entry whose range contains the view time wins,
        # matching the original's break-on-first-match behaviour.
        for start_time, end_time, show in schedule:
            if start_time <= view_time <= end_time:
                annotated['show'] = show
                break

        updated_json.append(annotated)

    return updated_json

def add_calcul_to_epg(json1: List[Dict[str, Any]], json2: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Calculates some statistics based on the views in json1 within the time range of each show in json2.
    Adds attributes 'total', 'max', 'min', 'mean', 'median', 'ecart-type' to each show in json2.

    Shows with no matching views get 0 for every statistic; 'ecart-type'
    (standard deviation) needs at least two samples and is 0 otherwise.
    Input dicts are not mutated.

    Args:
        json1: View records with 'time' ('%I:%M %p') and numeric 'viewers'.
        json2: EPG entries with 'time start'/'time end' ('%H:%M:%S').

    Returns:
        A new list of json2 dicts with the statistics attributes added.
    """
    # Parse each view record's time once, up front; the original
    # re-parsed every json1 time string for every show (O(n*m) strptime).
    timed_views = [
        (datetime.strptime(record['time'], '%I:%M %p').time(), record['viewers'])
        for record in json1
    ]

    updated_json = []
    for show_entry in json2:
        # Copy so the caller's dict is never mutated
        updated_item = show_entry.copy()
        start_time = datetime.strptime(show_entry['time start'], '%H:%M:%S').time()
        end_time = datetime.strptime(show_entry['time end'], '%H:%M:%S').time()

        # Views falling inside this show's (inclusive) time range
        views_in_range = [v for t, v in timed_views if start_time <= t <= end_time]
        count = len(views_in_range)
        total = sum(views_in_range)  # computed once, reused for 'mean'

        updated_item['total'] = total
        updated_item['max'] = max(views_in_range) if views_in_range else 0
        updated_item['min'] = min(views_in_range) if views_in_range else 0
        updated_item['mean'] = total / count if views_in_range else 0
        updated_item['median'] = median(views_in_range) if views_in_range else 0
        updated_item['ecart-type'] = stdev(views_in_range) if count > 1 else 0

        updated_json.append(updated_item)

    return updated_json

def speech_words(text):
    """
    Rank the non-stopword tokens of *text* by relative frequency.

    Args:
        text (str): Raw input text; stop words are removed for English
            and French, otherwise nothing is filtered.

    Returns:
        list[dict]: [{'word', 'rank', 'count'}] in descending frequency,
        where 'rank' is the word's share of all non-stopword tokens.
    """
    # Detect language of text (langdetect returns ISO codes, e.g. 'en')
    lang = detect(text)

    # Renamed from `stopwords` so the imported nltk.corpus.stopwords
    # module is not shadowed; the fallback is a set (the original used a
    # list, making every membership test O(n)).
    if lang == 'en':
        stop_words = get_stopwords('english')
    elif lang == 'fr':
        stop_words = get_stopwords('french')
    else:
        # Unsupported language: filter nothing
        stop_words = set()

    # Remove punctuation and convert to lowercase
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = text.lower()

    # Tokenize text into words
    words = nltk.word_tokenize(text)
    # Remove stop words
    filtered_words = [word for word in words if word not in stop_words]

    # Count the frequency of each word
    word_counts = Counter(filtered_words)

    # Calculate total number of non-stopwords in text
    total_words = sum(word_counts.values())

    # Rank of each word is its frequency share; the comprehension never
    # runs when total_words == 0, so no division-by-zero guard is needed.
    sorted_words = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)
    ranked_words = [{'word': word, 'rank': count/total_words, 'count': count} for word, count in sorted_words]

    # Return a list of the words, in descending order of frequency rank
    return ranked_words



def speech_words_copy_v4(text):
    """
    Tokenize *text* and return its non-stopwords ranked by frequency.

    Args:
        text (str): Raw input text; stop words are removed for English
            and French, otherwise nothing is filtered.

    Returns:
        list[dict]: [{'word', 'rank'}] where 'rank' is the 1-based
        position in descending frequency order.
    """
    # Detect language of text (langdetect returns ISO codes, e.g. 'en')
    lang = detect(text)

    # frozenset gives O(1) membership tests (the original list was O(n)
    # per token); renamed so the imported `stopwords` module isn't shadowed.
    if lang == 'en':
        stop_words = frozenset(['a', 'an', 'the', 'and', 'or', 'but', 'if', 'then', 'it', 'is', 'are', 'was', 'were', 'of', 'for', 'on', 'in', 'to', 'that', 'with', 'this', 'at', 'from', 'by', 'be', 'not', 'as', 'you', 'your', 'they', 'their', 'i', 'we', 'our', 'us', 'he', 'she', 'him', 'her', 'his', 'hers', 'its'])
    elif lang == 'fr':
        stop_words = frozenset(['un', 'une', 'le', 'la', 'les', 'et', 'ou', 'mais', 'si', 'alors', 'est', 'sont', 'était', 'étaient', 'de', 'du', 'des', 'pour', 'sur', 'dans', 'à', 'avec', 'ce', 'cet', 'cette', 'ces', 'en', 'par', 'pas', 'comme', 'vous', 'votre', 'ils', 'leur', 'je', 'nous', 'notre', 'nous', 'il', 'elle', 'lui', 'leur', 'son', 'sa', 'ses'])
    else:
        # Unsupported language: filter nothing
        stop_words = frozenset()

    # Remove punctuation and convert to lowercase
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = text.lower()

    # Tokenize text into words
    words = nltk.word_tokenize(text)
    # Remove stop words
    filtered_words = [word for word in words if word not in stop_words]

    # Count the frequency of each word
    word_counts = Counter(filtered_words)

    # Sort the words by their frequency, in descending order
    sorted_words = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)

    # Assign a 1-based rank to each word based on its frequency position
    ranked_words = [{'word': word, 'rank': rank} for rank, (word, _count) in enumerate(sorted_words, start=1)]

    # Return a list of the words, in descending order of frequency
    return ranked_words

def speech_words_copy_v3(text):
    """
    Count non-stopword frequencies in *text*.

    Args:
        text (str): Raw input text; stop words are removed for English
            and French, otherwise nothing is filtered.

    Returns:
        list[dict]: [{'word', 'count'}] in descending count order.
    """
    # Detect language of text (langdetect returns ISO codes, e.g. 'en')
    lang = detect(text)

    # frozenset gives O(1) membership tests (the original list was O(n)
    # per token); renamed so the imported `stopwords` module isn't shadowed.
    if lang == 'en':
        stop_words = frozenset(['a', 'an', 'the', 'and', 'or', 'but', 'if', 'then', 'it', 'is', 'are', 'was', 'were', 'of', 'for', 'on', 'in', 'to', 'that', 'with', 'this', 'at', 'from', 'by', 'be', 'not', 'as', 'you', 'your', 'they', 'their', 'i', 'we', 'our', 'us', 'he', 'she', 'him', 'her', 'his', 'hers', 'its'])
    elif lang == 'fr':
        stop_words = frozenset(['un', 'une', 'le', 'la', 'les', 'et', 'ou', 'mais', 'si', 'alors', 'est', 'sont', 'était', 'étaient', 'de', 'du', 'des', 'pour', 'sur', 'dans', 'à', 'avec', 'ce', 'cet', 'cette', 'ces', 'en', 'par', 'pas', 'comme', 'vous', 'votre', 'ils', 'leur', 'je', 'nous', 'notre', 'nous', 'il', 'elle', 'lui', 'leur', 'son', 'sa', 'ses'])
    else:
        # Unsupported language: filter nothing
        stop_words = frozenset()

    # Remove punctuation and convert to lowercase
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = text.lower()

    # Tokenize text into words
    words = nltk.word_tokenize(text)
    # Remove stop words
    filtered_words = [word for word in words if word not in stop_words]

    # Count the frequency of each word
    word_counts = Counter(filtered_words)

    # Sort the words by their frequency, in descending order
    sorted_words = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)

    # Return a list of the words, in descending order of frequency
    return [{'word': word, 'count': count} for word, count in sorted_words]

def speech_words_copy(text):
    """
    Count word frequencies in *text* after removing stop words.

    Args:
        text (str): Raw input text; words are whitespace-separated.

    Returns:
        list[dict]: [{'word', 'count'}] in descending count order.
    """
    # Ensure the stop-word corpus is available (no-op when already
    # downloaded; the original called this twice).
    nltk.download('stopwords')

    # Detect the language of the text
    lang = detect(text)

    # BUG FIX: langdetect returns ISO 639-1 codes ('en', 'fr'), but NLTK's
    # stopword corpus is keyed by full language names ('english',
    # 'french'). The original passed the code straight through, which
    # raises an error for English/French text. Unknown codes still fall
    # through unchanged (and will raise, as before, if NLTK has no such
    # language).
    lang_names = {'en': 'english', 'fr': 'french'}
    stop_words = set(stopwords.words(lang_names.get(lang, lang)))

    # Split the text into words
    words = text.split()

    # Remove stop words (case-insensitively, as in the original)
    filtered_words = [word for word in words if word.lower() not in stop_words]

    # Count the frequency of each word
    word_counts = Counter(filtered_words)

    # Sort the words by their frequency, in descending order
    sorted_words = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)

    # Return a list of the words, in descending order of frequency
    return [{'word': word, 'count': count} for word, count in sorted_words]

def speech_words_copy_v1(text):
    """
    Count how often each whitespace-separated word occurs in *text*.

    Args:
        text (str): Input text.

    Returns:
        list[dict]: [{'word', 'count'}] in descending count order.
    """
    counts = Counter(text.split())
    # most_common() sorts by count descending with ties in first-seen
    # order — the same ordering as an explicit stable sorted(...,
    # key=count, reverse=True) over the items.
    return [{'word': word, 'count': count} for word, count in counts.most_common()]

def speech_words_copy_v2(text):
    """
    Count word frequencies in *text*.

    Args:
        text (str): Input text; words are whitespace-separated.

    Returns:
        list[tuple]: (word, count) pairs in descending count order.
    """
    # Count the frequency of each word; Counter keys are already unique,
    # so the original's follow-up "remove duplicates" pass — which
    # compared each word *string* against a list of *tuples* and
    # therefore never filtered anything — is dropped as dead code.
    word_counts = Counter(text.split())

    # most_common() == sorted(items, key=count, reverse=True) with ties
    # in first-seen order, identical to the original's ordering.
    return word_counts.most_common()
