import os
import io
import csv
import uuid 
import time  # Add this import for measuring processing duration
import torch
import librosa
import logging  # Import the logging module 
import datetime  # Add this import for datetime
from pydub import AudioSegment 
from transformers import Wav2Vec2CTCTokenizer, Wav2Vec2ForCTC, Wav2Vec2Processor 
from fastapi import APIRouter, UploadFile
from config import AppConfig  # Import the AppConfig class from config.py


# --- Logging setup ----------------------------------------------------------
# A bare ``import logging`` does not load the ``logging.handlers`` submodule,
# so ``logging.handlers.RotatingFileHandler`` only resolves if some other
# module happened to import it first. Import it explicitly so this module
# does not depend on import order.
import logging.handlers

# Log file lives in the directory configured on AppConfig.
log_file_path = os.path.join(AppConfig.LOG_DIR, "app_test.log")

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Rotating file handler so the log cannot grow without bound.
file_handler = logging.handlers.RotatingFileHandler(
    log_file_path,
    maxBytes=10 * 1024 * 1024,  # rotate once the file reaches 10 MB
    backupCount=5,  # keep up to 5 rotated log files
    encoding="utf-8",  # logged transcriptions may contain non-ASCII text
)
file_handler.setLevel(logging.DEBUG)

# Timestamp - logger name - level - message.
log_formatter = logging.Formatter(
    "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
file_handler.setFormatter(log_formatter)

logger.addHandler(file_handler)

# Logger-level threshold (adjust as needed); the handler filters at DEBUG too.
logger.setLevel(logging.DEBUG)

 
# Router for this module; the speech-to-text endpoint below registers itself
# on it via ``@router.post``.
router = APIRouter()

# @router.get("/v1/audio")
# async def get_audio():
#     return {"message": "This is the audio endpoint in version 1."}


# Load the Wav2Vec2 tokenizer, processor and CTC model once, at import time.
# The checkpoint directory is named in a single place so that relocating the
# weights means changing one line instead of three.
MODEL_DIR = "/var/www/html/Darija-Ai-API/weights/wav2vec2-large-xlsr-darija-train_v1"

tokenizer = Wav2Vec2CTCTokenizer(
    os.path.join(MODEL_DIR, "vocab.json"),
    unk_token="[UNK]",
    pad_token="[PAD]",
    word_delimiter_token="|",
)
processor = Wav2Vec2Processor.from_pretrained(MODEL_DIR, tokenizer=tokenizer)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_DIR)

@router.post("/v2/audio/convert-speech-to-text", tags=["Audio D-voice"])
async def convert_speech_to_text(file: UploadFile):
    """Transcribe an uploaded audio file with the module-level Wav2Vec2 model.

    The upload is decoded with pydub and stored as a WAV file under
    ``audio_files/``. It is then loaded at 16 kHz, run through the model, and
    decoded greedily (argmax over the logits). One row per request is appended
    to ``transcription_results.csv``.

    Args:
        file: The uploaded audio file.

    Returns:
        ``{"transcription": [...]}`` holding the decoded text on success, or
        ``{"error": "..."}`` if any step fails. Failures are logged with a
        traceback and reported in the response body rather than raised.
    """
    try:
        # Directory where every uploaded clip is kept as a WAV file.
        audio_dir = "audio_files"
        os.makedirs(audio_dir, exist_ok=True)

        # A random UUID keeps concurrent uploads from overwriting each other.
        audio_path = os.path.join(audio_dir, f"{uuid.uuid4()}.wav")

        # Decode the upload with pydub and store it as WAV.
        audio_data = await file.read()
        audio_segment = AudioSegment.from_file(io.BytesIO(audio_data))
        audio_segment.export(audio_path, format="wav")

        # Clip length in seconds, taken from the already-decoded segment.
        # This avoids re-reading the file and the ``filename=`` keyword of
        # ``librosa.get_duration``, which newer librosa releases deprecate.
        duration = audio_segment.duration_seconds

        # Record the upload datetime.
        upload_datetime = datetime.datetime.now()

        # perf_counter is monotonic, so the measured interval cannot be
        # skewed by wall-clock adjustments.
        start_time = time.perf_counter()

        # Load the stored WAV with librosa, resampled to 16 kHz.
        input_audio, _ = librosa.load(audio_path, sr=16000)

        # Turn the waveform into model input values.
        input_values = processor(
            input_audio, return_tensors="pt", padding=True
        ).input_values

        # Inference only: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            logits = model(input_values).logits

        # Greedy decoding: most likely token per frame, then collapse to text.
        tokens = torch.argmax(logits, dim=-1)
        transcription = tokenizer.batch_decode(tokens)

        processing_duration = time.perf_counter() - start_time

        logger.info("Transcription: %s", transcription)

        # Append one result row. The encoding is explicit because the
        # transcription may contain non-ASCII text.
        with open(
            'transcription_results.csv', mode='a', newline='', encoding='utf-8'
        ) as csv_file:
            fieldnames = [
                'File Path',
                'File Duration',
                'Upload Datetime',
                'Processing Duration',
                'Transcription',
            ]
            writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
            writer.writerow({
                'File Path': audio_path,
                'File Duration': duration,
                'Upload Datetime': upload_datetime,
                'Processing Duration': processing_duration,
                'Transcription': transcription[0],
            })
        return {"transcription": transcription}
    except Exception as e:
        # Endpoint boundary: log with traceback and return a generic error
        # payload instead of propagating the exception.
        logger.exception("Error in convert_speech_to_text: %s", e)
        return {"error": "An error occurred while converting speech to text."}