import asyncio
import html
import logging
import re
import time
from datetime import datetime
from typing import Any, Dict, List, Optional

import aiohttp
import feedparser

from ...shared.kafka_client.kafka_client import SearchKafkaClient
from ...shared.common_models.search_models import SearchResult, SearchResponse
from ..core.config import settings

logger = logging.getLogger(__name__)

class RSSSearchService:
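    """Keyword search over a fixed set of public RSS feeds.

    Tasks arrive as Kafka messages (see process_search_task); matching feed
    entries are published back to Kafka as a SearchResponse payload.
    """
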
    def __init__(self):
        self.kafka_client = SearchKafkaClient(
            bootstrap_servers=settings.KAFKA_BOOTSTRAP_SERVERS,
            consumer_group=settings.KAFKA_CONSUMER_GROUP
        )
        
        # Public RSS feeds to query; availability varies over time, and
        # per-feed failures are tolerated (see _search_single_feed).
        self.rss_feeds = [
            "https://rss.cnn.com/rss/edition.rss",
            "https://feeds.bbci.co.uk/news/rss.xml",
            "https://www.reddit.com/r/all/.rss",
            "https://hnrss.org/frontpage",
            "https://feeds.feedburner.com/TechCrunch",
            "https://www.wired.com/feed/rss",
            "https://feeds.arstechnica.com/arstechnica/index",
            "https://www.engadget.com/rss.xml",
            "https://feeds.mashable.com/Mashable",
            "https://feeds.reuters.com/reuters/topNews"
        ]
    
    async def process_search_task(self, message: Dict[str, Any]):
        """Process an RSS search task"""
        try:
            task_id = message.get("id")
            keywords = message.get("keywords", [])
            
            logger.info(f"Processing RSS search task: {task_id}")
            
            start_time = time.time()
            results = await self.search_rss_feeds(keywords)
            processing_time = time.time() - start_time
            
            response = SearchResponse(
                task_id=task_id,
                platform="rss",
                keywords=keywords,
                results=results,
                total_results=len(results),
                processing_time=processing_time,
                timestamp=datetime.utcnow()
            )
            
            await self.kafka_client.send_search_result(
                task_id=task_id,
                platform="rss",
                results=response.dict()
            )
            
            logger.info(f"RSS search completed: {task_id}, found {len(results)} results")
            
        except Exception as e:
            logger.error(f"Error processing RSS search task: {e}")
            await self.kafka_client.send_error_result(
                task_id=message.get("id", "unknown"),
                platform="rss",
                error=str(e)
            )
    
    async def search_rss_feeds(self, keywords: List[str], max_results: int = 50) -> List[SearchResult]:
        """Search through RSS feeds for keywords"""
        results: List[SearchResult] = []
        if not keywords:
            return results
        # Escape each keyword so regex metacharacters (e.g. "+" or "?") match
        # literally; an empty keyword list would otherwise compile to a
        # match-everything pattern.
        keyword_pattern = re.compile(
            "|".join(re.escape(keyword) for keyword in keywords), re.IGNORECASE
        )
        
        async with aiohttp.ClientSession() as session:
            tasks = []
            for feed_url in self.rss_feeds:
                task = self._search_single_feed(session, feed_url, keyword_pattern)
                tasks.append(task)
            
            # Execute all feed searches concurrently
            feed_results = await asyncio.gather(*tasks, return_exceptions=True)
            
            # Combine results from all feeds
            for feed_result in feed_results:
                if isinstance(feed_result, list):
                    results.extend(feed_result)
                elif isinstance(feed_result, Exception):
                    logger.warning(f"Error processing feed: {feed_result}")
        
        # Sort by published date (newest first) and limit results
        results.sort(key=lambda x: x.published_date or datetime.min, reverse=True)
        return results[:max_results]
    
    async def _search_single_feed(
        self, session: aiohttp.ClientSession, feed_url: str, keyword_pattern: re.Pattern
    ) -> List[SearchResult]:
        """Search a single RSS feed"""
        results = []
        
        try:
            logger.debug(f"Searching RSS feed: {feed_url}")
            
            # Per-request total timeout via aiohttp's ClientTimeout structure.
            async with session.get(feed_url, timeout=aiohttp.ClientTimeout(total=10)) as response:
                if response.status == 200:
                    content = await response.text()
                    # feedparser.parse is synchronous and CPU-bound; run it in a
                    # worker thread so parsing doesn't block the event loop while
                    # the other feeds are being fetched concurrently.
                    feed = await asyncio.to_thread(feedparser.parse, content)
                    
                    for entry in feed.entries:
                        # Check if keywords match in title or description
                        title = getattr(entry, 'title', '')
                        description = getattr(entry, 'description', '') or getattr(entry, 'summary', '')
                        
                        if keyword_pattern.search(title) or keyword_pattern.search(description):
                            result = self._create_rss_result(entry, feed_url)
                            if result:
                                results.append(result)
                
        except Exception as e:
            logger.warning(f"Error searching RSS feed {feed_url}: {e}")
        
        return results
    
    def _create_rss_result(self, entry, feed_url: str) -> Optional[SearchResult]:
        """Create SearchResult from RSS entry"""
        try:
            # Extract basic information
            title = getattr(entry, 'title', 'No title')
            url = getattr(entry, 'link', '')
            description = getattr(entry, 'description', '') or getattr(entry, 'summary', '')
            
            # Strip markup with a lightweight regex, then decode HTML entities
            # such as &amp; (best-effort cleanup, not a full HTML parser).
            description = html.unescape(re.sub(r'<[^>]+>', '', description))
            
            # Extract author
            author = getattr(entry, 'author', '') or getattr(entry, 'dc_creator', '')
            
            # Extract published date; feedparser's *_parsed fields are UTC
            # time.struct_time values, so the first six fields map to datetime.
            published_date = None
            if hasattr(entry, 'published_parsed') and entry.published_parsed:
                published_date = datetime(*entry.published_parsed[:6])
            elif hasattr(entry, 'updated_parsed') and entry.updated_parsed:
                published_date = datetime(*entry.updated_parsed[:6])
            
            # Extract source/feed name
            source = getattr(entry, 'source', {}).get('title', feed_url)
            
            return SearchResult(
                title=title,
                url=url,
                description=description,
                snippet=(description[:200] + "...") if len(description) > 200 else description,
                published_date=published_date,
                author=author,
                source=source,
                metadata={
                    "platform": "rss",
                    "feed_url": feed_url,
                    "entry_id": getattr(entry, 'id', ''),
                    "tags": getattr(entry, 'tags', []),
                    "extracted_at": datetime.utcnow().isoformat()
                }
            )
            
        except Exception as e:
            logger.warning(f"Error creating RSS result: {e}")
            return None
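

# Minimal usage sketch (hypothetical driver, for illustration only; the real
# consumer loop lives in SearchKafkaClient, whose subscribe/poll interface is
# not part of this module). The message shape matches process_search_task:
#
#     service = RSSSearchService()
#     asyncio.run(service.process_search_task({
#         "id": "task-123",
#         "keywords": ["python", "asyncio"],
#     }))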
