# ira/ui/gradio_interface.py — 1566 lines, 70 KiB, Python

"""
Gradio interface for the intelligent research system.
This module provides a web interface for users to interact with the research system.
"""
import os
import json
import gradio as gr
import sys
import time
import asyncio
from pathlib import Path
from datetime import datetime
# Add the parent directory to the path to allow importing from other modules
sys.path.append(str(Path(__file__).parent.parent))
from query.query_processor import QueryProcessor
from execution.search_executor import SearchExecutor
from execution.result_collector import ResultCollector
from execution.sub_question_executor import get_sub_question_executor
from report.report_generator import get_report_generator, initialize_report_generator
from report.report_detail_levels import get_report_detail_level_manager, DetailLevel
from config.config import Config
class GradioInterface:
"""Gradio interface for the intelligent research system."""
def __init__(self):
    """Set up pipeline components, result/report directories, and config."""
    # Core search-pipeline components.
    self.query_processor = QueryProcessor()
    self.search_executor = SearchExecutor()
    self.result_collector = ResultCollector()
    self.sub_question_executor = get_sub_question_executor()

    base_dir = Path(__file__).parent.parent
    # Raw search results are saved here as timestamped JSON files.
    self.results_dir = base_dir / "results"
    self.results_dir.mkdir(exist_ok=True)
    # Generated reports, grouped into one subdirectory per day.
    self.reports_dir = base_dir / "reports"
    self.reports_dir.mkdir(exist_ok=True)
    self.reports_daily_dir = self.reports_dir / datetime.now().strftime("%Y-%m-%d")
    self.reports_daily_dir.mkdir(exist_ok=True)

    # JSON index of every generated report; create an empty one on first run.
    self.reports_metadata_file = self.reports_dir / "reports_metadata.json"
    if not self.reports_metadata_file.exists():
        self.reports_metadata_file.write_text(json.dumps({"reports": []}, indent=2))

    self.detail_level_manager = get_report_detail_level_manager()
    self.config = Config()
    # Deferred: requires an event loop — see async_init().
    self.report_generator = None
    # Progress display relies on Gradio's built-in gr.Progress tracking.
async def async_init(self):
    """Asynchronously initialize components that require async initialization.

    Must be awaited once after construction: creates and stores the shared
    report generator.

    Returns:
        GradioInterface: self, so construction can be chained/awaited.
    """
    # The report generator needs async setup before it can be fetched.
    await initialize_report_generator()
    self.report_generator = get_report_generator()
    return self
def process_query(self, query, num_results=10, use_reranker=True):
    """
    Process a query and return the results.

    Runs the full search pipeline: query processing, search execution across
    all available engines, result collection/deduplication (optionally with
    reranking), and persistence of the processed results to a timestamped
    JSON file.

    Args:
        query (str): The query to process
        num_results (int): Number of results to request from each engine
        use_reranker (bool): Whether to use the Jina Reranker for semantic ranking

    Returns:
        tuple: (markdown_results, json_results_path); the path is None when
            no results were found or an error occurred
    """
    try:
        # Step 1: structure/enhance the raw query.
        # NOTE(review): generate_report awaits query_processor.process_query,
        # but here it is called synchronously — confirm which API is correct.
        print(f"Processing query: {query}")
        processed_query = self.query_processor.process_query(query)
        print(f"Processed query: {processed_query}")
        # Log per-engine availability to aid debugging of missing API keys.
        available_engines = self.search_executor.get_available_search_engines()
        print(f"Available search engines: {available_engines}")
        for engine_name, handler in self.search_executor.available_handlers.items():
            print(f"Handler {engine_name} available: {handler.is_available()}")
            if not handler.is_available():
                print(f" - Reason: API key may be missing for {engine_name}")
        # Default to every available engine when the processor didn't pick any.
        if 'search_engines' not in processed_query:
            processed_query['search_engines'] = available_engines
            print(f"Using search engines: {available_engines}")
        # Step 2: run the search on each engine.
        print(f"Executing search...")
        search_results = self.search_executor.execute_search(
            structured_query=processed_query,
            num_results=num_results
        )
        for engine, results in search_results.items():
            print(f"Engine {engine} returned {len(results)} results")
        # Attach the query text and source engine to each result so the
        # reranker can score query/result relevance downstream.
        enhanced_query = processed_query.get("enhanced_query", processed_query.get("original_query", query))
        flattened_results = []
        for engine, results in search_results.items():
            for result in results:
                result["query"] = enhanced_query
                result["engine"] = engine
                flattened_results.append(result)
        # Step 3: dedup + (optionally) rerank; max_results=None keeps everything.
        print(f"Processing results...")
        processed_results = self.result_collector.process_results(
            {"combined": flattened_results}, dedup=True, max_results=None, use_reranker=use_reranker
        )
        print(f"Processed {len(processed_results)} results")
        # Step 4: persist the processed results for later report generation.
        timestamp = int(time.time())
        results_file = self.results_dir / f"results_{timestamp}.json"
        if processed_results:
            with open(results_file, "w") as f:
                json.dump(processed_results, f, indent=2)
            print(f"Results saved to {results_file}")
            file_path = str(results_file)
        else:
            error_message = "No results found. Please try a different query or check API keys."
            print(error_message)
            file_path = None
            return f"## No Results Found\n\n{error_message}", file_path
        # Step 5: render for the UI.
        markdown_results = self._format_results_as_markdown(processed_results)
        return markdown_results, file_path
    except Exception as e:
        # UI boundary: surface the failure as markdown rather than raising.
        error_message = f"Error processing query: {str(e)}"
        print(f"ERROR: {error_message}")
        import traceback
        traceback.print_exc()
        return f"## Error\n\n{error_message}", None
def _format_results_as_markdown(self, results):
"""
Format results as markdown.
Args:
results (list): List of result dictionaries
Returns:
str: Markdown formatted results
"""
if not results:
return "## No Results Found\n\nNo results were found for your query."
# Count results by source
source_counts = {}
for result in results:
source = result.get("source", "unknown")
source_counts[source] = source_counts.get(source, 0) + 1
# Create source distribution string
source_distribution = ", ".join([f"{source}: {count}" for source, count in source_counts.items()])
markdown = f"## Search Results\n\n"
markdown += f"*Sources: {source_distribution}*\n\n"
for i, result in enumerate(results):
title = result.get("title", "Untitled")
url = result.get("url", "")
snippet = result.get("snippet", "No snippet available")
source = result.get("source", "unknown")
authors = result.get("authors", "Unknown")
year = result.get("year", "Unknown")
score = result.get("relevance_score", 0)
markdown += f"### {i+1}. {title}\n\n"
markdown += f"**Source**: {source}\n\n"
markdown += f"**URL**: [{url}]({url})\n\n"
markdown += f"**Snippet**: {snippet}\n\n"
markdown += f"**Authors**: {authors}\n\n"
markdown += f"**Year**: {year}\n\n"
markdown += f"**Score**: {score}\n\n"
markdown += "---\n\n"
return markdown
async def generate_report(self, query, detail_level="standard", query_type="auto-detect", custom_model=None,
                          results_file=None, process_thinking_tags=False, initial_results=10, final_results=7,
                          progress=gr.Progress()):
    """
    Generate a report for the given query.

    Args:
        query: The query to generate a report for
        detail_level: The level of detail for the report (brief, standard, detailed, comprehensive)
        query_type: Report style to use, or "auto-detect" to let the generator decide
        custom_model: Custom model to use for report generation
        results_file: Path to a file containing search results
        process_thinking_tags: Whether to process thinking tags in the model output
        initial_results: Results to request per engine (overrides the detail-level default)
        final_results: Results to keep after reranking (overrides the detail-level default)
        progress: Gradio progress indicator

    Returns:
        tuple: (report markdown, path to the generated report file); on
            failure, (error markdown, None)
    """
    try:
        # Timestamped output name; optionally suffixed with the chosen model.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        model_suffix = ""
        # Extract the actual model name from the UI display string if selected.
        if custom_model:
            # Display format is "model_name (provider: model_display)".
            original_custom_model = custom_model
            if "(" in custom_model:
                custom_model = custom_model.split(" (")[0]
            model_name = custom_model.split('/')[-1]
            model_suffix = f"_{model_name}"
            # Log the model selection for debugging.
            print(f"Selected model from UI: {original_custom_model}")
            print(f"Extracted model name: {custom_model}")
            print(f"Using model suffix: {model_suffix}")
        # Unique report ID: timestamp + short hash of the query text.
        import hashlib
        report_id = f"{timestamp}_{hashlib.md5(query.encode()).hexdigest()[:8]}"
        # Reports are written into the per-day subdirectory.
        output_file = self.reports_daily_dir / f"report_{report_id}{model_suffix}.md"
        # Base configuration for the chosen detail level.
        config = self.detail_level_manager.get_detail_level_config(detail_level)
        # UI overrides for result counts.
        if initial_results:
            config["initial_results_per_engine"] = initial_results
        if final_results:
            config["final_results_after_reranking"] = final_results
        # If a custom model is provided, override the detail-level default.
        if custom_model:
            model_name = custom_model.split(" (")[0] if " (" in custom_model else custom_model
            config["model"] = model_name
            print(f"Using custom model: {model_name}")
        # Lazily initialize the report generator if async_init was never run.
        if self.report_generator is None:
            print("Initializing report generator...")
            await initialize_report_generator()
            self.report_generator = get_report_generator()
        # Debug: report the default model implied by the detail level.
        detail_config = self.detail_level_manager.get_detail_level_config(detail_level)
        default_model = detail_config.get("model", "unknown")
        print(f"Default model for {detail_level} detail level: {default_model}")
        # Explicitly point the report generator at the custom model, if any.
        if custom_model:
            model_name = custom_model.split(" (")[0] if " (" in custom_model else custom_model
            print(f"Setting report generator to use custom model: {model_name}")
            if hasattr(self.report_generator, 'set_model'):
                self.report_generator.set_model(model_name)
                print(f"After setting custom model, report generator model is: {self.report_generator.model_name}")
            else:
                print("Warning: Report generator does not have set_model method. Using alternative approach.")
                # Fallback: mutate the generator's own detail-level config.
                current_config = self.report_generator.get_detail_level_config()
                if current_config:
                    current_config["model"] = model_name
                    print(f"Updated config model to: {model_name}")
        print(f"Generating report with detail level: {detail_level}")
        print(f"Detail level configuration: {config}")
        print(f"Using model: {config['model']}")
        print(f"Processing thinking tags: {process_thinking_tags}")
        # Either load previously saved results or run a fresh search.
        search_results = []
        if results_file and os.path.exists(results_file):
            with open(results_file, 'r') as f:
                search_results = json.load(f)
            print(f"Loaded {len(search_results)} results from {results_file}")
        else:
            print(f"No results file provided, performing search for: {query}")
            # Structure the query (may decompose it into sub-questions).
            structured_query = await self.query_processor.process_query(query)
            # Generate engine-specific search queries.
            structured_query = await self.query_processor.generate_search_queries(
                structured_query,
                self.search_executor.get_available_search_engines()
            )
            # Per-engine result count, needed throughout this branch.
            num_results_to_fetch = config.get("initial_results_per_engine", config.get("num_results", 10))
            # Empty default in case the query has no sub-questions.
            sub_question_results = {}
            has_sub_questions = 'sub_questions' in structured_query and structured_query['sub_questions']
            if has_sub_questions:
                print(f"Query was decomposed into {len(structured_query['sub_questions'])} sub-questions:")
                for i, sq in enumerate(structured_query['sub_questions']):
                    print(f" {i+1}. {sq.get('sub_question')} (aspect: {sq.get('aspect')}, priority: {sq.get('priority')})")
                # Run the (cheaper) sub-question searches first.
                progress(0.1, desc="Executing searches for sub-questions...")
                structured_query = await self.sub_question_executor.execute_sub_question_searches(
                    structured_query,
                    num_results_per_engine=3  # Use fewer results per engine for sub-questions
                )
                # Combine and prioritize sub-question results per engine.
                sub_question_results = self.sub_question_executor.get_combined_results(structured_query)
                print(f"Sub-questions returned results from {len(sub_question_results)} engines")
                sub_question_results = self.sub_question_executor.prioritize_results(
                    sub_question_results,
                    max_results_per_engine=num_results_to_fetch  # Use same limit as main query
                )
                progress(0.2, desc="Completed sub-question searches")
            # Main query search.
            progress(0.3, desc="Executing main search...")
            search_results_dict = self.search_executor.execute_search(
                structured_query,
                num_results=num_results_to_fetch
            )
            print(f"Main search results by engine:")
            for engine, results in search_results_dict.items():
                print(f" {engine}: {len(results)} results")
            # Merge sub-question results into the per-engine result lists.
            if has_sub_questions and 'sub_questions' in structured_query:
                print("Combining main search results with sub-question results")
                progress(0.4, desc="Combining results from sub-questions...")
                for engine, results in sub_question_results.items():
                    if engine in search_results_dict:
                        search_results_dict[engine].extend(results)
                        print(f" Added {len(results)} results from sub-questions to {engine}")
                    else:
                        # Engine only produced sub-question results.
                        search_results_dict[engine] = results
                        print(f" Added {len(results)} results from sub-questions as new engine {engine}")
            # Flatten per-engine lists into a single result list.
            search_results = []
            for engine_results in search_results_dict.values():
                search_results.extend(engine_results)
            print(f"Total flattened search results: {len(search_results)}")
            # First fallback: retry with a shortened query.
            if len(search_results) == 0:
                print("WARNING: No search results found. Using fallback search mechanism...")
                simplified_query = query.split(" ")[:10]  # Take first 10 words
                simplified_query = " ".join(simplified_query)
                if simplified_query != query:
                    print(f"Trying simplified query: {simplified_query}")
                    basic_structured_query = {
                        "original_query": simplified_query,
                        "enhanced_query": simplified_query,
                        "type": "unknown",
                        "intent": "research"
                    }
                    # NOTE(review): config["num_results"] may be absent here —
                    # other lookups use .get() with a default; confirm the
                    # detail-level config always includes "num_results".
                    search_results_dict = self.search_executor.execute_search(
                        basic_structured_query,
                        num_results=config["num_results"]
                    )
                    search_results = []
                    for engine_results in search_results_dict.values():
                        search_results.extend(engine_results)
                    print(f"Fallback search returned {len(search_results)} results")
                # Second fallback: synthesize a placeholder result so report
                # generation can still proceed.
                if len(search_results) == 0:
                    print("WARNING: Fallback search also failed. Creating mock search result...")
                    search_results = [{
                        "title": f"Information about: {query}",
                        "url": "https://example.com/search-result",
                        "snippet": f"This is a placeholder result for the query: {query}. " +
                                   "The search system was unable to find relevant results. " +
                                   "Please try refining your query or check your search API configuration.",
                        "source": "mock_result",
                        "score": 1.0
                    }]
                    print("Created mock search result to allow report generation to proceed")
        # Optional semantic reranking. NOTE(review): self.reranker is never
        # assigned in __init__, so this branch appears never to run — confirm
        # whether a reranker was meant to be attached to this interface.
        if hasattr(self, 'reranker') and self.reranker:
            # Use final_results_after_reranking if available, else num_results.
            top_n_results = config.get("final_results_after_reranking", config.get("num_results", 7))
            search_results = self.reranker.rerank_with_metadata(
                query,
                search_results,
                document_key='snippet',
                top_n=top_n_results
            )
        # Progress reporting: translate generator progress into a status line.
        def progress_callback(current_progress, total_chunks, current_report):
            current_chunk = int(current_progress * total_chunks) if total_chunks > 0 else 0
            if current_progress == 0:
                status_message = "Preparing documents..."
            elif current_progress >= 1.0:
                status_message = "Finalizing report..."
            else:
                status_message = f"Processing chunk {current_chunk}/{total_chunks}..."
            # Append the chunk title being processed, when the generator exposes it.
            if hasattr(self.report_generator, 'current_chunk_title'):
                chunk_title = self.report_generator.current_chunk_title
                if chunk_title:
                    status_message += f" ({chunk_title})"
            # Append the model in use (name only, provider prefix stripped).
            if hasattr(self.report_generator, 'model_name') and self.report_generator.model_name:
                model_display = self.report_generator.model_name.split('/')[-1]
                status_message += f" (Using model: {model_display})"
            return status_message
        if hasattr(self.report_generator, 'set_progress_callback'):
            # Wrapper that also drives Gradio's built-in progress bar, which
            # properly updates the UI during async operations.
            def ui_progress_callback(current_progress, total_chunks, current_report):
                status_message = progress_callback(current_progress, total_chunks, current_report)
                progress(current_progress, desc=status_message)
                return status_message
            self.report_generator.set_progress_callback(ui_progress_callback)
        # Generate the report.
        print(f"Generating report with {len(search_results)} search results")
        if len(search_results) == 0:
            print("WARNING: No search results found. Report generation may fail.")
        print(f"Report generator is using model: {self.report_generator.model_name}")
        if detail_level.lower() == "comprehensive":
            self.progress_status = "Generating progressive report..."
        else:
            self.progress_status = "Processing document chunks..."
        # "auto-detect" means: let the report generator infer the query type.
        actual_query_type = None
        if query_type != "auto-detect":
            actual_query_type = query_type
            print(f"Using user-selected query type: {actual_query_type}")
        else:
            print("Using auto-detection for query type")
        # structured_query is only bound in the fresh-search branch above;
        # default it so the call below is safe when results came from a file.
        if not locals().get('structured_query'):
            structured_query = None
        report = await self.report_generator.generate_report(
            search_results=search_results,
            query=query,
            token_budget=config["token_budget"],
            chunk_size=config["chunk_size"],
            overlap_size=config["overlap_size"],
            detail_level=detail_level,
            query_type=actual_query_type,
            structured_query=structured_query if structured_query and 'sub_questions' in structured_query else None
        )
        # Final progress update.
        progress(1.0)
        # Optionally strip <thinking> sections from the model output.
        if process_thinking_tags:
            report = self._process_thinking_tags(report)
        # Persist the report and record it in the metadata index.
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(report)
        print(f"Report saved to: {output_file}")
        self._update_report_metadata(report_id, {
            "id": report_id,
            "timestamp": timestamp,
            "query": query,
            "detail_level": detail_level,
            "query_type": query_type,
            "model": custom_model if custom_model else config.get("model", "default"),
            "file_path": str(output_file),
            "file_size": output_file.stat().st_size,
            "creation_date": datetime.now().isoformat()
        })
        return report, str(output_file)
    except Exception as e:
        # UI boundary: surface the failure as markdown rather than raising.
        error_message = f"Error generating report: {str(e)}"
        print(f"ERROR: {error_message}")
        import traceback
        traceback.print_exc()
        return f"## Error\n\n{error_message}", None
def _process_thinking_tags(self, text):
"""
Process thinking tags in the text.
Args:
text (str): Text to process
Returns:
str: Processed text
"""
# Remove content between <thinking> and </thinking> tags
import re
return re.sub(r'<thinking>.*?</thinking>', '', text, flags=re.DOTALL)
def _update_report_metadata(self, report_id, metadata):
"""
Update the report metadata file with new report information.
Args:
report_id (str): Unique identifier for the report
metadata (dict): Report metadata to store
"""
try:
# Load existing metadata
with open(self.reports_metadata_file, 'r') as f:
all_metadata = json.load(f)
# Check if report already exists
existing_report = None
for i, report in enumerate(all_metadata.get('reports', [])):
if report.get('id') == report_id:
existing_report = i
break
# Update or add the report metadata
if existing_report is not None:
all_metadata['reports'][existing_report] = metadata
else:
all_metadata['reports'].append(metadata)
# Save updated metadata
with open(self.reports_metadata_file, 'w') as f:
json.dump(all_metadata, f, indent=2)
print(f"Updated metadata for report {report_id}")
except Exception as e:
print(f"Error updating report metadata: {str(e)}")
def get_all_reports(self):
    """Load every report entry from the metadata index.

    Returns:
        list: Report metadata dicts sorted newest-first by creation_date;
            an empty list when the index cannot be read.
    """
    try:
        with open(self.reports_metadata_file, 'r') as f:
            index = json.load(f)
        entries = index.get('reports', [])
        # Newest first; missing dates sort last.
        entries.sort(key=lambda entry: entry.get('creation_date', ''), reverse=True)
        return entries
    except Exception as e:
        print(f"Error getting report metadata: {str(e)}")
        return []
def delete_report(self, report_id):
    """Remove a report's file from disk and its entry from the index.

    Args:
        report_id (str): ID of the report to delete

    Returns:
        bool: True on success, False if the report is unknown or an
            error occurs.
    """
    try:
        with open(self.reports_metadata_file, 'r') as f:
            index = json.load(f)

        # Locate the matching entry.
        target = next(
            (entry for entry in index.get('reports', [])
             if entry.get('id') == report_id),
            None,
        )
        if not target:
            print(f"Report {report_id} not found")
            return False

        # Delete the report file itself, if it still exists.
        file_path = target.get('file_path')
        print(f"Deleting report: report_id={report_id}, file_path={file_path}")
        if file_path and Path(file_path).exists():
            print(f"File exists: {Path(file_path).exists()}")
            Path(file_path).unlink()
            print(f"Deleted report file: {file_path}")
        else:
            print(f"File not found or file_path is missing")

        # Drop the entry from the index and persist the change.
        index['reports'] = [
            entry for entry in index.get('reports', [])
            if entry.get('id') != report_id
        ]
        with open(self.reports_metadata_file, 'w') as f:
            json.dump(index, f, indent=2)
        print(f"Deleted report {report_id} from metadata")
        return True
    except Exception as e:
        print(f"Error deleting report: {str(e)}")
        return False
def get_available_models(self):
    """List model names usable for report generation.

    Returns:
        list: Model names declared in the config file, or a built-in
            default set when the config declares none.
    """
    config_data = self.config.config_data
    models = list(config_data['models'].keys()) if 'models' in config_data else []
    # Fall back to a known-good default set when the config has no models.
    return models or [
        "llama-3.1-8b-instant",
        "llama-3.3-70b-versatile",
        "groq/deepseek-r1-distill-llama-70b-specdec",
        "openrouter-mixtral",
        "openrouter-claude",
        "gemini-2.0-flash-lite",
    ]
def get_model_descriptions(self):
    """Build display names for the configured models.

    Also caches a display-name -> tooltip mapping on
    self.model_name_to_description for use by the dropdown UI.

    Returns:
        dict: Mapping of model name to "name (provider: model)" display
            string.
    """
    descriptions = {}
    tooltip_map = {}
    for model_name, model_config in self.config.config_data.get('models', {}).items():
        provider = model_config.get('provider', 'unknown')
        model_display = model_config.get('model_name', model_name)
        max_tokens = model_config.get('max_tokens', 'unknown')
        temperature = model_config.get('temperature', 'unknown')
        # Short form shown in the dropdown itself.
        display_name = f"{model_name} ({provider}: {model_display})"
        descriptions[model_name] = display_name
        # Longer form used as the dropdown tooltip.
        tooltip_map[display_name] = (
            f"{display_name} - Max tokens: {max_tokens}, Temperature: {temperature}"
        )
    self.model_name_to_description = tooltip_map
    return descriptions
def _get_reports_for_display(self):
"""Get reports formatted for display in the UI"""
reports = self.get_all_reports()
display_data = []
for report in reports:
# Format timestamp for display
timestamp = report.get('timestamp', '')
creation_date = report.get('creation_date', '')
if creation_date:
try:
# Convert ISO format to datetime and format for display
dt = datetime.fromisoformat(creation_date)
formatted_date = dt.strftime('%Y-%m-%d %H:%M:%S')
except:
formatted_date = creation_date
else:
formatted_date = timestamp
# Format file size
file_size = report.get('file_size', 0)
if file_size < 1024:
formatted_size = f"{file_size} B"
elif file_size < 1024 * 1024:
formatted_size = f"{file_size / 1024:.1f} KB"
else:
formatted_size = f"{file_size / (1024 * 1024):.1f} MB"
# Add row to display data
display_data.append([
report.get('id', ''),
report.get('query', '')[:50] + ('...' if len(report.get('query', '')) > 50 else ''),
report.get('model', '').split('/')[-1], # Show only the model name without provider
report.get('detail_level', ''),
formatted_date,
formatted_size,
Path(report.get('file_path', '')).name, # Just the filename
])
return display_data
def _delete_selected_reports(self, selected_choices):
"""Delete selected reports
Args:
selected_choices (list): List of selected checkbox values in format "ID: Query (Model)"
Returns:
tuple: Updated reports table data and updated checkbox choices
"""
if not selected_choices:
# If no reports are selected, just refresh the display
reports_data = self._get_reports_for_display()
choices = self._get_report_choices(reports_data)
return reports_data, choices, "No reports selected for deletion."
print(f"Selected choices for deletion: {selected_choices}")
# Extract report IDs from selected choices
selected_report_ids = []
for choice in selected_choices:
try:
# Convert to string and handle different input formats
choice_str = str(choice).strip().strip('"\'')
print(f"Processing choice: '{choice_str}'")
# Split at the first colon to get the ID
if ':' in choice_str:
report_id = choice_str.split(':', 1)[0].strip()
selected_report_ids.append(report_id)
else:
# If no colon, use the entire string as ID
selected_report_ids.append(choice_str)
print(f"Using full string as ID: '{choice_str}'")
except Exception as e:
print(f"Error processing choice {choice}: {e}")
print(f"Deleting report IDs: {selected_report_ids}")
# Delete selected reports
deleted_count = 0
for report_id in selected_report_ids:
if self.delete_report(report_id):
deleted_count += 1
print(f"Successfully deleted report: {report_id}")
else:
print(f"Failed to delete report: {report_id}")
print(f"Deleted {deleted_count} reports")
# Refresh the table and choices
reports_data = self._get_reports_for_display()
choices = self._get_report_choices(reports_data)
status_message = f"Deleted {deleted_count} report(s)."
return reports_data, choices, status_message
def _download_selected_reports(self, selected_choices):
"""Prepare selected reports for download
Args:
selected_choices (list): List of selected checkbox values in format "ID: Query (Model)"
Returns:
list: List of file paths to download
"""
if not selected_choices:
return []
print(f"Selected choices for download: {selected_choices}")
# Extract report IDs from selected choices
selected_report_ids = []
for choice in selected_choices:
try:
# Convert to string and handle different input formats
choice_str = str(choice).strip().strip('"\'')
print(f"Processing choice: '{choice_str}'")
# Split at the first colon to get the ID
if ':' in choice_str:
report_id = choice_str.split(':', 1)[0].strip()
selected_report_ids.append(report_id)
else:
# If no colon, use the entire string as ID
selected_report_ids.append(choice_str)
print(f"Using full string as ID: '{choice_str}'")
except Exception as e:
print(f"Error processing choice {choice}: {e}")
print(f"Extracted report IDs: {selected_report_ids}")
# Get file paths for selected reports
all_reports = self.get_all_reports()
files_to_download = []
for report_id in selected_report_ids:
report = next((r for r in all_reports if r.get('id') == report_id), None)
if report and "file_path" in report:
file_path = report["file_path"]
print(f"Downloading report: report_id={report_id}, file_path={file_path}")
# Verify the file exists
if os.path.exists(file_path):
files_to_download.append(file_path)
print(f"Added file for download: {file_path}")
else:
print(f"Warning: File does not exist: {file_path}")
else:
print(f"Warning: Could not find report with ID {report_id}")
return files_to_download
def _get_report_choices(self, reports_data):
"""Generate choices for the checkbox group based on reports data
Args:
reports_data (list): List of report data rows
Returns:
list: List of choices for the checkbox group in format "ID: Query (Model)"
"""
choices = []
# If reports_data is empty, return an empty list
if not reports_data:
return []
# Get all reports from the metadata file to ensure IDs are available
all_reports = self.get_all_reports()
# Create a mapping of report IDs to their full data
report_map = {report.get('id', ''): report for report in all_reports}
for row in reports_data:
try:
report_id = row[0]
if not report_id:
continue
# Get data from the table row
query = row[1]
model = row[2]
# Format: "ID: Query (Model)"
choice_text = f"{report_id}: {query} ({model})"
choices.append(choice_text)
except (IndexError, TypeError) as e:
print(f"Error processing report row: {e}")
continue
return choices
def _refresh_reports_with_html(self):
    """Refresh the reports list with updated HTML.

    Returns:
        tuple: Updated reports table rows, rendered checkbox HTML, and
            "[]" to reset the hidden selection field.
    """
    reports_data = self._get_reports_for_display()
    choices = self._get_report_choices(reports_data)
    # NOTE(review): create_checkbox_html is not defined in this class —
    # presumably a module-level helper elsewhere in this file; confirm.
    html_content = create_checkbox_html(choices)
    return reports_data, html_content, "[]"  # Reset the hidden field
def _delete_selected_reports_with_html(self, selected_json):
    """Delete reports selected via the HTML checkbox UI and refresh it.

    Args:
        selected_json (str): JSON string containing selected report IDs

    Returns:
        tuple: (updated table rows, refreshed checkbox HTML, "[]" to reset
            the hidden field, status message)
    """
    try:
        # Decode the hidden field; tolerate malformed JSON from the browser.
        if not selected_json or selected_json == "[]":
            selected = []
        else:
            try:
                selected = json.loads(selected_json)
                print(f"Parsed JSON selections: {selected}")
            except Exception as json_err:
                print(f"JSON parse error: {json_err}")
                # Crude fallback: split the bracketed list by hand.
                selected = [s.strip(' "') for s in selected_json.strip('[]').split(',')]
                print(f"Fallback parsing to: {selected}")

        updated_table, _, message = self._delete_selected_reports(selected)
        html_content = create_checkbox_html(self._get_report_choices(updated_table))
        return updated_table, html_content, "[]", f"{message}"
    except Exception as e:
        import traceback
        traceback.print_exc()
        return self._get_reports_for_display(), create_checkbox_html([]), "[]", f"Error: {str(e)}"
def _download_with_html(self, selected_json):
"""Prepare selected reports for download with improved JSON parsing
Args:
selected_json (str): JSON string containing selected report IDs
Returns:
list: Files prepared for download
"""
try:
# Parse JSON with error handling
if not selected_json or selected_json == "[]":
selected = []
else:
try:
selected = json.loads(selected_json)
print(f"Parsed JSON selections for download: {selected}")
except Exception as json_err:
print(f"JSON parse error: {json_err}")
# If JSON parsing fails, try to extract values directly
selected = [s.strip(' "') for s in selected_json.strip('[]').split(',')]
print(f"Fallback parsing to: {selected}")
# Get file paths for download
files = self._download_selected_reports(selected)
return files
except Exception as e:
import traceback
traceback.print_exc()
return []
def _cleanup_old_reports(self, days):
"""Delete reports older than the specified number of days
Args:
days (int): Number of days to keep reports for
Returns:
list: Updated reports table data
"""
try:
if days <= 0:
print("Cleanup skipped - days parameter is 0 or negative")
return self._get_reports_for_display()
# Calculate cutoff date
from datetime import timedelta
cutoff_date = datetime.now() - timedelta(days=days)
cutoff_str = cutoff_date.isoformat()
print(f"Cleaning up reports older than {cutoff_date.strftime('%Y-%m-%d %H:%M:%S')}")
# Get all reports
all_reports = self.get_all_reports()
print(f"Found {len(all_reports)} total reports")
reports_to_delete = []
# Find reports older than cutoff date
for report in all_reports:
creation_date = report.get('creation_date', '')
if not creation_date:
print(f"Warning: Report {report.get('id')} has no creation date")
continue
if creation_date < cutoff_str:
reports_to_delete.append(report.get('id'))
print(f"Marking report {report.get('id')} from {creation_date} for deletion")
print(f"Found {len(reports_to_delete)} reports to delete")
# Delete old reports
deleted_count = 0
for report_id in reports_to_delete:
if self.delete_report(report_id):
deleted_count += 1
print(f"Successfully deleted {deleted_count} reports")
# Refresh the table
updated_display = self._get_reports_for_display()
print(f"Returning updated display with {len(updated_display)} reports")
return updated_display
except Exception as e:
print(f"Error in cleanup_old_reports: {e}")
import traceback
traceback.print_exc()
# Return current display data in case of error
return self._get_reports_for_display()
def migrate_existing_reports(self):
"""Migrate existing reports from the root directory to the reports directory structure
Returns:
str: Status message indicating the result of the migration
"""
import re
import shutil
import os
# Pattern to match report files like report_20250317_122351_llama-3.3-70b-versatile.md
report_pattern = re.compile(r'report_(?P<date>\d{8})_(?P<time>\d{6})_?(?P<model>.*?)?\.md$')
# Get the root directory
root_dir = Path(__file__).parent.parent
# Find all report files in the root directory
migrated_count = 0
for file_path in root_dir.glob('report_*.md'):
if not file_path.is_file():
continue
# Extract information from the filename
match = report_pattern.match(file_path.name)
if not match:
continue
date_str = match.group('date')
time_str = match.group('time')
model = match.group('model') or 'unknown'
# Format date for directory structure (YYYY-MM-DD)
try:
year = date_str[:4]
month = date_str[4:6]
day = date_str[6:8]
formatted_date = f"{year}-{month}-{day}"
# Create timestamp for metadata
timestamp = f"{year}-{month}-{day} {time_str[:2]}:{time_str[2:4]}:{time_str[4:6]}"
creation_date = datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S").isoformat()
except ValueError:
# If date parsing fails, use current date
formatted_date = datetime.now().strftime("%Y-%m-%d")
creation_date = datetime.now().isoformat()
# Create directory for the date if it doesn't exist
date_dir = self.reports_dir / formatted_date
date_dir.mkdir(exist_ok=True)
# Generate a unique report ID
report_id = f"{date_str}_{time_str}"
# Copy the file to the new location
new_file_path = date_dir / file_path.name
shutil.copy2(file_path, new_file_path)
# Read the report content to extract query if possible
query = ""
try:
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read(1000) # Read just the beginning to find the query
# Try to extract query from title or first few lines
title_match = re.search(r'#\s*(.+?)\n', content)
if title_match:
query = title_match.group(1).strip()
else:
# Just use the first line as query
query = content.split('\n')[0].strip()
except Exception as e:
print(f"Error reading file {file_path}: {e}")
# Create metadata for the report
file_size = os.path.getsize(file_path)
metadata = {
"id": report_id,
"query": query,
"model": model,
"detail_level": "unknown", # We don't know the detail level from the filename
"timestamp": timestamp,
"creation_date": creation_date,
"file_path": str(new_file_path),
"file_size": file_size
}
# Update the metadata file
self._update_report_metadata(report_id, metadata)
migrated_count += 1
return f"Migrated {migrated_count} existing reports to the new directory structure."
def create_interface(self):
"""
Create and return the Gradio interface.
Returns:
gr.Blocks: The Gradio interface
"""
with gr.Blocks(title="Intelligent Research System") as interface:
gr.Markdown("# Intelligent Research System")
gr.Markdown(
"""
This system helps you research topics by searching across multiple sources
including Google (via Serper), Google Scholar, arXiv, and news sources.
You can either search for results or generate a comprehensive report.
**Special Capabilities:**
- Automatically detects and optimizes current events queries
- Specialized search handlers for different types of information
- Semantic ranking for the most relevant results
"""
)
# Create tabs for different sections
with gr.Tabs() as tabs:
# Report Generation Tab
with gr.TabItem("Generate Report"):
with gr.Row():
with gr.Column(scale=4):
report_query_input = gr.Textbox(
label="Research Query",
placeholder="Enter your research question here...",
lines=3
)
with gr.Column(scale=1):
report_detail_level = gr.Dropdown(
choices=["brief", "standard", "detailed", "comprehensive"],
value="standard",
label="Detail Level",
info="Controls the depth and breadth of the report"
)
report_query_type = gr.Dropdown(
choices=["auto-detect", "factual", "exploratory", "comparative", "code"],
value="auto-detect",
label="Query Type",
info="Type of query determines the report structure"
)
model_descriptions = self.get_model_descriptions()
report_custom_model = gr.Dropdown(
choices=list(self.model_name_to_description.keys()),
value=None,
label="Custom Model (Optional)",
info="Select a custom model for report generation"
)
with gr.Row():
with gr.Column():
gr.Markdown("### Advanced Settings")
with gr.Row():
with gr.Column():
with gr.Accordion("Search Parameters", open=False):
with gr.Row():
initial_results_slider = gr.Slider(
minimum=5,
maximum=50,
value=10,
step=5,
label="Initial Results Per Engine",
info="Number of results to fetch from each search engine"
)
final_results_slider = gr.Slider(
minimum=3,
maximum=30,
value=7,
step=1,
label="Final Results After Reranking",
info="Number of results to keep after reranking"
)
with gr.Accordion("Processing Options", open=False):
with gr.Row():
report_process_thinking = gr.Checkbox(
label="Process Thinking Tags",
value=False,
info="Process <thinking> tags in model output"
)
with gr.Row():
report_button = gr.Button("Generate Report", variant="primary", size="lg")
# Note: We've removed the redundant progress indicators here
# The built-in Gradio progress tracking (gr.Progress) is used instead
# This is passed to the generate_report method and handles progress updates
gr.Examples(
examples=[
["What are the latest advancements in quantum computing?"],
["Compare transformer and RNN architectures for NLP tasks"],
["Explain the environmental impact of electric vehicles"],
["Explain the potential relationship between creatine supplementation and muscle loss due to GLP1-ar drugs for weight loss."],
["What recent actions has Trump taken regarding tariffs?"],
["What are the recent papers on large language model alignment?"],
["What are the main research findings on climate change adaptation strategies in agriculture?"]
],
inputs=report_query_input
)
with gr.Row():
with gr.Column():
report_output = gr.Markdown(label="Generated Report")
with gr.Row():
with gr.Column():
report_file_output = gr.Textbox(
label="Report saved to file",
interactive=False
)
# Add information about detail levels and query types
detail_levels_info = ""
for level, description in self.detail_level_manager.get_available_detail_levels():
detail_levels_info += f"- **{level}**: {description}\n"
query_types_info = """
- **auto-detect**: Automatically determine the query type based on the query text
- **factual**: For queries seeking specific information (e.g., "What is...", "How does...")
- **exploratory**: For queries investigating a topic broadly (e.g., "Tell me about...")
- **comparative**: For queries comparing multiple items (e.g., "Compare X and Y", "Differences between...")
- **code**: For queries related to programming, software development, or technical implementation
"""
gr.Markdown(f"### Detail Levels\n{detail_levels_info}")
gr.Markdown(f"### Query Types\n{query_types_info}")
# Report Management Tab - Reimplemented from scratch
with gr.TabItem("Manage Reports"):
with gr.Row():
gr.Markdown("## Report Management")
with gr.Row():
gr.Markdown("Select reports to download or delete. You can filter and sort the reports using the table controls.")
# Get the reports data
reports_data = self._get_reports_for_display()
# Create a state to store selected report IDs
selected_report_ids = gr.State([])
# We've removed the DataTable as requested by the user
# Selection controls
with gr.Row():
with gr.Column(scale=2):
# Create a checkbox group for selecting reports
report_choices = self._get_report_choices(reports_data)
reports_checkbox_group = gr.CheckboxGroup(
choices=report_choices,
label="Select Reports",
info="Check the reports you want to download or delete",
interactive=True
)
with gr.Column(scale=1):
# Action buttons
with gr.Row():
refresh_button = gr.Button("Refresh List", size="sm")
with gr.Row():
select_all_button = gr.Button("Select All", size="sm")
clear_selection_button = gr.Button("Clear Selection", size="sm")
with gr.Row():
download_button = gr.Button("Download Selected", size="sm")
delete_button = gr.Button("Delete Selected", variant="stop", size="sm")
with gr.Row():
cleanup_days = gr.Slider(
minimum=0,
maximum=90,
value=30,
step=1,
label="Delete Reports Older Than (Days)",
info="Set to 0 to disable automatic cleanup"
)
cleanup_button = gr.Button("Clean Up Old Reports", size="sm")
# File download component
with gr.Row():
file_output = gr.File(
label="Downloaded Reports",
file_count="multiple",
type="filepath",
interactive=False
)
# Status message
with gr.Row():
status_message = gr.Markdown("")
# Migration button for existing reports
with gr.Row():
with gr.Column():
gr.Markdown("### Migrate Existing Reports")
gr.Markdown("Use this button to migrate existing reports from the root directory to the new reports directory structure.")
migrate_button = gr.Button("Migrate Existing Reports", variant="primary")
# Set up event handlers
# Update the progress tracking in the generate_report method
async def generate_report_with_progress(query, detail_level, query_type, model_name, process_thinking, initial_results, final_results):
# Set up progress tracking
progress_data = gr.Progress(track_tqdm=True)
# Debug the model selection
print(f"Model selected from UI dropdown: {model_name}")
# Call the original generate_report method
result = await self.generate_report(
query,
detail_level,
query_type,
model_name,
None, # results_file is now None since we removed the search tab
process_thinking,
initial_results,
final_results
)
return result
report_button.click(
fn=lambda q, d, t, m, p, i, f: asyncio.run(generate_report_with_progress(q, d, t, m, p, i, f)),
inputs=[report_query_input, report_detail_level, report_query_type, report_custom_model,
report_process_thinking, initial_results_slider, final_results_slider],
outputs=[report_output, report_file_output]
)
# Report Management Tab Event Handlers
# Refresh reports list
def refresh_reports_list():
"""Refresh the reports list and update the UI components"""
reports_data = self._get_reports_for_display()
report_choices = self._get_report_choices(reports_data)
return reports_data, report_choices, "Reports list refreshed."
refresh_button.click(
fn=refresh_reports_list,
inputs=[],
outputs=[reports_checkbox_group, reports_checkbox_group, status_message]
)
# Select all reports
def select_all_reports():
"""Select all reports in the checkbox group"""
report_choices = self._get_report_choices(self._get_reports_for_display())
return report_choices, "Selected all reports."
select_all_button.click(
fn=select_all_reports,
inputs=[],
outputs=[reports_checkbox_group, status_message]
)
# Clear selection
def clear_selection():
"""Clear the selection in the checkbox group"""
return [], "Selection cleared."
clear_selection_button.click(
fn=clear_selection,
inputs=[],
outputs=[reports_checkbox_group, status_message]
)
# Download selected reports
def download_selected_reports(selected_choices):
"""Download selected reports"""
if not selected_choices:
return [], "No reports selected for download."
print(f"Selected choices for download: {selected_choices}")
files = self._download_selected_reports(selected_choices)
if files:
return files, f"Prepared {len(files)} report(s) for download."
else:
return [], "No files found for the selected reports."
download_button.click(
fn=download_selected_reports,
inputs=[reports_checkbox_group],
outputs=[file_output, status_message]
)
# Delete selected reports
def delete_selected_reports(selected_choices):
"""Delete selected reports and update the UI"""
if not selected_choices:
return self._get_reports_for_display(), [], "No reports selected for deletion."
print(f"Selected choices for deletion: {selected_choices}")
# Extract report IDs from selected choices
selected_report_ids = []
for choice in selected_choices:
try:
# Split at the first colon to get the ID
if ':' in choice:
report_id = choice.split(':', 1)[0].strip()
selected_report_ids.append(report_id)
else:
# If no colon, use the entire string as ID
selected_report_ids.append(choice)
except Exception as e:
print(f"Error processing choice {choice}: {e}")
# Delete selected reports
deleted_count = 0
for report_id in selected_report_ids:
if self.delete_report(report_id):
deleted_count += 1
# Refresh the table and choices
updated_reports_data = self._get_reports_for_display()
updated_choices = self._get_report_choices(updated_reports_data)
return updated_choices, f"Deleted {deleted_count} report(s)."
delete_button.click(
fn=delete_selected_reports,
inputs=[reports_checkbox_group],
outputs=[reports_checkbox_group, status_message]
)
# Clean up old reports
def cleanup_old_reports(days):
"""Delete reports older than the specified number of days"""
if days <= 0:
return self._get_reports_for_display(), self._get_report_choices(self._get_reports_for_display()), "Cleanup skipped - days parameter is 0 or negative."
updated_reports_data = self._cleanup_old_reports(days)
updated_choices = self._get_report_choices(updated_reports_data)
return updated_reports_data, updated_choices, f"Reports older than {days} days have been deleted."
cleanup_button.click(
fn=cleanup_old_reports,
inputs=[cleanup_days],
outputs=[reports_checkbox_group, status_message]
)
# Migration button event handler
def migrate_existing_reports():
"""Migrate existing reports from the root directory to the reports directory structure"""
print("Starting migration of existing reports...")
status = self.migrate_existing_reports()
print("Migration completed, refreshing display...")
# Refresh the reports list
updated_reports_data = self._get_reports_for_display()
updated_choices = self._get_report_choices(updated_reports_data)
return status, updated_reports_data, updated_choices
migrate_button.click(
fn=migrate_existing_reports,
inputs=[],
outputs=[status_message, reports_checkbox_group]
)
# Initialize the UI on page load
def init_reports_ui():
"""Initialize the reports UI with current data"""
print("Initializing reports UI...")
reports_data = self._get_reports_for_display()
choices = self._get_report_choices(reports_data)
print(f"Initializing reports UI with {len(reports_data)} reports and {len(choices)} choices")
return choices, "Reports management initialized successfully."
interface.load(
fn=init_reports_ui,
inputs=[],
outputs=[reports_checkbox_group, status_message]
)
return interface
def launch(self, **kwargs):
"""
Launch the Gradio interface.
Args:
**kwargs: Keyword arguments to pass to gr.Interface.launch()
"""
interface = self.create_interface()
interface.launch(**kwargs)
def main():
    """Entry point: build the interface, run async initialization, launch the UI."""
    # Create interface and initialize async components
    interface = GradioInterface()

    # asyncio.run() replaces the deprecated get_event_loop() /
    # run_until_complete pattern (DeprecationWarning since Python 3.10).
    asyncio.run(interface.async_init())

    # share=True exposes a public Gradio link in addition to the local server.
    interface.launch(share=True)


if __name__ == "__main__":
    main()