""" Gradio interface for the intelligent research system. This module provides a web interface for users to interact with the research system. """ import os import json import gradio as gr import sys import time from pathlib import Path # Add the parent directory to the path to allow importing from other modules sys.path.append(str(Path(__file__).parent.parent)) from query.query_processor import QueryProcessor from execution.search_executor import SearchExecutor from execution.result_collector import ResultCollector class GradioInterface: """Gradio interface for the intelligent research system.""" def __init__(self): """Initialize the Gradio interface.""" self.query_processor = QueryProcessor() self.search_executor = SearchExecutor() self.result_collector = ResultCollector() self.results_dir = Path(__file__).parent.parent / "results" self.results_dir.mkdir(exist_ok=True) def process_query(self, query, num_results=10): """ Process a query and return the results. Args: query (str): The query to process num_results (int): Number of results to return Returns: tuple: (markdown_results, json_results_path) """ try: # Process the query print(f"Processing query: {query}") processed_query = self.query_processor.process_query(query) print(f"Processed query: {processed_query}") # Get available search engines and print their status available_engines = self.search_executor.get_available_search_engines() print(f"Available search engines: {available_engines}") # Check which handlers are actually available for engine_name, handler in self.search_executor.available_handlers.items(): print(f"Handler {engine_name} available: {handler.is_available()}") if not handler.is_available(): print(f" - Reason: API key may be missing for {engine_name}") # Add search engines if not specified if 'search_engines' not in processed_query: processed_query['search_engines'] = available_engines print(f"Using search engines: {available_engines}") # Execute the search - request more results from each engine print(f"Executing search...") search_results = self.search_executor.execute_search( structured_query=processed_query, num_results=num_results ) # Print which engines returned results for engine, results in search_results.items(): print(f"Engine {engine} returned {len(results)} results") # Process the results - don't limit the number of results print(f"Processing results...") processed_results = self.result_collector.process_results( search_results, dedup=True, max_results=None ) print(f"Processed {len(processed_results)} results") # Save results to file timestamp = int(time.time()) results_file = self.results_dir / f"results_{timestamp}.json" # Ensure the results are not empty before saving if processed_results: with open(results_file, "w") as f: json.dump(processed_results, f, indent=2) print(f"Results saved to {results_file}") file_path = str(results_file) else: error_message = "No results found. Please try a different query or check API keys." print(error_message) file_path = None return f"## No Results Found\n\n{error_message}", file_path # Format results for display markdown_results = self._format_results_as_markdown(processed_results) return markdown_results, file_path except Exception as e: error_message = f"Error processing query: {str(e)}" print(f"ERROR: {error_message}") import traceback traceback.print_exc() return f"## Error\n\n{error_message}", None def _format_results_as_markdown(self, results): """ Format results as markdown. Args: results (list): List of result dictionaries Returns: str: Markdown formatted results """ if not results: return "## No Results Found\n\nNo results were found for your query." # Count results by source source_counts = {} for result in results: source = result.get("source", "unknown") source_counts[source] = source_counts.get(source, 0) + 1 # Create source distribution string source_distribution = ", ".join([f"{source}: {count}" for source, count in source_counts.items()]) markdown = f"## Search Results\n\n" markdown += f"*Sources: {source_distribution}*\n\n" for i, result in enumerate(results): title = result.get("title", "Untitled") url = result.get("url", "") snippet = result.get("snippet", "No snippet available") source = result.get("source", "unknown") authors = result.get("authors", "Unknown") year = result.get("year", "Unknown") score = result.get("relevance_score", 0) markdown += f"### {i+1}. {title}\n\n" markdown += f"**Source**: {source}\n\n" markdown += f"**URL**: [{url}]({url})\n\n" markdown += f"**Snippet**: {snippet}\n\n" markdown += f"**Authors**: {authors}\n\n" markdown += f"**Year**: {year}\n\n" markdown += f"**Score**: {score}\n\n" markdown += "---\n\n" return markdown def create_interface(self): """ Create and return the Gradio interface. Returns: gr.Blocks: The Gradio interface """ with gr.Blocks(title="Intelligent Research System") as interface: gr.Markdown("# Intelligent Research System") gr.Markdown( """ This system helps you research topics by searching across multiple sources including Google (via Serper), Google Scholar, and arXiv. """ ) with gr.Row(): with gr.Column(scale=4): query_input = gr.Textbox( label="Research Query", placeholder="Enter your research question here...", lines=3 ) with gr.Column(scale=1): num_results = gr.Slider( minimum=5, maximum=50, value=20, step=5, label="Results Per Engine" ) search_button = gr.Button("Search", variant="primary") gr.Examples( examples=[ ["What are the latest advancements in quantum computing?"], ["Compare transformer and RNN architectures for NLP tasks"], ["Explain the environmental impact of electric vehicles"] ], inputs=query_input ) with gr.Row(): with gr.Column(): results_output = gr.Markdown(label="Results") with gr.Row(): with gr.Column(): file_output = gr.Textbox( label="Results saved to file", interactive=False ) search_button.click( fn=self.process_query, inputs=[query_input, num_results], outputs=[results_output, file_output] ) return interface def launch(self, **kwargs): """ Launch the Gradio interface. Args: **kwargs: Keyword arguments to pass to gr.Interface.launch() """ interface = self.create_interface() interface.launch(**kwargs) def main(): """Main function to launch the Gradio interface.""" interface = GradioInterface() interface.launch(share=True) if __name__ == "__main__": main()