# ira/ui/gradio_interface.py
#
# 239 lines
# 8.9 KiB
# Python

"""
Gradio interface for the intelligent research system.
This module provides a web interface for users to interact with the research system.
"""
import os
import json
import gradio as gr
import sys
import time
from pathlib import Path
# Add the parent directory to the path to allow importing from other modules
sys.path.append(str(Path(__file__).parent.parent))
from query.query_processor import QueryProcessor
from execution.search_executor import SearchExecutor
from execution.result_collector import ResultCollector
class GradioInterface:
    """Gradio web front end for the intelligent research system.

    Ties together a ``QueryProcessor``, a ``SearchExecutor`` and a
    ``ResultCollector``: a query entered in the browser is processed, run
    through the search executor, and the collected results are rendered as
    Markdown and written to a JSON file in the results directory.
    """

    # Introductory text shown at the top of the page.
    _INTRO_MARKDOWN = (
        "\n"
        "This system helps you research topics by searching across multiple sources\n"
        "including Google (via Serper), Google Scholar, and arXiv.\n"
        "The system will return ALL results from each search engine, up to the maximum\n"
        'number specified by the "Results Per Engine" slider. Results are ranked by\n'
        "relevance across all sources.\n"
    )

    def __init__(self):
        """Create the pipeline components and ensure the results directory exists."""
        self.query_processor = QueryProcessor()
        self.search_executor = SearchExecutor()
        self.result_collector = ResultCollector()
        # Saved searches go to a "results" folder two levels above this file.
        self.results_dir = Path(__file__).parent.parent / "results"
        self.results_dir.mkdir(exist_ok=True)

    def process_query(self, query, num_results=10):
        """Run a query through the pipeline and return displayable results.

        Args:
            query (str): The query to process.
            num_results (int): Number of results requested from the search
                executor. Coerced to ``int`` before use.

        Returns:
            tuple: ``(markdown_results, json_results_path)``. The path is
            ``None`` when the query is blank, when nothing was found, or when
            an error occurred; in those cases the Markdown carries the message.
        """
        # A blank query cannot produce useful results; answer without
        # touching the search back ends.
        if not (query or "").strip():
            message = "Please enter a research query."
            print(message)
            return f"## No Query Provided\n\n{message}", None

        try:
            # The UI slider value may arrive as a float; pass a whole number on.
            num_results = int(num_results)

            print(f"Processing query: {query}")
            processed_query = self.query_processor.process_query(query)
            print(f"Processed query: {processed_query}")

            available_engines = self.search_executor.get_available_search_engines()
            print(f"Available search engines: {available_engines}")

            # Report the status of every handler to help diagnose missing keys.
            for engine_name, handler in self.search_executor.available_handlers.items():
                is_available = handler.is_available()
                print(f"Handler {engine_name} available: {is_available}")
                if not is_available:
                    print(f" - Reason: API key may be missing for {engine_name}")

            # Fall back to every available engine when the processed query
            # does not name any.
            if 'search_engines' not in processed_query:
                processed_query['search_engines'] = available_engines
                print(f"Using search engines: {available_engines}")

            print("Executing search...")
            search_results = self.search_executor.execute_search(
                structured_query=processed_query,
                num_results=num_results,
            )
            for engine, results in search_results.items():
                print(f"Engine {engine} returned {len(results)} results")

            # max_results=None: do not cap the merged result list.
            print("Processing results...")
            processed_results = self.result_collector.process_results(
                search_results, dedup=True, max_results=None
            )
            print(f"Processed {len(processed_results)} results")

            # Nothing to save or render: report it and skip the file.
            if not processed_results:
                error_message = "No results found. Please try a different query or check API keys."
                print(error_message)
                return f"## No Results Found\n\n{error_message}", None

            file_path = self._save_results(processed_results)
            markdown_results = self._format_results_as_markdown(processed_results)
            return markdown_results, file_path
        except Exception as e:
            # UI boundary: never let an exception escape into Gradio; log the
            # traceback and show the message to the user instead.
            error_message = f"Error processing query: {str(e)}"
            print(f"ERROR: {error_message}")
            import traceback
            traceback.print_exc()
            return f"## Error\n\n{error_message}", None

    def _save_results(self, results):
        """Write results as JSON into the results directory.

        Args:
            results (list): Processed result dictionaries to serialise.

        Returns:
            str: Path of the file that was written.
        """
        # File name carries the current Unix time in whole seconds.
        results_file = self.results_dir / f"results_{int(time.time())}.json"
        with open(results_file, "w", encoding="utf-8") as f:
            json.dump(results, f, indent=2)
        print(f"Results saved to {results_file}")
        return str(results_file)

    def _format_results_as_markdown(self, results):
        """Format results as Markdown.

        Missing or empty fields fall back to placeholders, an empty URL is
        shown as "Not available" rather than an empty link, and a list or
        tuple of authors is joined with commas.

        Args:
            results (list): List of result dictionaries.

        Returns:
            str: Markdown formatted results.
        """
        if not results:
            return "## No Results Found\n\nNo results were found for your query."

        from collections import Counter

        # Per-source tally, kept in first-seen order.
        source_counts = Counter(
            result.get("source") or "unknown" for result in results
        )
        source_distribution = ", ".join(
            f"{source}: {count}" for source, count in source_counts.items()
        )

        parts = [
            "## Search Results\n\n",
            f"*Sources: {source_distribution}*\n\n",
        ]
        for position, result in enumerate(results, start=1):
            title = result.get("title") or "Untitled"
            url = result.get("url") or ""
            snippet = result.get("snippet") or "No snippet available"
            source = result.get("source") or "unknown"
            authors = result.get("authors") or "Unknown"
            year = result.get("year") or "Unknown"
            score = result.get("relevance_score")
            if score is None:
                score = 0

            if isinstance(authors, (list, tuple)):
                authors = ", ".join(str(author) for author in authors)
            url_markdown = f"[{url}]({url})" if url else "Not available"

            parts.append(f"### {position}. {title}\n\n")
            parts.append(f"**Source**: {source}\n\n")
            parts.append(f"**URL**: {url_markdown}\n\n")
            parts.append(f"**Snippet**: {snippet}\n\n")
            parts.append(f"**Authors**: {authors}\n\n")
            parts.append(f"**Year**: {year}\n\n")
            parts.append(f"**Score**: {score}\n\n")
            parts.append("---\n\n")
        return "".join(parts)

    def create_interface(self):
        """Create and return the Gradio interface.

        Returns:
            gr.Blocks: The Gradio interface, with the search button wired to
            ``process_query``.
        """
        with gr.Blocks(title="Intelligent Research System") as interface:
            gr.Markdown("# Intelligent Research System")
            gr.Markdown(self._INTRO_MARKDOWN)

            with gr.Row():
                with gr.Column(scale=4):
                    query_input = gr.Textbox(
                        label="Research Query",
                        placeholder="Enter your research question here...",
                        lines=3,
                    )
                with gr.Column(scale=1):
                    num_results = gr.Slider(
                        minimum=5,
                        maximum=50,
                        value=20,
                        step=5,
                        label="Results Per Engine",
                    )
                    # NOTE(review): button assumed to sit under the slider in
                    # the narrow column - confirm against the intended layout.
                    search_button = gr.Button("Search", variant="primary")

            gr.Examples(
                examples=[
                    ["What are the latest advancements in quantum computing?"],
                    ["Compare transformer and RNN architectures for NLP tasks"],
                    ["Explain the environmental impact of electric vehicles"],
                ],
                inputs=query_input,
            )

            with gr.Row():
                with gr.Column():
                    results_output = gr.Markdown(label="Results")

            with gr.Row():
                with gr.Column():
                    file_output = gr.Textbox(
                        label="Results saved to file",
                        interactive=False,
                    )

            # Outputs map, in order, onto the (markdown, path) tuple that
            # process_query returns.
            search_button.click(
                fn=self.process_query,
                inputs=[query_input, num_results],
                outputs=[results_output, file_output],
            )
        return interface

    def launch(self, **kwargs):
        """Build the interface and launch it.

        Args:
            **kwargs: Keyword arguments forwarded to the ``launch()`` method
                of the ``gr.Blocks`` object built by ``create_interface``.
        """
        interface = self.create_interface()
        interface.launch(**kwargs)
def main():
    """Entry point: build the Gradio UI and launch it with ``share=True``."""
    GradioInterface().launch(share=True)
# Start the web UI only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()