ira/ui/gradio_interface.py

235 lines
8.6 KiB
Python

"""
Gradio interface for the intelligent research system.
This module provides a web interface for users to interact with the research system.
"""
import os
import json
import gradio as gr
import sys
import time
from pathlib import Path
# Add the parent directory to the path to allow importing from other modules
sys.path.append(str(Path(__file__).parent.parent))
from query.query_processor import QueryProcessor
from execution.search_executor import SearchExecutor
from execution.result_collector import ResultCollector
class GradioInterface:
    """Gradio web front end for the intelligent research system.

    Wires a query processor, a search executor and a result collector
    together behind a single "Search" button, renders the collected results
    as markdown and stores them as a JSON file in the ``results`` directory.
    """

    def __init__(self):
        """Create the pipeline components and make sure the results directory exists."""
        self.query_processor = QueryProcessor()
        self.search_executor = SearchExecutor()
        self.result_collector = ResultCollector()
        # Results are stored next to the package, one JSON file per query.
        self.results_dir = Path(__file__).parent.parent / "results"
        self.results_dir.mkdir(exist_ok=True)

    def process_query(self, query, num_results=10):
        """
        Process a query and return the results.

        Args:
            query (str): The query to process
            num_results (int): Number of results to request from each engine

        Returns:
            tuple: (markdown_results, json_results_path). The path is None
                when the query is blank, nothing was found, or an error
                occurred.
        """
        # Do not spend API calls on a blank submission.
        if query is None or not str(query).strip():
            error_message = "Please enter a research query."
            print(error_message)
            return f"## No Query Provided\n\n{error_message}", None

        try:
            # UI sliders may deliver the value as a float; engines expect a count.
            num_results = int(num_results)

            # Process the query
            print(f"Processing query: {query}")
            processed_query = self.query_processor.process_query(query)
            print(f"Processed query: {processed_query}")

            # Get available search engines and print their status
            available_engines = self.search_executor.get_available_search_engines()
            print(f"Available search engines: {available_engines}")

            # Check which handlers are actually available
            for engine_name, handler in self.search_executor.available_handlers.items():
                is_available = handler.is_available()
                print(f"Handler {engine_name} available: {is_available}")
                if not is_available:
                    print(f" - Reason: API key may be missing for {engine_name}")

            # Add search engines if not specified
            if 'search_engines' not in processed_query:
                processed_query['search_engines'] = available_engines
                print(f"Using search engines: {available_engines}")

            # Execute the search - request more results from each engine
            print("Executing search...")
            search_results = self.search_executor.execute_search(
                structured_query=processed_query,
                num_results=num_results
            )

            # Print which engines returned results
            for engine, results in search_results.items():
                print(f"Engine {engine} returned {len(results)} results")

            # Process the results - don't limit the number of results
            print("Processing results...")
            processed_results = self.result_collector.process_results(
                search_results, dedup=True, max_results=None
            )
            print(f"Processed {len(processed_results)} results")

            # Nothing to save or render: report it instead of writing an empty file.
            if not processed_results:
                error_message = "No results found. Please try a different query or check API keys."
                print(error_message)
                return f"## No Results Found\n\n{error_message}", None

            file_path = self._save_results(processed_results)

            # Format results for display
            markdown_results = self._format_results_as_markdown(processed_results)
            return markdown_results, file_path
        except Exception as e:
            # UI boundary: any failure is shown to the user instead of
            # crashing the Gradio callback; the traceback goes to the console.
            error_message = f"Error processing query: {str(e)}"
            print(f"ERROR: {error_message}")
            import traceback
            traceback.print_exc()
            return f"## Error\n\n{error_message}", None

    def _save_results(self, processed_results):
        """
        Write processed results to a timestamped JSON file.

        Args:
            processed_results (list): Results to serialise

        Returns:
            str: Path of the file that was written
        """
        timestamp = int(time.time())
        results_file = self.results_dir / f"results_{timestamp}.json"
        # Explicit encoding keeps the file independent of the platform default.
        with open(results_file, "w", encoding="utf-8") as f:
            json.dump(processed_results, f, indent=2)
        print(f"Results saved to {results_file}")
        return str(results_file)

    def _format_results_as_markdown(self, results):
        """
        Format results as markdown.

        Args:
            results (list): List of result dictionaries

        Returns:
            str: Markdown formatted results
        """
        if not results:
            return "## No Results Found\n\nNo results were found for your query."

        # Count results by source (insertion order is kept for display)
        source_counts = {}
        for result in results:
            source = result.get("source", "unknown")
            source_counts[source] = source_counts.get(source, 0) + 1

        # Create source distribution string
        source_distribution = ", ".join(
            f"{source}: {count}" for source, count in source_counts.items()
        )

        # Collect fragments and join once instead of repeated string +=.
        parts = [
            "## Search Results\n\n",
            f"*Sources: {source_distribution}*\n\n",
        ]
        for position, result in enumerate(results, start=1):
            title = result.get("title", "Untitled")
            url = result.get("url", "")
            snippet = result.get("snippet", "No snippet available")
            source = result.get("source", "unknown")
            authors = result.get("authors", "Unknown")
            year = result.get("year", "Unknown")
            score = result.get("relevance_score", 0)

            # Some engines may report authors as a sequence rather than a
            # string (assumption - confirm against the handlers); show names,
            # not a Python list repr.
            if isinstance(authors, (list, tuple)):
                authors = ", ".join(str(author) for author in authors)

            # An empty URL would otherwise render as a broken "[]()" link.
            url_line = f"**URL**: [{url}]({url})\n\n" if url else "**URL**: Not available\n\n"

            parts.extend([
                f"### {position}. {title}\n\n",
                f"**Source**: {source}\n\n",
                url_line,
                f"**Snippet**: {snippet}\n\n",
                f"**Authors**: {authors}\n\n",
                f"**Year**: {year}\n\n",
                f"**Score**: {score}\n\n",
                "---\n\n",
            ])
        return "".join(parts)

    def create_interface(self):
        """
        Create and return the Gradio interface.

        Returns:
            gr.Blocks: The Gradio interface
        """
        # NOTE(review): the layout nesting below was reconstructed from a
        # source whose indentation was lost - confirm widget placement.
        with gr.Blocks(title="Intelligent Research System") as interface:
            gr.Markdown("# Intelligent Research System")
            gr.Markdown(
                "\n"
                "This system helps you research topics by searching across multiple sources\n"
                "including Google (via Serper), Google Scholar, and arXiv.\n"
            )

            with gr.Row():
                with gr.Column(scale=4):
                    query_input = gr.Textbox(
                        label="Research Query",
                        placeholder="Enter your research question here...",
                        lines=3
                    )
                with gr.Column(scale=1):
                    num_results = gr.Slider(
                        minimum=5,
                        maximum=50,
                        value=20,
                        step=5,
                        label="Results Per Engine"
                    )
                    search_button = gr.Button("Search", variant="primary")

            gr.Examples(
                examples=[
                    ["What are the latest advancements in quantum computing?"],
                    ["Compare transformer and RNN architectures for NLP tasks"],
                    ["Explain the environmental impact of electric vehicles"]
                ],
                inputs=query_input
            )

            with gr.Row():
                with gr.Column():
                    results_output = gr.Markdown(label="Results")

            with gr.Row():
                with gr.Column():
                    file_output = gr.Textbox(
                        label="Results saved to file",
                        interactive=False
                    )

            # Clicking "Search" runs the pipeline and fills both outputs.
            search_button.click(
                fn=self.process_query,
                inputs=[query_input, num_results],
                outputs=[results_output, file_output]
            )

        return interface

    def launch(self, **kwargs):
        """
        Launch the Gradio interface.

        Args:
            **kwargs: Keyword arguments forwarded to the ``launch()`` method
                of the ``gr.Blocks`` object built by ``create_interface()``
        """
        interface = self.create_interface()
        interface.launch(**kwargs)
def main():
    """Build a GradioInterface and launch it with ``share=True``."""
    GradioInterface().launch(share=True)


# Start the web UI only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()