Update Gradio interface to return all results and allow more results per engine
This commit is contained in:
parent
124aa109a1
commit
a7aa9ea580
|
@ -123,12 +123,12 @@ class ResultCollector:
|
||||||
source = result.get("source", "")
|
source = result.get("source", "")
|
||||||
if source == "scholar":
|
if source == "scholar":
|
||||||
score += 10
|
score += 10
|
||||||
|
elif source == "serper":
|
||||||
|
score += 9
|
||||||
elif source == "arxiv":
|
elif source == "arxiv":
|
||||||
score += 8
|
score += 8
|
||||||
elif source == "google":
|
elif source == "google":
|
||||||
score += 5
|
score += 5
|
||||||
elif source == "serper":
|
|
||||||
score += 5
|
|
||||||
|
|
||||||
# Boost score based on position in original results
|
# Boost score based on position in original results
|
||||||
position = result.get("raw_data", {}).get("position", 0)
|
position = result.get("raw_data", {}).get("position", 0)
|
||||||
|
|
|
@ -46,24 +46,36 @@ class GradioInterface:
|
||||||
processed_query = self.query_processor.process_query(query)
|
processed_query = self.query_processor.process_query(query)
|
||||||
print(f"Processed query: {processed_query}")
|
print(f"Processed query: {processed_query}")
|
||||||
|
|
||||||
|
# Get available search engines and print their status
|
||||||
|
available_engines = self.search_executor.get_available_search_engines()
|
||||||
|
print(f"Available search engines: {available_engines}")
|
||||||
|
|
||||||
|
# Check which handlers are actually available
|
||||||
|
for engine_name, handler in self.search_executor.available_handlers.items():
|
||||||
|
print(f"Handler {engine_name} available: {handler.is_available()}")
|
||||||
|
if not handler.is_available():
|
||||||
|
print(f" - Reason: API key may be missing for {engine_name}")
|
||||||
|
|
||||||
# Add search engines if not specified
|
# Add search engines if not specified
|
||||||
if 'search_engines' not in processed_query:
|
if 'search_engines' not in processed_query:
|
||||||
available_engines = self.search_executor.get_available_search_engines()
|
|
||||||
processed_query['search_engines'] = available_engines
|
processed_query['search_engines'] = available_engines
|
||||||
print(f"Using search engines: {available_engines}")
|
print(f"Using search engines: {available_engines}")
|
||||||
|
|
||||||
# Execute the search
|
# Execute the search - request more results from each engine
|
||||||
print(f"Executing search...")
|
print(f"Executing search...")
|
||||||
search_results = self.search_executor.execute_search(
|
search_results = self.search_executor.execute_search(
|
||||||
structured_query=processed_query,
|
structured_query=processed_query,
|
||||||
num_results=num_results
|
num_results=num_results
|
||||||
)
|
)
|
||||||
print(f"Search results: {search_results}")
|
|
||||||
|
|
||||||
# Process the results
|
# Print which engines returned results
|
||||||
|
for engine, results in search_results.items():
|
||||||
|
print(f"Engine {engine} returned {len(results)} results")
|
||||||
|
|
||||||
|
# Process the results - don't limit the number of results
|
||||||
print(f"Processing results...")
|
print(f"Processing results...")
|
||||||
processed_results = self.result_collector.process_results(
|
processed_results = self.result_collector.process_results(
|
||||||
search_results, dedup=True, max_results=num_results
|
search_results, dedup=True, max_results=None
|
||||||
)
|
)
|
||||||
print(f"Processed {len(processed_results)} results")
|
print(f"Processed {len(processed_results)} results")
|
||||||
|
|
||||||
|
@ -97,48 +109,45 @@ class GradioInterface:
|
||||||
|
|
||||||
def _format_results_as_markdown(self, results):
|
def _format_results_as_markdown(self, results):
|
||||||
"""
|
"""
|
||||||
Format search results as markdown.
|
Format results as markdown.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
results (list): List of search result dictionaries
|
results (list): List of result dictionaries
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: Markdown formatted results
|
str: Markdown formatted results
|
||||||
"""
|
"""
|
||||||
if not results:
|
if not results:
|
||||||
return "## No results found"
|
return "## No Results Found\n\nNo results were found for your query."
|
||||||
|
|
||||||
markdown = "## Search Results\n\n"
|
# Count results by source
|
||||||
|
source_counts = {}
|
||||||
|
for result in results:
|
||||||
|
source = result.get("source", "unknown")
|
||||||
|
source_counts[source] = source_counts.get(source, 0) + 1
|
||||||
|
|
||||||
|
# Create source distribution string
|
||||||
|
source_distribution = ", ".join([f"{source}: {count}" for source, count in source_counts.items()])
|
||||||
|
|
||||||
|
markdown = f"## Search Results\n\n"
|
||||||
|
markdown += f"*Sources: {source_distribution}*\n\n"
|
||||||
|
|
||||||
for i, result in enumerate(results):
|
for i, result in enumerate(results):
|
||||||
title = result.get("title", "No title")
|
title = result.get("title", "Untitled")
|
||||||
url = result.get("url", "#")
|
url = result.get("url", "")
|
||||||
snippet = result.get("snippet", "No snippet available")
|
snippet = result.get("snippet", "No snippet available")
|
||||||
source = result.get("source", "unknown")
|
source = result.get("source", "unknown")
|
||||||
|
authors = result.get("authors", "Unknown")
|
||||||
|
year = result.get("year", "Unknown")
|
||||||
|
score = result.get("relevance_score", 0)
|
||||||
|
|
||||||
markdown += f"### {i+1}. {title}\n\n"
|
markdown += f"### {i+1}. {title}\n\n"
|
||||||
markdown += f"**Source**: {source}\n\n"
|
markdown += f"**Source**: {source}\n\n"
|
||||||
markdown += f"**URL**: [{url}]({url})\n\n"
|
markdown += f"**URL**: [{url}]({url})\n\n"
|
||||||
markdown += f"**Snippet**: {snippet}\n\n"
|
markdown += f"**Snippet**: {snippet}\n\n"
|
||||||
|
markdown += f"**Authors**: {authors}\n\n"
|
||||||
# Add additional fields based on source
|
markdown += f"**Year**: {year}\n\n"
|
||||||
if source == "scholar" or source == "arxiv":
|
markdown += f"**Score**: {score}\n\n"
|
||||||
authors = result.get("authors", "Unknown")
|
|
||||||
if isinstance(authors, list):
|
|
||||||
authors = ", ".join(authors)
|
|
||||||
year = result.get("year", "Unknown")
|
|
||||||
markdown += f"**Authors**: {authors}\n\n"
|
|
||||||
markdown += f"**Year**: {year}\n\n"
|
|
||||||
|
|
||||||
if source == "arxiv":
|
|
||||||
categories = result.get("categories", [])
|
|
||||||
if categories:
|
|
||||||
markdown += f"**Categories**: {', '.join(categories)}\n\n"
|
|
||||||
|
|
||||||
pdf_url = result.get("pdf_url", "")
|
|
||||||
if pdf_url:
|
|
||||||
markdown += f"**PDF**: [{pdf_url}]({pdf_url})\n\n"
|
|
||||||
|
|
||||||
markdown += "---\n\n"
|
markdown += "---\n\n"
|
||||||
|
|
||||||
return markdown
|
return markdown
|
||||||
|
@ -169,13 +178,22 @@ class GradioInterface:
|
||||||
with gr.Column(scale=1):
|
with gr.Column(scale=1):
|
||||||
num_results = gr.Slider(
|
num_results = gr.Slider(
|
||||||
minimum=5,
|
minimum=5,
|
||||||
maximum=30,
|
maximum=50,
|
||||||
value=10,
|
value=20,
|
||||||
step=5,
|
step=5,
|
||||||
label="Number of Results"
|
label="Results Per Engine"
|
||||||
)
|
)
|
||||||
search_button = gr.Button("Search", variant="primary")
|
search_button = gr.Button("Search", variant="primary")
|
||||||
|
|
||||||
|
gr.Examples(
|
||||||
|
examples=[
|
||||||
|
["What are the latest advancements in quantum computing?"],
|
||||||
|
["Compare transformer and RNN architectures for NLP tasks"],
|
||||||
|
["Explain the environmental impact of electric vehicles"]
|
||||||
|
],
|
||||||
|
inputs=query_input
|
||||||
|
)
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
results_output = gr.Markdown(label="Results")
|
results_output = gr.Markdown(label="Results")
|
||||||
|
@ -193,18 +211,6 @@ class GradioInterface:
|
||||||
outputs=[results_output, file_output]
|
outputs=[results_output, file_output]
|
||||||
)
|
)
|
||||||
|
|
||||||
# Examples
|
|
||||||
gr.Examples(
|
|
||||||
[
|
|
||||||
["What are the latest advancements in quantum computing?"],
|
|
||||||
["Compare transformer and RNN architectures for NLP tasks"],
|
|
||||||
["Explain the environmental impact of electric vehicles"],
|
|
||||||
["What are the most effective treatments for depression?"],
|
|
||||||
["How does climate change affect biodiversity?"]
|
|
||||||
],
|
|
||||||
inputs=[query_input]
|
|
||||||
)
|
|
||||||
|
|
||||||
return interface
|
return interface
|
||||||
|
|
||||||
def launch(self, **kwargs):
|
def launch(self, **kwargs):
|
||||||
|
|
Loading…
Reference in New Issue