From a7aa9ea5806cf94b43d31eb60114bd98a7a5b65b Mon Sep 17 00:00:00 2001 From: Steve White Date: Thu, 27 Feb 2025 16:49:37 -0600 Subject: [PATCH] Update Gradio interface to return all results and allow more results per engine --- execution/result_collector.py | 4 +- ui/gradio_interface.py | 96 +++++++++++++++++++---------------- 2 files changed, 53 insertions(+), 47 deletions(-) diff --git a/execution/result_collector.py b/execution/result_collector.py index 16bccff..d3e1fbc 100644 --- a/execution/result_collector.py +++ b/execution/result_collector.py @@ -123,12 +123,12 @@ class ResultCollector: source = result.get("source", "") if source == "scholar": score += 10 + elif source == "serper": + score += 9 elif source == "arxiv": score += 8 elif source == "google": score += 5 - elif source == "serper": - score += 5 # Boost score based on position in original results position = result.get("raw_data", {}).get("position", 0) diff --git a/ui/gradio_interface.py b/ui/gradio_interface.py index b245ab5..56f6581 100644 --- a/ui/gradio_interface.py +++ b/ui/gradio_interface.py @@ -46,24 +46,36 @@ class GradioInterface: processed_query = self.query_processor.process_query(query) print(f"Processed query: {processed_query}") + # Get available search engines and print their status + available_engines = self.search_executor.get_available_search_engines() + print(f"Available search engines: {available_engines}") + + # Check which handlers are actually available + for engine_name, handler in self.search_executor.available_handlers.items(): + print(f"Handler {engine_name} available: {handler.is_available()}") + if not handler.is_available(): + print(f" - Reason: API key may be missing for {engine_name}") + # Add search engines if not specified if 'search_engines' not in processed_query: - available_engines = self.search_executor.get_available_search_engines() processed_query['search_engines'] = available_engines print(f"Using search engines: {available_engines}") - # Execute the search + # Execute the search - request more results from each engine print(f"Executing search...") search_results = self.search_executor.execute_search( structured_query=processed_query, num_results=num_results ) - print(f"Search results: {search_results}") - # Process the results + # Print which engines returned results + for engine, results in search_results.items(): + print(f"Engine {engine} returned {len(results)} results") + + # Process the results - don't limit the number of results print(f"Processing results...") processed_results = self.result_collector.process_results( - search_results, dedup=True, max_results=num_results + search_results, dedup=True, max_results=None ) print(f"Processed {len(processed_results)} results") @@ -97,48 +109,45 @@ class GradioInterface: def _format_results_as_markdown(self, results): """ - Format search results as markdown. + Format results as markdown. Args: - results (list): List of search result dictionaries + results (list): List of result dictionaries Returns: str: Markdown formatted results """ if not results: - return "## No results found" + return "## No Results Found\n\nNo results were found for your query." - markdown = "## Search Results\n\n" + # Count results by source + source_counts = {} + for result in results: + source = result.get("source", "unknown") + source_counts[source] = source_counts.get(source, 0) + 1 + + # Create source distribution string + source_distribution = ", ".join([f"{source}: {count}" for source, count in source_counts.items()]) + + markdown = f"## Search Results\n\n" + markdown += f"*Sources: {source_distribution}*\n\n" for i, result in enumerate(results): - title = result.get("title", "No title") - url = result.get("url", "#") + title = result.get("title", "Untitled") + url = result.get("url", "") snippet = result.get("snippet", "No snippet available") source = result.get("source", "unknown") + authors = result.get("authors", "Unknown") + year = result.get("year", "Unknown") + score = result.get("relevance_score", 0) markdown += f"### {i+1}. {title}\n\n" markdown += f"**Source**: {source}\n\n" markdown += f"**URL**: [{url}]({url})\n\n" markdown += f"**Snippet**: {snippet}\n\n" - - # Add additional fields based on source - if source == "scholar" or source == "arxiv": - authors = result.get("authors", "Unknown") - if isinstance(authors, list): - authors = ", ".join(authors) - year = result.get("year", "Unknown") - markdown += f"**Authors**: {authors}\n\n" - markdown += f"**Year**: {year}\n\n" - - if source == "arxiv": - categories = result.get("categories", []) - if categories: - markdown += f"**Categories**: {', '.join(categories)}\n\n" - - pdf_url = result.get("pdf_url", "") - if pdf_url: - markdown += f"**PDF**: [{pdf_url}]({pdf_url})\n\n" - + markdown += f"**Authors**: {authors}\n\n" + markdown += f"**Year**: {year}\n\n" + markdown += f"**Score**: {score}\n\n" markdown += "---\n\n" return markdown @@ -169,13 +178,22 @@ class GradioInterface: with gr.Column(scale=1): num_results = gr.Slider( minimum=5, - maximum=30, - value=10, + maximum=50, + value=20, step=5, - label="Number of Results" + label="Results Per Engine" ) search_button = gr.Button("Search", variant="primary") + gr.Examples( + examples=[ + ["What are the latest advancements in quantum computing?"], + ["Compare transformer and RNN architectures for NLP tasks"], + ["Explain the environmental impact of electric vehicles"] + ], + inputs=query_input + ) + with gr.Row(): with gr.Column(): results_output = gr.Markdown(label="Results") @@ -193,18 +211,6 @@ class GradioInterface: outputs=[results_output, file_output] ) - # Examples - gr.Examples( - [ - ["What are the latest advancements in quantum computing?"], - ["Compare transformer and RNN architectures for NLP tasks"], - ["Explain the environmental impact of electric vehicles"], - ["What are the most effective treatments for depression?"], - ["How does climate change affect biodiversity?"] - ], - inputs=[query_input] - ) - return interface def launch(self, **kwargs):