"""
Sub-question search executor module.

This module handles the execution of search queries for decomposed
sub-questions, aggregating results from multiple search engines.
"""

import os
import time
import asyncio
from typing import Dict, List, Any, Optional, Union
import logging
import concurrent.futures

from config.config import get_config
from .search_executor import SearchExecutor

# Configure logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


class SubQuestionExecutor:
    """
    Executes search queries for sub-questions and aggregates results.

    The executor delegates the actual searching to a ``SearchExecutor``
    instance and offers helpers to merge and rank the per-sub-question
    results afterwards.
    """

    # Priority used by prioritize_results() for results that carry no usable
    # priority value (lower number = higher priority).
    _FALLBACK_SORT_PRIORITY = 5

    def __init__(self):
        """Initialize the sub-question executor."""
        self.search_executor = SearchExecutor()
        self.config = get_config()

    async def execute_sub_question_searches(self,
                                            structured_query: Dict[str, Any],
                                            num_results_per_engine: int = 5,
                                            timeout: int = 60,
                                            delay_between_sub_questions: float = 1.0
                                            ) -> Dict[str, Any]:
        """
        Execute searches for all sub-questions in a structured query.

        Sub-questions are processed one after another. Entries without a
        ``'sub_question'`` text are skipped and do not appear in the output.
        A failing search does not abort the run: the sub-question is stored
        with empty results and an ``'error'`` message instead.

        Args:
            structured_query: The structured query containing sub-questions.
                It is updated in place: its ``'sub_questions'`` entry is
                replaced by the list of processed sub-questions.
            num_results_per_engine: Number of results to return per search
                engine for each sub-question
            timeout: Timeout in seconds for each sub-question's searches
            delay_between_sub_questions: Seconds to wait after each processed
                sub-question (except the last one in the list) to avoid rate
                limiting. Values <= 0 disable the pause.

        Returns:
            Updated structured query with sub-question search results
        """
        # Extract sub-questions from the structured query
        sub_questions = structured_query.get('sub_questions', [])

        if not sub_questions:
            logger.info("No sub-questions found in the structured query")
            return structured_query

        total = len(sub_questions)
        logger.info("Executing searches for %d sub-questions", total)

        # The lookup result is informational only; it does not influence
        # which engines the search executor uses below.
        available_engines = self.search_executor.get_available_search_engines()
        logger.debug("Available search engines: %s", available_engines)

        sub_question_results = []

        # Process sub-questions sequentially to avoid overwhelming APIs
        for i, sq in enumerate(sub_questions):
            sub_q_text = sq.get('sub_question', '')
            if not sub_q_text:
                continue

            logger.info("Processing sub-question %d/%d: %s", i + 1, total, sub_q_text)

            # Create a mini structured query for this sub-question
            mini_query = {
                'original_query': sub_q_text,
                'enhanced_query': sub_q_text,
                'search_queries': sq.get('search_queries', {}),
                'is_current_events': structured_query.get('is_current_events', False),
                'is_academic': structured_query.get('is_academic', False),
                'is_code': structured_query.get('is_code', False),
            }

            # Work on a copy so the caller's sub-question dict stays untouched
            sq_with_results = sq.copy()

            try:
                # NOTE(review): execute_search is called synchronously here; if
                # it performs blocking I/O it will block the event loop for the
                # duration of the search - confirm this is intended.
                sq_results = self.search_executor.execute_search(
                    structured_query=mini_query,
                    num_results=num_results_per_engine,
                    timeout=timeout
                )

                # Log results for each engine; an engine reporting None is
                # counted as having returned nothing.
                result_count = 0
                for engine, results in sq_results.items():
                    engine_count = len(results or ())
                    result_count += engine_count
                    logger.info(" Engine %s returned %d results", engine, engine_count)

                # Store results with sub-question metadata
                sq_with_results['search_results'] = sq_results
                sq_with_results['search_result_count'] = result_count
            except Exception as e:
                # Best effort: record the failure and carry on with the
                # remaining sub-questions.
                logger.error("Error executing search for sub-question: %s", e,
                             exc_info=True)
                sq_with_results['search_results'] = {}
                sq_with_results['search_result_count'] = 0
                sq_with_results['error'] = str(e)

            sub_question_results.append(sq_with_results)

            # Pause between sub-questions to avoid rate limiting. This runs
            # after failures too, since a failed request is often the very
            # sign that the APIs need a break.
            if i < total - 1 and delay_between_sub_questions > 0:
                await asyncio.sleep(delay_between_sub_questions)

        # Update the structured query with the results
        structured_query['sub_questions'] = sub_question_results

        # Calculate total results
        total_results = sum(sq.get('search_result_count', 0) for sq in sub_question_results)
        logger.info("Completed searches for all sub-questions. Total results: %s",
                    total_results)

        return structured_query

    def get_combined_results(self, structured_query: Dict[str, Any]) -> Dict[str, List[Dict[str, Any]]]:
        """
        Get a combined view of results from all sub-questions.

        Each dict result is tagged with the ``'sub_question'``, ``'aspect'``
        and ``'priority'`` of the sub-question it belongs to, unless the
        result already carries that key. The tagging happens in place, so the
        result dicts stored in ``structured_query`` receive the metadata too.
        Empty or non-dict results are left out of the combined view.

        Args:
            structured_query: The structured query with sub-question search results

        Returns:
            Dictionary mapping search engine names to lists of results
        """
        sub_questions = structured_query.get('sub_questions', [])

        if not sub_questions:
            return {}

        combined_results: Dict[str, List[Dict[str, Any]]] = {}

        for sq in sub_questions:
            sub_q_text = sq.get('sub_question', '')
            aspect = sq.get('aspect', 'unknown')
            priority = sq.get('priority', 3)
            # A missing or None entry is treated as "no results"
            search_results = sq.get('search_results') or {}

            for engine, results in search_results.items():
                # The engine key is created even when it contributed nothing
                engine_results = combined_results.setdefault(engine, [])

                for result in results or ():
                    if not (result and isinstance(result, dict)):
                        continue

                    # Only add metadata if it doesn't already exist
                    result.setdefault('sub_question', sub_q_text)
                    result.setdefault('aspect', aspect)
                    result.setdefault('priority', priority)

                    engine_results.append(result)

        return combined_results

    @staticmethod
    def _sort_priority(result: Any) -> float:
        """Return a numeric sort key for *result*; unusable priorities map to the fallback."""
        fallback = SubQuestionExecutor._FALLBACK_SORT_PRIORITY
        if not isinstance(result, dict):
            return fallback
        try:
            value = float(result.get('priority', fallback))
        except (TypeError, ValueError, OverflowError):
            return fallback
        # NaN compares unequal to itself and would make the ordering undefined
        return fallback if value != value else value

    def prioritize_results(self,
                           combined_results: Dict[str, List[Dict[str, Any]]],
                           max_results_per_engine: int = 10) -> Dict[str, List[Dict[str, Any]]]:
        """
        Prioritize results based on sub-question priority.

        Results are sorted per engine by their ``'priority'`` value (lower
        number = higher priority). The sort is stable, so results of equal
        priority keep their relative order. Results whose priority is missing,
        ``None`` or not numeric are ranked with priority 5.

        Args:
            combined_results: Combined results from all sub-questions
            max_results_per_engine: Maximum number of results to keep per
                engine. Negative values keep nothing; ``None`` keeps all.

        Returns:
            Dictionary mapping search engine names to prioritized lists of results
        """
        # Clamp so a negative limit cannot turn into a "drop from the end" slice
        limit = None if max_results_per_engine is None else max(0, max_results_per_engine)

        return {
            engine: sorted(results or (), key=self._sort_priority)[:limit]
            for engine, results in combined_results.items()
        }


# Create a singleton instance for global use
sub_question_executor = SubQuestionExecutor()


def get_sub_question_executor() -> SubQuestionExecutor:
    """
    Get the global sub-question executor instance.

    Returns:
        SubQuestionExecutor instance
    """
    return sub_question_executor