""" Search service for the sim-search API. This module provides services for search execution and result management. """ import sys import os import time import json from pathlib import Path from typing import Dict, Any, List, Optional, Union from sqlalchemy.orm import Session from app.core.config import settings from app.db.models import Search # Add sim-search to the python path sim_search_path = Path(settings.SIM_SEARCH_PATH) sys.path.append(str(sim_search_path)) # Import sim-search components from execution.search_executor import SearchExecutor from execution.result_collector import ResultCollector class SearchService: """ Service for search execution and result management. This class provides methods to execute searches and manage search results using the sim-search search execution functionality. """ def __init__(self): """Initialize the search service.""" self.search_executor = SearchExecutor() self.result_collector = ResultCollector() async def get_available_search_engines(self) -> List[str]: """ Get a list of available search engines. Returns: List of available search engine names """ return self.search_executor.get_available_search_engines() async def execute_search( self, structured_query: Dict[str, Any], search_engines: Optional[List[str]] = None, num_results: Optional[int] = 10, timeout: Optional[int] = 30, user_id: Optional[str] = None, db: Optional[Session] = None, ) -> Dict[str, Any]: """ Execute a search with the given parameters. Args: structured_query: Structured query search_engines: List of search engines to use num_results: Number of results to return per search engine timeout: Timeout in seconds user_id: User ID for storing the search db: Database session Returns: Search results """ # Start timing start_time = time.time() # Make sure structured_query is not None if structured_query is None: structured_query = {} # Add search engines if not specified if not search_engines: search_engines = self.search_executor.get_available_search_engines() structured_query["search_engines"] = search_engines # Ensure all required fields are present original_query = structured_query.get("original_query", "") # Add raw_query field (required by search_executor) structured_query["raw_query"] = structured_query.get("raw_query", original_query) # Add enhanced_query if missing if "enhanced_query" not in structured_query: structured_query["enhanced_query"] = original_query # Make sure search_queries is not None (required by search_executor) if "search_queries" not in structured_query or structured_query["search_queries"] is None: structured_query["search_queries"] = {} # Execute the search with the fixed structured_query search_results = self.search_executor.execute_search( structured_query=structured_query, search_engines=search_engines, num_results=num_results, timeout=timeout ) # Calculate execution time execution_time = time.time() - start_time # Process results processed_results = self.result_collector.process_results( search_results, dedup=True, max_results=None, use_reranker=True ) # Create search record if user_id and db are provided search_id = None if user_id and db: # Create search record engines_str = ",".join(search_engines) if search_engines else "" search = Search( user_id=user_id, query=structured_query.get("original_query", ""), enhanced_query=structured_query.get("enhanced_query", ""), query_type=structured_query.get("type", ""), engines=engines_str, results_count=len(processed_results), results=processed_results, ) db.add(search) db.commit() db.refresh(search) search_id = search.id # Format the response return { "search_id": search_id, "query": structured_query.get("original_query", ""), "enhanced_query": structured_query.get("enhanced_query", ""), "results": {engine: results for engine, results in search_results.items()}, "total_results": sum(len(results) for results in search_results.values()), "execution_time": execution_time, } async def get_search_results(self, search: Search) -> Dict[str, Any]: """ Get results for a specific search. Args: search: Search record Returns: Search results """ # Parse engines string engines = search.engines.split(",") if search.engines else [] # Get results from the database - ensure they are in correct format results = {} # Check if results are already in engine->list format or just a flat list if isinstance(search.results, dict): # Already in the correct format results = search.results else: # Need to convert from flat list to engine->list format # Group by source for result in search.results: source = result.get("source", "unknown") if source not in results: results[source] = [] results[source].append(result) # Format the response return { "search_id": search.id, "query": search.query, "enhanced_query": search.enhanced_query, "results": results, "total_results": search.results_count, "execution_time": 0.0, # Not available for stored searches }