"""
Search service for the sim-search API.

This module provides services for search execution and result management.
"""

import sys
import os
import time
import json
from pathlib import Path
from typing import Dict, Any, List, Optional, Union

from sqlalchemy.orm import Session

from app.core.config import settings
from app.db.models import Search

# Add sim-search to the python path. This must run before the imports below,
# which are resolved relative to that directory.
sim_search_path = Path(settings.SIM_SEARCH_PATH)
sys.path.append(str(sim_search_path))

# Import sim-search components
from execution.search_executor import SearchExecutor
from execution.result_collector import ResultCollector


class SearchService:
    """
    Service for search execution and result management.

    This class provides methods to execute searches and manage search results
    using the sim-search search execution functionality.
    """

    def __init__(self):
        """Initialize the search service."""
        self.search_executor = SearchExecutor()
        self.result_collector = ResultCollector()

    async def get_available_search_engines(self) -> List[str]:
        """
        Get a list of available search engines.

        Returns:
            List of available search engine names, as reported by the
            underlying search executor
        """
        return self.search_executor.get_available_search_engines()

    async def execute_search(
        self,
        structured_query: Dict[str, Any],
        search_engines: Optional[List[str]] = None,
        num_results: Optional[int] = 10,
        timeout: Optional[int] = 30,
        user_id: Optional[str] = None,
        db: Optional[Session] = None,
    ) -> Dict[str, Any]:
        """
        Execute a search with the given parameters.

        Args:
            structured_query: Structured query. Its "search_engines" key is
                set in place to the engine list that is actually used.
            search_engines: List of search engines to use. When empty or
                None, every engine reported by the executor is used.
            num_results: Number of results to return per search engine
            timeout: Timeout in seconds
            user_id: User ID for storing the search
            db: Database session

        Returns:
            Dict with the keys "search_id" (None unless both user_id and db
            were supplied), "query", "enhanced_query", "results" (raw results
            keyed by engine), "total_results" and "execution_time" (seconds).

        Raises:
            Any exception raised by the executor, the result collector or the
            database session. A failed write is rolled back before the
            exception propagates.
        """
        # NOTE(review): `timeout` is part of the public signature but is not
        # forwarded to the executor; confirm whether
        # SearchExecutor.execute_search accepts a timeout before wiring it in.

        # perf_counter is monotonic, so the measured duration cannot be skewed
        # by wall-clock adjustments.
        start_time = time.perf_counter()

        # Fall back to every available engine when none were requested.
        if not search_engines:
            search_engines = self.search_executor.get_available_search_engines()

        # Record the effective engine list on the query so that the executor
        # (which presumably reads this key -- TODO confirm) and the stored
        # record agree, including when the caller chose the engines explicitly.
        structured_query["search_engines"] = search_engines

        # Execute the search
        search_results = self.search_executor.execute_search(
            structured_query=structured_query, num_results=num_results
        )

        # Only the executor call is timed; post-processing and persistence
        # are excluded.
        execution_time = time.perf_counter() - start_time

        # Process results
        processed_results = self.result_collector.process_results(
            search_results, dedup=True, max_results=None, use_reranker=True
        )

        # Create search record if user_id and db are provided
        search_id = None
        if user_id and db:
            search_id = self._store_search(
                db=db,
                user_id=user_id,
                structured_query=structured_query,
                search_engines=search_engines,
                processed_results=processed_results,
            )

        # NOTE(review): the response carries the raw per-engine results while
        # the stored record holds the processed ones, so "total_results" here
        # can differ from the stored results_count. Confirm this is intended.
        return {
            "search_id": search_id,
            "query": structured_query.get("original_query", ""),
            "enhanced_query": structured_query.get("enhanced_query", ""),
            "results": dict(search_results),
            "total_results": sum(len(results) for results in search_results.values()),
            "execution_time": execution_time,
        }

    def _store_search(
        self,
        db: Session,
        user_id: str,
        structured_query: Dict[str, Any],
        search_engines: Optional[List[str]],
        processed_results: Any,
    ) -> Any:
        """Persist one executed search and return the id of the new record."""
        engines_str = ",".join(search_engines) if search_engines else ""
        search = Search(
            user_id=user_id,
            query=structured_query.get("original_query", ""),
            enhanced_query=structured_query.get("enhanced_query", ""),
            query_type=structured_query.get("type", ""),
            engines=engines_str,
            results_count=len(processed_results),
            results=processed_results,
        )
        try:
            db.add(search)
            db.commit()
            db.refresh(search)
        except Exception:
            # Leave the caller's session usable, then surface the original
            # error unchanged.
            db.rollback()
            raise
        return search.id

    async def get_search_results(self, search: Search) -> Dict[str, Any]:
        """
        Get results for a specific search.

        Args:
            search: Search record

        Returns:
            Dict with the same keys as the execute_search response, filled
            from the stored record. "execution_time" is always 0.0 because it
            is not available for stored searches.
        """
        return {
            "search_id": search.id,
            "query": search.query,
            "enhanced_query": search.enhanced_query,
            "results": search.results,
            "total_results": search.results_count,
            "execution_time": 0.0,  # Not available for stored searches
        }