ira/sim-search-api/app/services/search_service.py

183 lines
6.3 KiB
Python

"""
Search service for the sim-search API.
This module provides services for search execution and result management.
"""
import sys
import os
import time
import json
from pathlib import Path
from typing import Dict, Any, List, Optional, Union
from sqlalchemy.orm import Session
from app.core.config import settings
from app.db.models import Search
# Add sim-search to the python path.
# The `execution.*` imports that follow are resolved through this entry, so
# these two statements must run before them. The path is appended (not
# inserted at the front), so earlier sys.path entries take precedence if a
# module name collides.
sim_search_path = Path(settings.SIM_SEARCH_PATH)
sys.path.append(str(sim_search_path))
# Import sim-search components
from execution.search_executor import SearchExecutor
from execution.result_collector import ResultCollector
class SearchService:
    """
    Service for search execution and result management.

    This class provides methods to execute searches and manage search results
    using the sim-search search execution functionality. It wraps a
    ``SearchExecutor`` (runs the queries) and a ``ResultCollector``
    (post-processes the raw per-engine results).
    """

    def __init__(self):
        """Initialize the search service with its executor and result collector."""
        self.search_executor = SearchExecutor()
        self.result_collector = ResultCollector()

    async def get_available_search_engines(self) -> List[str]:
        """
        Get a list of available search engines.

        Returns:
            List of available search engine names, as reported by the executor
        """
        return self.search_executor.get_available_search_engines()

    async def execute_search(
        self,
        structured_query: Dict[str, Any],
        search_engines: Optional[List[str]] = None,
        num_results: Optional[int] = 10,
        timeout: Optional[int] = 30,
        user_id: Optional[str] = None,
        db: Optional["Session"] = None,
    ) -> Dict[str, Any]:
        """
        Execute a search with the given parameters.

        The structured query is normalized in place before it is handed to the
        executor (missing/None ``raw_query``, ``enhanced_query`` and
        ``search_queries`` are filled in), so the caller's dict is modified.

        Args:
            structured_query: Structured query (None is treated as an empty dict)
            search_engines: List of search engines to use; when empty or None,
                all engines reported by the executor are used
            num_results: Number of results to return per search engine
            timeout: Timeout in seconds
            user_id: User ID for storing the search
            db: Database session; the search is stored only when both
                ``user_id`` and ``db`` are provided

        Returns:
            Dict with ``search_id`` (None when nothing was stored), ``query``,
            ``enhanced_query``, ``results`` (raw results keyed by engine),
            ``total_results`` (count of raw results across engines) and
            ``execution_time`` (seconds spent in the executor call only).

        Raises:
            Exception: whatever the database commit raises; the session is
                rolled back before the error is re-raised.
        """
        structured_query, search_engines = self._prepare_query(
            structured_query, search_engines
        )
        original_query = structured_query.get("original_query") or ""

        # perf_counter is monotonic, so the measured duration cannot go
        # negative or jump when the system clock is adjusted.
        # NOTE(review): the executor is called synchronously from this
        # coroutine; if it blocks, the event loop is blocked too - confirm.
        started = time.perf_counter()
        search_results = self.search_executor.execute_search(
            structured_query=structured_query,
            search_engines=search_engines,
            num_results=num_results,
            timeout=timeout,
        )
        execution_time = time.perf_counter() - started

        # The processed (deduplicated / reranked) list is what gets stored;
        # the response below still carries the raw per-engine results.
        processed_results = self.result_collector.process_results(
            search_results, dedup=True, max_results=None, use_reranker=True
        )

        search_id = None
        if user_id and db is not None:
            search_id = self._store_search(
                db=db,
                user_id=user_id,
                original_query=original_query,
                structured_query=structured_query,
                search_engines=search_engines,
                processed_results=processed_results,
            )

        return {
            "search_id": search_id,
            "query": original_query,
            "enhanced_query": structured_query.get("enhanced_query", ""),
            "results": dict(search_results),
            "total_results": sum(len(hits) for hits in search_results.values()),
            "execution_time": execution_time,
        }

    def _prepare_query(self, structured_query, search_engines):
        """Fill in the fields the executor requires; return (query, engines)."""
        if structured_query is None:
            structured_query = {}

        # Fall back to every available engine when none were requested.
        if not search_engines:
            search_engines = self.search_executor.get_available_search_engines()
            structured_query["search_engines"] = search_engines

        original_query = structured_query.get("original_query") or ""

        # A key that is present but None is treated like a missing key for
        # all three fields, so the executor never receives None for them.
        if structured_query.get("raw_query") is None:
            structured_query["raw_query"] = original_query
        if structured_query.get("enhanced_query") is None:
            structured_query["enhanced_query"] = original_query
        if structured_query.get("search_queries") is None:
            structured_query["search_queries"] = {}

        return structured_query, search_engines

    def _store_search(
        self,
        db,
        user_id,
        original_query,
        structured_query,
        search_engines,
        processed_results,
    ):
        """Persist a Search record for this execution and return its id."""
        search = Search(
            user_id=user_id,
            query=original_query,
            enhanced_query=structured_query.get("enhanced_query", ""),
            query_type=structured_query.get("type", ""),
            # Engines are stored as a single comma-separated string.
            engines=",".join(search_engines) if search_engines else "",
            results_count=len(processed_results),
            results=processed_results,
        )
        db.add(search)
        try:
            db.commit()
        except Exception:
            # Leave the session usable for the caller, then surface the error.
            db.rollback()
            raise
        db.refresh(search)
        return search.id

    @staticmethod
    def _group_by_source(stored_results):
        """Return stored results as an engine -> list-of-results mapping."""
        if isinstance(stored_results, dict):
            # Already in engine -> list format.
            return stored_results

        grouped = {}
        # `or []` covers a record whose results column is None/empty.
        for result in stored_results or []:
            grouped.setdefault(result.get("source", "unknown"), []).append(result)
        return grouped

    async def get_search_results(self, search: "Search") -> Dict[str, Any]:
        """
        Get results for a specific search.

        Stored results may be either an engine -> list mapping or a flat list;
        a flat list is grouped by each result's ``source`` key (``"unknown"``
        when absent). A record with no stored results yields an empty mapping.

        Args:
            search: Search record

        Returns:
            Dict with ``search_id``, ``query``, ``enhanced_query``, ``results``
            (keyed by engine/source), ``total_results`` (the stored
            ``results_count``) and ``execution_time`` (always 0.0).
        """
        return {
            "search_id": search.id,
            "query": search.query,
            "enhanced_query": search.enhanced_query,
            "results": self._group_by_source(search.results),
            "total_results": search.results_count,
            "execution_time": 0.0,  # Not available for stored searches
        }