ira/sim-search-api/app/services/search_service.py

147 lines
4.7 KiB
Python

"""
Search service for the sim-search API.
This module provides services for search execution and result management.
"""
import sys
import os
import time
import json
from pathlib import Path
from typing import Dict, Any, List, Optional, Union
from sqlalchemy.orm import Session
from app.core.config import settings
from app.db.models import Search
# Make the external sim-search code importable: the directory configured in
# settings.SIM_SEARCH_PATH is appended to sys.path at import time of this module.
sim_search_path = Path(settings.SIM_SEARCH_PATH)
sys.path.append(str(sim_search_path))
# Import sim-search components. These imports must stay below the sys.path
# change above (presumably the `execution` package lives under
# SIM_SEARCH_PATH -- TODO confirm).
from execution.search_executor import SearchExecutor
from execution.result_collector import ResultCollector
class SearchService:
    """
    Service for search execution and result management.

    Thin wrapper around the sim-search ``SearchExecutor`` (runs a structured
    query) and ``ResultCollector`` (post-processes the raw results), with
    optional persistence of each executed search as a ``Search`` record.
    """

    def __init__(self):
        """Initialize the search service with a fresh executor and collector."""
        self.search_executor = SearchExecutor()
        self.result_collector = ResultCollector()

    async def get_available_search_engines(self) -> List[str]:
        """
        Get a list of available search engines.

        Returns:
            Whatever the underlying executor reports as available engines.
        """
        return self.search_executor.get_available_search_engines()

    async def execute_search(
        self,
        structured_query: Dict[str, Any],
        search_engines: Optional[List[str]] = None,
        num_results: Optional[int] = 10,
        timeout: Optional[int] = 30,
        user_id: Optional[str] = None,
        db: Optional["Session"] = None,
    ) -> Dict[str, Any]:
        """
        Execute a search with the given parameters.

        Args:
            structured_query: Structured query. It is not modified; a shallow
                copy carrying the effective ``search_engines`` list is what
                gets handed to the executor.
            search_engines: Search engines to use. When empty or None, every
                engine reported by the executor is used.
            num_results: Number of results to request, forwarded to the
                executor as-is.
            timeout: Timeout in seconds. Accepted for interface compatibility
                but currently not forwarded to the executor.
            user_id: User ID for storing the search.
            db: Database session. A ``Search`` record is written only when
                both ``user_id`` and ``db`` are provided.

        Returns:
            Dict with keys ``search_id`` (None when nothing was stored),
            ``query``, ``enhanced_query``, ``results`` (raw results keyed by
            engine), ``total_results`` (sum of the raw per-engine result
            counts) and ``execution_time`` (seconds spent in the executor
            call only; post-processing and persistence are not included).
        """
        # Fall back to every available engine when the caller named none.
        if not search_engines:
            search_engines = self.search_executor.get_available_search_engines()

        # Work on a copy so the caller's dict is left untouched, and always
        # forward the effective engine list so an explicit ``search_engines``
        # argument is honoured.
        query = dict(structured_query)
        query["search_engines"] = search_engines

        # NOTE(review): the executor is called synchronously inside an async
        # method; if it blocks on I/O it will block the event loop -- confirm
        # whether that is acceptable for this deployment.
        start_time = time.time()
        search_results = self.search_executor.execute_search(
            structured_query=query,
            num_results=num_results,
        )
        execution_time = time.time() - start_time

        # The processed results are what gets persisted; the response below
        # still exposes the raw per-engine results.
        processed_results = self.result_collector.process_results(
            search_results, dedup=True, max_results=None, use_reranker=True
        )

        original_query = query.get("original_query", "")
        enhanced_query = query.get("enhanced_query", "")

        # Persist the search only when we know both who ran it and where to
        # store it.
        search_id = None
        if user_id and db:
            engines_str = ",".join(search_engines) if search_engines else ""
            search = Search(
                user_id=user_id,
                query=original_query,
                enhanced_query=enhanced_query,
                query_type=query.get("type", ""),
                engines=engines_str,
                results_count=len(processed_results),
                results=processed_results,
            )
            db.add(search)
            db.commit()
            # Refresh so the database-generated id is available.
            db.refresh(search)
            search_id = search.id

        return {
            "search_id": search_id,
            "query": original_query,
            "enhanced_query": enhanced_query,
            "results": dict(search_results),
            "total_results": sum(len(results) for results in search_results.values()),
            "execution_time": execution_time,
        }

    async def get_search_results(self, search: "Search") -> Dict[str, Any]:
        """
        Get results for a specific stored search.

        Args:
            search: Search record.

        Returns:
            Dict with the same keys as the ``execute_search`` response.
            ``results`` and ``total_results`` come straight from the stored
            record, and ``execution_time`` is always 0.0 because it is not
            persisted.
        """
        return {
            "search_id": search.id,
            "query": search.query,
            "enhanced_query": search.enhanced_query,
            "results": search.results,
            "total_results": search.results_count,
            "execution_time": 0.0,  # Not available for stored searches
        }