183 lines
6.3 KiB
Python
183 lines
6.3 KiB
Python
"""
Search service for the sim-search API.

This module provides services for search execution and result management.
"""
|
|
|
|
import sys
|
|
import os
|
|
import time
|
|
import json
|
|
from pathlib import Path
|
|
from typing import Dict, Any, List, Optional, Union
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.core.config import settings
|
|
from app.db.models import Search
|
|
|
|
# Add sim-search to the python path
|
|
sim_search_path = Path(settings.SIM_SEARCH_PATH)
|
|
sys.path.append(str(sim_search_path))
|
|
|
|
# Import sim-search components
|
|
from execution.search_executor import SearchExecutor
|
|
from execution.result_collector import ResultCollector
|
|
|
|
|
|
class SearchService:
    """
    Service for search execution and result management.

    This class provides methods to execute searches and manage search results
    using the sim-search search execution functionality. It delegates the
    actual search to a ``SearchExecutor`` and post-processing to a
    ``ResultCollector``, and can optionally persist a ``Search`` record.
    """

    def __init__(self):
        """Initialize the search service with its executor and collector."""
        self.search_executor = SearchExecutor()
        self.result_collector = ResultCollector()

    async def get_available_search_engines(self) -> List[str]:
        """
        Get a list of available search engines.

        Returns:
            List of available search engine names, as reported by the
            underlying search executor.
        """
        return self.search_executor.get_available_search_engines()

    async def execute_search(
        self,
        structured_query: Dict[str, Any],
        search_engines: Optional[List[str]] = None,
        num_results: Optional[int] = 10,
        timeout: Optional[int] = 30,
        user_id: Optional[str] = None,
        db: Optional["Session"] = None,
    ) -> Dict[str, Any]:
        """
        Execute a search with the given parameters.

        The incoming ``structured_query`` is never modified: the fields the
        executor needs are filled in on a shallow copy.

        Args:
            structured_query: Structured query (``None`` is treated as empty)
            search_engines: List of search engines to use; when empty or
                omitted, every engine reported by the executor is used
            num_results: Number of results to return per search engine
            timeout: Timeout in seconds
            user_id: User ID for storing the search
            db: Database session

        Returns:
            Dict with ``search_id`` (``None`` unless the search was stored),
            ``query``, ``enhanced_query``, ``results`` (the executor's
            per-engine results), ``total_results`` and ``execution_time``.

        Raises:
            Exception: Whatever the database layer raises while storing the
                search; the session is rolled back before re-raising.
        """
        # Start timing
        start_time = time.time()

        # Work on a shallow copy so the caller's dict is left untouched.
        structured_query = (
            dict(structured_query) if structured_query is not None else {}
        )

        # Add search engines if not specified
        if not search_engines:
            search_engines = self.search_executor.get_available_search_engines()
            structured_query["search_engines"] = search_engines

        # Ensure all required fields are present
        original_query = structured_query.get("original_query", "")

        # raw_query is required by search_executor; enhanced_query falls back
        # to the original query when the caller did not supply one.
        structured_query.setdefault("raw_query", original_query)
        structured_query.setdefault("enhanced_query", original_query)
        enhanced_query = structured_query["enhanced_query"]

        # search_queries must not be None (required by search_executor)
        if structured_query.get("search_queries") is None:
            structured_query["search_queries"] = {}

        # Execute the search with the completed structured_query
        search_results = self.search_executor.execute_search(
            structured_query=structured_query,
            search_engines=search_engines,
            num_results=num_results,
            timeout=timeout,
        )

        # Execution time covers the search itself, not result processing
        execution_time = time.time() - start_time

        # Process results (only the stored record uses the processed form;
        # the response below returns the executor's per-engine results)
        processed_results = self.result_collector.process_results(
            search_results, dedup=True, max_results=None, use_reranker=True
        )

        # Create search record if user_id and db are provided
        search_id = None
        if user_id and db:
            search = Search(
                user_id=user_id,
                query=original_query,
                enhanced_query=enhanced_query,
                query_type=structured_query.get("type", ""),
                engines=",".join(search_engines) if search_engines else "",
                results_count=len(processed_results),
                results=processed_results,
            )
            try:
                db.add(search)
                db.commit()
                db.refresh(search)
            except Exception:
                # Leave the session usable for the caller, then surface the error.
                db.rollback()
                raise
            search_id = search.id

        # Format the response
        return {
            "search_id": search_id,
            "query": original_query,
            "enhanced_query": enhanced_query,
            "results": dict(search_results),
            "total_results": sum(len(hits) for hits in search_results.values()),
            "execution_time": execution_time,
        }

    async def get_search_results(self, search: "Search") -> Dict[str, Any]:
        """
        Get results for a specific search.

        Stored results may be either an engine -> list mapping, which is
        returned as-is, or a flat list of result dicts, which is grouped by
        each result's ``"source"`` key (``"unknown"`` when absent). A record
        with no stored results yields an empty mapping.

        Args:
            search: Search record

        Returns:
            Dict with ``search_id``, ``query``, ``enhanced_query``,
            ``results``, ``total_results`` and ``execution_time`` (always
            ``0.0`` because timing is not stored with the record).
        """
        stored = search.results

        if isinstance(stored, dict):
            # Already in engine -> list format
            results = stored
        else:
            # Convert from flat list to engine -> list format, grouped by source
            results = {}
            for result in stored or []:
                source = result.get("source", "unknown")
                results.setdefault(source, []).append(result)

        # Format the response
        return {
            "search_id": search.id,
            "query": search.query,
            "enhanced_query": search.enhanced_query,
            "results": results,
            "total_results": search.results_count,
            "execution_time": 0.0,  # Not available for stored searches
        }
|