ira/sim-search-api/app/services/query_service.py

86 lines
2.6 KiB
Python

"""
Query service for the sim-search API.
This module provides services for query processing and classification.
"""
import sys
import os
from pathlib import Path
from typing import Dict, Any, List, Optional
from app.core.config import settings
# Add sim-search to the python path. The location is read from
# settings.SIM_SEARCH_PATH and appended (not prepended) to sys.path, so
# entries already on the path keep precedence during import resolution.
sim_search_path = Path(settings.SIM_SEARCH_PATH)
sys.path.append(str(sim_search_path))
# Import sim-search components.
# NOTE(review): the `query` package is presumably resolved through the
# sys.path entry added above -- keep these imports below that statement.
from query.query_processor import QueryProcessor
from query.llm_interface import LLMInterface
class QueryService:
    """
    Service for query processing and classification.

    This class wraps the sim-search ``QueryProcessor`` and ``LLMInterface``
    components. Both public methods are coroutines and return a dictionary
    with the keys ``"original_query"`` and ``"structured_query"``.
    """

    def __init__(self):
        """Initialize the query service with default sim-search components."""
        self.query_processor = QueryProcessor()
        self.llm_interface = LLMInterface()

    async def process_query(self, query: str) -> Dict[str, Any]:
        """
        Process a query to enhance and structure it.

        Args:
            query: Query to process

        Returns:
            Dictionary holding the unmodified query under ``"original_query"``
            and whatever the query processor returned under
            ``"structured_query"``.
        """
        # Delegate the actual processing to the sim-search query processor.
        structured_query = await self.query_processor.process_query(query)

        return {
            "original_query": query,
            "structured_query": structured_query,
        }

    async def classify_query(self, query: str) -> Dict[str, Any]:
        """
        Classify a query by type and intent.

        Args:
            query: Query to classify

        Returns:
            Dictionary holding the unmodified query under ``"original_query"``
            and, under ``"structured_query"``, the classification fields.
            Fields absent from the classification are ``None``; the
            ``is_*`` flags default to ``False``.
        """
        # Classify the query using the sim-search LLM interface.
        classification = await self.llm_interface.classify_query_domain(query)

        # The lookups below already tolerate missing keys; treat a missing
        # classification (None) the same way instead of raising
        # AttributeError on ``None.get``.
        if classification is None:
            classification = {}

        structured_query = {
            "original_query": query,
            "type": classification.get("type"),
            "intent": classification.get("intent"),
            "domain": classification.get("domain"),
            "confidence": classification.get("confidence"),
            "reasoning": classification.get("reasoning"),
            "is_academic": classification.get("is_academic", False),
            "is_code": classification.get("is_code", False),
            "is_current_events": classification.get("is_current_events", False),
        }

        return {
            "original_query": query,
            "structured_query": structured_query,
        }