ira/query/query_processor.py

112 lines
3.5 KiB
Python

"""
Query processor module for the intelligent research system.
This module handles the processing of user queries, including enhancement,
classification, and structuring for downstream modules.
"""
from typing import Dict, Any, List, Optional
from .llm_interface import get_llm_interface
class QueryProcessor:
    """
    Processor for user research queries.

    Uses the LLM interface returned by ``get_llm_interface()`` to enhance and
    classify a raw query, packages the results into a plain dictionary for
    downstream modules, and generates per-engine search queries on request.
    """

    def __init__(self):
        """Initialize the query processor with the LLM interface it delegates to."""
        self.llm_interface = get_llm_interface()

    async def process_query(self, query: str) -> Dict[str, Any]:
        """
        Process a user query.

        Args:
            query: The raw user query

        Returns:
            Dictionary containing the processed query information, in the
            shape built by ``_structure_query``
        """
        # Both LLM calls receive the raw query: classification is performed on
        # the user's original wording, not on the enhanced text.
        enhanced_query = await self.llm_interface.enhance_query(query)
        classification = await self.llm_interface.classify_query(query)

        # Entities are read from the classification inside _structure_query,
        # so no separate extraction step is needed here.
        return self._structure_query(query, enhanced_query, classification)

    def _structure_query(self, original_query: str, enhanced_query: str,
                         classification: Dict[str, Any]) -> Dict[str, Any]:
        """
        Structure a query for downstream modules.

        Args:
            original_query: The original user query
            enhanced_query: The enhanced query
            classification: The query classification; read with ``.get`` so
                missing keys fall back to the defaults below

        Returns:
            Dictionary with the keys ``original_query``, ``enhanced_query``,
            ``type`` (default ``'unknown'``), ``intent`` (default
            ``'research'``), ``entities`` (default ``[]``), ``timestamp``
            (always ``None`` here) and ``metadata`` (holding the full
            classification under ``'classification'``)
        """
        return {
            'original_query': original_query,
            'enhanced_query': enhanced_query,
            'type': classification.get('type', 'unknown'),
            'intent': classification.get('intent', 'research'),
            'entities': classification.get('entities', []),
            'timestamp': None,  # Will be filled in by the caller
            'metadata': {
                # Keep the full classification so no field is lost.
                'classification': classification,
            },
        }

    async def generate_search_queries(self, structured_query: Dict[str, Any],
                                      search_engines: List[str]) -> Dict[str, Any]:
        """
        Generate optimized search queries for different search engines.

        Args:
            structured_query: The structured query; must contain the key
                ``'enhanced_query'`` (a ``KeyError`` is raised otherwise)
            search_engines: List of search engines to generate queries for

        Returns:
            The same ``structured_query`` object, updated in place with a
            ``'search_queries'`` entry holding the LLM interface's result
        """
        # Use the enhanced query (not the original) for generating search queries
        enhanced_query = structured_query['enhanced_query']

        search_queries = await self.llm_interface.generate_search_queries(
            enhanced_query, search_engines
        )

        # The caller's dict is mutated and returned; no copy is made.
        structured_query['search_queries'] = search_queries
        return structured_query
# Module-level singleton instance for global use; get_query_processor()
# returns this object.
# NOTE: it is constructed at import time, so importing this module runs
# QueryProcessor.__init__, which in turn calls get_llm_interface().
query_processor: QueryProcessor = QueryProcessor()
def get_query_processor() -> QueryProcessor:
    """
    Return the module-level ``query_processor`` singleton.

    No new object is created; this simply hands back whatever the
    module-level name ``query_processor`` currently refers to.

    Returns:
        The global QueryProcessor instance
    """
    return query_processor