"""
|
|
Query processor module for the intelligent research system.
|
|
|
|
This module handles the processing of user queries, including enhancement,
|
|
classification, and structuring for downstream modules.
|
|
"""
|
|
|
|
from typing import Dict, Any, List, Optional
|
|
|
|
from .llm_interface import get_llm_interface
|
|
|
|
|
|
class QueryProcessor:
    """
    Processor for user research queries.

    This class handles the processing of user queries, including enhancement,
    classification, and structuring for downstream modules.
    """

    def __init__(self):
        """Initialize the query processor."""
        self.llm_interface = get_llm_interface()

    async def process_query(self, query: str) -> Dict[str, Any]:
        """
        Process a user query.

        Args:
            query: The raw user query

        Returns:
            Dictionary containing the processed query information
        """
        # Enhance the query
        enhanced_query = await self.llm_interface.enhance_query(query)

        # Classify the query
        classification = await self.llm_interface.classify_query(query)

        # Extract entities from the classification
        entities = classification.get('entities', [])

        # Structure the query for downstream modules
        structured_query = self._structure_query(query, enhanced_query, classification)

        return structured_query

    def _structure_query(self, original_query: str, enhanced_query: str,
                         classification: Dict[str, Any]) -> Dict[str, Any]:
        """
        Structure a query for downstream modules.

        Args:
            original_query: The original user query
            enhanced_query: The enhanced query
            classification: The query classification

        Returns:
            Dictionary containing the structured query
        """
        return {
            'original_query': original_query,
            'enhanced_query': enhanced_query,
            'type': classification.get('type', 'unknown'),
            'intent': classification.get('intent', 'research'),
            'entities': classification.get('entities', []),
            'timestamp': None,  # Will be filled in by the caller
            'metadata': {
                'classification': classification
            }
        }

    async def generate_search_queries(self, structured_query: Dict[str, Any],
                                      search_engines: List[str]) -> Dict[str, Any]:
        """
        Generate optimized search queries for different search engines.

        Args:
            structured_query: The structured query
            search_engines: List of search engines to generate queries for

        Returns:
            Updated structured query with search queries
        """
        # Use the enhanced query for generating search queries
        enhanced_query = structured_query['enhanced_query']

        # Generate search queries for each engine
        search_queries = await self.llm_interface.generate_search_queries(
            enhanced_query, search_engines
        )

        # Add search queries to the structured query
        structured_query['search_queries'] = search_queries

        return structured_query


# Create a singleton instance for global use
query_processor = QueryProcessor()


def get_query_processor() -> QueryProcessor:
    """
    Get the global query processor instance.

    Returns:
        QueryProcessor instance
    """
    return query_processor
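

# Illustrative usage sketch, not part of the module's API: it assumes the LLM
# interface returned by get_llm_interface() is already configured and that its
# async enhance_query, classify_query, and generate_search_queries methods work
# as referenced above. The sample query and engine names are placeholders.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        processor = get_query_processor()
        # Enhance, classify, and structure a sample query.
        structured = await processor.process_query(
            "What are the latest advances in quantum error correction?"
        )
        # Generate per-engine search queries from the structured result.
        structured = await processor.generate_search_queries(
            structured, ["google", "arxiv"]
        )
        print(structured["enhanced_query"])
        print(structured.get("search_queries"))

    asyncio.run(_demo())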