# ira/tests/integration/test_query_classification_s...
#
# 101 lines
# 3.7 KiB
# Python

"""
Integration test for query classification and search execution.
This test demonstrates how the LLM-based query domain classification
affects the search engines selected for different types of queries.
"""
import os
import sys
import json
import asyncio
from typing import Dict, Any, List
# Add the directory two levels above this file to sys.path so the imports below can be resolved
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
from query.query_processor import get_query_processor
from execution.search_executor import get_search_executor
async def test_query_classification_search_integration() -> None:
    """Run sample queries through classification and search, then save a summary.

    For each sample query this coroutine:

    1. processes the query with the query processor,
    2. reads the classification fields (``domain``, ``domain_confidence``,
       ``is_academic``, ``is_code``, ``is_current_events``) from the structured
       query, falling back to defaults when a field is missing,
    3. executes a search with the search executor, and
    4. records the keys of the returned mapping as the selected engines,
       together with the number of results stored under each key.

    Progress is printed to stdout and the collected summaries are written as
    JSON to ``query_classification_search_results.json`` (relative to the
    current working directory).

    The function performs no assertions; it only reports what was observed.
    """
    query_processor = get_query_processor()
    search_executor = get_search_executor()

    # Single source of truth for the report path (used for writing and logging).
    output_path = 'query_classification_search_results.json'

    # Test queries for different domains
    test_queries: List[Dict[str, str]] = [
        {
            "description": "Academic query about quantum computing",
            "query": "What are the latest theoretical advances in quantum computing algorithms?",
        },
        {
            "description": "Code query about implementing a neural network",
            "query": "How do I implement a convolutional neural network in TensorFlow?",
        },
        {
            "description": "Current events query about economic policy",
            "query": "What are the recent changes to Federal Reserve interest rates and their economic impact?",
        },
        {
            "description": "Mixed query with academic and code aspects",
            "query": "How are transformer models being implemented for natural language processing tasks?",
        },
    ]

    results: List[Dict[str, Any]] = []
    for test_case in test_queries:
        query = test_case["query"]
        description = test_case["description"]

        print(f"\n=== Testing: {description} ===")
        print(f"Query: {query}")

        # Process the query
        # NOTE(review): assumes process_query returns a dict-like object
        # supporting .get() -- confirm against the query processor.
        structured_query = await query_processor.process_query(query)

        # Get domain classification results (with defaults for missing fields)
        domain = structured_query.get('domain', 'general')
        domain_confidence = structured_query.get('domain_confidence', 0.0)
        is_academic = structured_query.get('is_academic', False)
        is_code = structured_query.get('is_code', False)
        is_current_events = structured_query.get('is_current_events', False)

        print(f"Domain: {domain} (confidence: {domain_confidence})")
        print(f"Is academic: {is_academic}")
        print(f"Is code: {is_code}")
        print(f"Is current events: {is_current_events}")

        # Execute search with default search engines based on classification
        # NOTE(review): assumes execute_search returns a mapping of
        # engine name -> sized collection of results -- confirm.
        search_results = await search_executor.execute_search(structured_query)

        # Get the search engines that were selected
        selected_engines = list(search_results)
        print(f"Selected search engines: {selected_engines}")

        # Store the results. The comprehension variable is deliberately not
        # called `results`, so it cannot be confused with the accumulator list.
        results.append({
            "query": query,
            "description": description,
            "domain": domain,
            "domain_confidence": domain_confidence,
            "is_academic": is_academic,
            "is_code": is_code,
            "is_current_events": is_current_events,
            "selected_engines": selected_engines,
            "num_results_per_engine": {
                engine: len(engine_results)
                for engine, engine_results in search_results.items()
            },
        })

    # Save results to a file
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)

    print(f"\nResults saved to {output_path}")
if __name__ == "__main__":
    # Run the integration check on a fresh event loop when this file is
    # executed directly as a script.
    asyncio.run(test_query_classification_search_integration())