101 lines
3.7 KiB
Python
101 lines
3.7 KiB
Python
"""
|
|
Integration test for query classification and search execution.
|
|
|
|
This test demonstrates how the LLM-based query domain classification
|
|
affects the search engines selected for different types of queries.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import json
|
|
import asyncio
|
|
from typing import Dict, Any, List
|
|
|
|
# Make the directory two levels above this file importable, so the
# `query` and `execution` packages imported below can be resolved when
# this script is run directly.
sys.path.append(
    os.path.abspath(
        os.path.join(os.path.dirname(__file__), '../..')
    )
)
|
|
|
|
from query.query_processor import get_query_processor
|
|
from execution.search_executor import get_search_executor
|
|
|
|
|
|
async def test_query_classification_search_integration():
    """Show how query classification affects search engine selection.

    Each sample query is passed to the query processor; the classification
    fields of the resulting structured query are printed, the structured
    query is handed to the search executor, and the engines present in the
    returned results are recorded together with a per-engine result count.

    The collected summaries are written as JSON to
    ``query_classification_search_results.json`` in the current working
    directory. The function makes no assertions; it only reports what it
    observed. Any exception raised by the processor, the executor, or the
    file write propagates to the caller.
    """
    output_path = 'query_classification_search_results.json'

    query_processor = get_query_processor()
    search_executor = get_search_executor()

    # Test queries for different domains
    test_queries = [
        {
            "description": "Academic query about quantum computing",
            "query": "What are the latest theoretical advances in quantum computing algorithms?",
        },
        {
            "description": "Code query about implementing a neural network",
            "query": "How do I implement a convolutional neural network in TensorFlow?",
        },
        {
            "description": "Current events query about economic policy",
            "query": "What are the recent changes to Federal Reserve interest rates and their economic impact?",
        },
        {
            "description": "Mixed query with academic and code aspects",
            "query": "How are transformer models being implemented for natural language processing tasks?",
        },
    ]

    results = []

    for test_case in test_queries:
        query = test_case["query"]
        description = test_case["description"]

        print(f"\n=== Testing: {description} ===")
        print(f"Query: {query}")

        # Process the query; the result is read below as a mapping.
        structured_query = await query_processor.process_query(query)

        # Classification fields, with fallbacks for keys the processor
        # did not set.
        domain = structured_query.get('domain', 'general')
        domain_confidence = structured_query.get('domain_confidence', 0.0)
        is_academic = structured_query.get('is_academic', False)
        is_code = structured_query.get('is_code', False)
        is_current_events = structured_query.get('is_current_events', False)

        print(f"Domain: {domain} (confidence: {domain_confidence})")
        print(f"Is academic: {is_academic}")
        print(f"Is code: {is_code}")
        print(f"Is current events: {is_current_events}")

        # No engines are passed explicitly, so the executor chooses them.
        search_results = await search_executor.execute_search(structured_query)

        # The keys of the returned mapping are the engines that were used.
        selected_engines = list(search_results)
        print(f"Selected search engines: {selected_engines}")

        # `hits` (not `results`) so the comprehension variable does not
        # shadow the accumulator list above.
        # NOTE(review): assumes every per-engine value supports len() -
        # confirm against the search executor.
        hits_per_engine = {
            engine: len(hits) for engine, hits in search_results.items()
        }

        results.append({
            "query": query,
            "description": description,
            "domain": domain,
            "domain_confidence": domain_confidence,
            "is_academic": is_academic,
            "is_code": is_code,
            "is_current_events": is_current_events,
            "selected_engines": selected_engines,
            "num_results_per_engine": hits_per_engine,
        })

    # Explicit encoding keeps the report independent of the platform locale.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)

    print(f"\nResults saved to {output_path}")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
asyncio.run(test_query_classification_search_integration())
|