""" Integration test for query classification and search execution. This test demonstrates how the LLM-based query domain classification affects the search engines selected for different types of queries. """ import os import sys import json import asyncio from typing import Dict, Any, List # Add parent directory to path sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))) from query.query_processor import get_query_processor from execution.search_executor import get_search_executor async def test_query_classification_search_integration(): """Test how query classification affects search engine selection.""" query_processor = get_query_processor() search_executor = get_search_executor() # Test queries for different domains test_queries = [ { "description": "Academic query about quantum computing", "query": "What are the latest theoretical advances in quantum computing algorithms?" }, { "description": "Code query about implementing a neural network", "query": "How do I implement a convolutional neural network in TensorFlow?" }, { "description": "Current events query about economic policy", "query": "What are the recent changes to Federal Reserve interest rates and their economic impact?" }, { "description": "Mixed query with academic and code aspects", "query": "How are transformer models being implemented for natural language processing tasks?" } ] results = [] for test_case in test_queries: query = test_case["query"] description = test_case["description"] print(f"\n=== Testing: {description} ===") print(f"Query: {query}") # Process the query structured_query = await query_processor.process_query(query) # Get domain classification results domain = structured_query.get('domain', 'general') domain_confidence = structured_query.get('domain_confidence', 0.0) is_academic = structured_query.get('is_academic', False) is_code = structured_query.get('is_code', False) is_current_events = structured_query.get('is_current_events', False) print(f"Domain: {domain} (confidence: {domain_confidence})") print(f"Is academic: {is_academic}") print(f"Is code: {is_code}") print(f"Is current events: {is_current_events}") # Execute search with default search engines based on classification search_results = await search_executor.execute_search(structured_query) # Get the search engines that were selected selected_engines = list(search_results.keys()) print(f"Selected search engines: {selected_engines}") # Store the results result = { "query": query, "description": description, "domain": domain, "domain_confidence": domain_confidence, "is_academic": is_academic, "is_code": is_code, "is_current_events": is_current_events, "selected_engines": selected_engines, "num_results_per_engine": {engine: len(results) for engine, results in search_results.items()} } results.append(result) # Save results to a file with open('query_classification_search_results.json', 'w') as f: json.dump(results, indent=2, fp=f) print(f"\nResults saved to query_classification_search_results.json") if __name__ == "__main__": asyncio.run(test_query_classification_search_integration())