""" Test script for the search execution module. """ import os import json import time from typing import Dict, List, Any, Optional # Import the necessary modules try: from query.query_processor import get_query_processor, QueryProcessor from query.llm_interface import get_llm_interface from execution.search_executor import SearchExecutor from execution.result_collector import ResultCollector except ImportError as e: print(f"Import error: {e}") print("Make sure all required modules are installed and available.") exit(1) def get_query_processor(): """Get a query processor instance.""" # First set the LLM interface to use Groq's model from query.llm_interface import get_llm_interface get_llm_interface(model_name="llama-3.1-8b-instant") # Then get the query processor which will use the configured LLM interface from query.query_processor import get_query_processor return get_query_processor() def test_search_execution(query: str, search_engines: Optional[List[str]] = None) -> Dict[str, Any]: """ Test the search execution module. Args: query: The query to process and execute search_engines: List of search engines to use (if None, use all available) Returns: Dictionary with test results """ print(f"Testing search execution for query: {query}") # Process the query processor = get_query_processor() start_time = time.time() structured_query = processor.process_query(query) query_time = time.time() - start_time print(f"Query processed in {query_time:.2f} seconds") print(f"Enhanced query: {structured_query.get('enhanced_query', '')}") print(f"Classification: {structured_query.get('classification', {})}") # Execute the search executor = SearchExecutor() # Get available search engines if none specified if search_engines is None: search_engines = executor.get_available_search_engines() print(f"Using available search engines: {search_engines}") # Execute the search start_time = time.time() search_results = executor.execute_search(structured_query, search_engines=search_engines) search_time = time.time() - start_time print(f"Search executed in {search_time:.2f} seconds") # Print raw search results for debugging print("\nRaw search results:") for engine, results in search_results.items(): print(f" {engine}: {len(results)} results") if results: print(f" Sample result: {results[0]}") # Process the results collector = ResultCollector() processed_results = collector.process_results(search_results, dedup=True) # Print summary of results total_results = len(processed_results) print(f"Found {total_results} results after deduplication") # Print results by source results_by_source = {} for result in processed_results: source = result.get("source", "unknown") if source not in results_by_source: results_by_source[source] = 0 results_by_source[source] += 1 print("Results by source:") for source, count in results_by_source.items(): print(f" {source}: {count}") # Print top 3 results if processed_results: print("\nTop 3 results:") for i, result in enumerate(processed_results[:3]): print(f" {i+1}. {result['title']}") print(f" URL: {result['url']}") print(f" Snippet: {result['snippet'][:100]}...") print() # Return test results return { "query": query, "structured_query": structured_query, "search_engines": search_engines, "raw_results": search_results, "processed_results": processed_results, "timing": { "query_processing": query_time, "search_execution": search_time, "total": query_time + search_time }, "summary": { "total_results": total_results, "results_by_source": results_by_source } } def save_test_results(results: Dict[str, Any], file_path: str) -> None: """ Save test results to a file. Args: results: Test results to save file_path: Path to save results to """ try: with open(file_path, 'w') as f: json.dump(results, f, indent=2) print(f"Test results saved to {file_path}") except Exception as e: print(f"Error saving test results: {e}") def mock_test(): """Run a mock test without actual API calls.""" print("Running mock test without API calls...") # Create a mock structured query structured_query = { "original_query": "What are the latest advancements in quantum computing?", "enhanced_query": "Explore the most recent breakthroughs and developments in quantum computing technology, including hardware innovations, quantum algorithms, and potential applications.", "classification": { "type": "exploratory", "intent": "research", "entities": ["quantum computing", "advancements", "technology"] }, "search_queries": { "google": "latest advancements in quantum computing 2025 breakthroughs", "scholar": "recent quantum computing developments research papers", "arxiv": "quantum computing hardware algorithms applications" } } # Create mock search results mock_results = { "google": [ { "title": "Quantum Computing Breakthrough: New Qubit Design Achieves 99.9% Fidelity", "url": "https://example.com/quantum-breakthrough", "snippet": "Researchers at MIT have developed a new qubit design that achieves 99.9% fidelity, a major step toward practical quantum computing.", "position": 1 }, { "title": "IBM Unveils 1000-Qubit Quantum Computer", "url": "https://example.com/ibm-quantum", "snippet": "IBM has announced its latest quantum computer featuring 1000 qubits, significantly expanding computational capabilities.", "position": 2 } ], "arxiv": [ { "title": "Quantum Error Correction Using Surface Codes", "url": "https://arxiv.org/abs/2301.12345", "snippet": "This paper presents a new approach to quantum error correction using surface codes that improves error tolerance by an order of magnitude.", "authors": ["Smith, J.", "Johnson, A."], "published_date": "2025-01-15", "position": 1 } ] } # Process the results collector = ResultCollector() processed_results = collector.process_results(mock_results, dedup=True) # Print summary total_results = len(processed_results) print(f"Found {total_results} mock results after deduplication") # Print results by source results_by_source = {} for result in processed_results: source = result.get("source", "unknown") if source not in results_by_source: results_by_source[source] = 0 results_by_source[source] += 1 print("Results by source:") for source, count in results_by_source.items(): print(f" {source}: {count}") # Print top 3 results if processed_results: print("\nTop 3 results:") for i, result in enumerate(processed_results[:3]): print(f" {i+1}. {result['title']}") print(f" URL: {result['url']}") print(f" Snippet: {result['snippet'][:100]}...") print() # Return mock test results return { "query": "What are the latest advancements in quantum computing?", "structured_query": structured_query, "search_engines": ["google", "arxiv"], "raw_results": mock_results, "processed_results": processed_results, "timing": { "query_processing": 0.5, "search_execution": 1.2, "total": 1.7 }, "summary": { "total_results": total_results, "results_by_source": results_by_source } } def main(): """Main function.""" # Test queries test_queries = [ "What are the latest advancements in quantum computing?", "Compare blockchain and traditional databases for financial applications", "Explain the implications of blockchain technology in finance" ] # Run tests all_results = {} for query in test_queries: print("\n" + "="*80) print(f"Testing query: {query}") print("="*80) # Test with all available search engines results = test_search_execution(query) # Save results for this query all_results[query] = results print("\n") # Save all test results save_test_results(all_results, "search_execution_test_results.json") if __name__ == "__main__": main()