268 lines
9.0 KiB
Python
268 lines
9.0 KiB
Python
"""
Test script for the search execution module.
"""
|
|
|
|
import os
|
|
import json
|
|
import time
|
|
from typing import Dict, List, Any, Optional
|
|
|
|
# Import the necessary modules
|
|
try:
|
|
from query.query_processor import get_query_processor, QueryProcessor
|
|
from query.llm_interface import get_llm_interface
|
|
from execution.search_executor import SearchExecutor
|
|
from execution.result_collector import ResultCollector
|
|
except ImportError as e:
|
|
print(f"Import error: {e}")
|
|
print("Make sure all required modules are installed and available.")
|
|
exit(1)
|
|
|
|
|
|
def get_query_processor():
    """Return a query processor created after the LLM interface is set up.

    ``get_llm_interface`` is called with ``model_name="llama-3.1-8b-instant"``
    and its return value is discarded; the processor is then obtained from
    ``query.query_processor.get_query_processor``.

    Returns:
        Whatever ``query.query_processor.get_query_processor()`` returns.
    """
    # Aliased so the imported factory is not confused with this wrapper,
    # which carries the same name.
    from query.llm_interface import get_llm_interface as configure_llm

    # Select the model first (the original comment describes it as Groq's
    # model). NOTE(review): presumably this configures a shared interface
    # that the processor picks up -- confirm in query.llm_interface.
    configure_llm(model_name="llama-3.1-8b-instant")

    from query.query_processor import get_query_processor as build_processor

    return build_processor()
|
|
|
|
|
|
def test_search_execution(query: str, search_engines: Optional[List[str]] = None) -> Dict[str, Any]:
    """
    Test the search execution module.

    Processes the query, executes the search on the selected engines,
    passes the raw results through ``ResultCollector.process_results`` with
    ``dedup=True`` and prints a progress report along the way.

    Args:
        query: The query to process and execute
        search_engines: List of search engines to use (if None, use all available)

    Returns:
        Dictionary with test results: "query", "structured_query",
        "search_engines", "raw_results", "processed_results", "timing"
        (seconds for "query_processing", "search_execution" and "total")
        and "summary" ("total_results" and "results_by_source").
    """
    print(f"Testing search execution for query: {query}")

    # Process the query
    processor = get_query_processor()
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by a system clock adjustment the way time.time() differences can.
    start_time = time.perf_counter()
    structured_query = processor.process_query(query)
    query_time = time.perf_counter() - start_time

    print(f"Query processed in {query_time:.2f} seconds")
    print(f"Enhanced query: {structured_query.get('enhanced_query', '')}")
    print(f"Classification: {structured_query.get('classification', {})}")

    # Execute the search
    executor = SearchExecutor()

    # Get available search engines if none specified
    if search_engines is None:
        search_engines = executor.get_available_search_engines()
        print(f"Using available search engines: {search_engines}")

    start_time = time.perf_counter()
    search_results = executor.execute_search(structured_query, search_engines=search_engines)
    search_time = time.perf_counter() - start_time

    print(f"Search executed in {search_time:.2f} seconds")

    # Print raw search results for debugging
    print("\nRaw search results:")
    for engine, results in search_results.items():
        print(f" {engine}: {len(results)} results")
        if results:
            print(f" Sample result: {results[0]}")

    # Process the results
    collector = ResultCollector()
    processed_results = collector.process_results(search_results, dedup=True)

    # Print summary of results
    total_results = len(processed_results)
    print(f"Found {total_results} results after deduplication")

    # Count results per source; results without a "source" go to "unknown"
    results_by_source: Dict[str, int] = {}
    for result in processed_results:
        source = result.get("source", "unknown")
        results_by_source[source] = results_by_source.get(source, 0) + 1

    print("Results by source:")
    for source, count in results_by_source.items():
        print(f" {source}: {count}")

    # Print top 3 results
    if processed_results:
        print("\nTop 3 results:")
        for rank, result in enumerate(processed_results[:3], start=1):
            # A result missing a field (or with a None snippet) must not
            # abort the report and throw away everything collected so far.
            title = result.get("title", "")
            url = result.get("url", "")
            snippet = result.get("snippet") or ""
            print(f" {rank}. {title}")
            print(f" URL: {url}")
            print(f" Snippet: {snippet[:100]}...")
            print()

    # Return test results
    return {
        "query": query,
        "structured_query": structured_query,
        "search_engines": search_engines,
        "raw_results": search_results,
        "processed_results": processed_results,
        "timing": {
            "query_processing": query_time,
            "search_execution": search_time,
            "total": query_time + search_time
        },
        "summary": {
            "total_results": total_results,
            "results_by_source": results_by_source
        }
    }
|
|
|
|
|
|
def save_test_results(results: Dict[str, Any], file_path: str) -> None:
    """
    Save test results to a file as JSON indented by two spaces.

    The results are serialised in memory before the file is opened, so a
    value that ``json`` cannot encode leaves an existing file at
    ``file_path`` untouched instead of truncating it or leaving partial
    JSON behind.

    Any failure is reported on stdout and not re-raised.

    Args:
        results: Test results to save
        file_path: Path to save results to
    """
    try:
        # Serialise first: opening with 'w' truncates the target, which
        # must only happen once there is a complete document to write.
        payload = json.dumps(results, indent=2)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(payload)
        print(f"Test results saved to {file_path}")
    except Exception as e:
        # Deliberately broad: saving is best-effort at the end of a test
        # run and must not crash the script.
        print(f"Error saving test results: {e}")
|
|
|
|
|
|
def mock_test():
    """Exercise result collection on canned data, without any API calls.

    A fixed structured query and fixed per-engine results are built, the
    results are passed through ``ResultCollector.process_results`` with
    ``dedup=True``, and a summary is printed.

    Returns:
        Dictionary with keys "query", "structured_query", "search_engines",
        "raw_results", "processed_results", "timing" (fixed placeholder
        numbers) and "summary".
    """
    print("Running mock test without API calls...")

    # Canned structured query
    structured_query = {
        "original_query": "What are the latest advancements in quantum computing?",
        "enhanced_query": "Explore the most recent breakthroughs and developments in quantum computing technology, including hardware innovations, quantum algorithms, and potential applications.",
        "classification": {
            "type": "exploratory",
            "intent": "research",
            "entities": ["quantum computing", "advancements", "technology"],
        },
        "search_queries": {
            "google": "latest advancements in quantum computing 2025 breakthroughs",
            "scholar": "recent quantum computing developments research papers",
            "arxiv": "quantum computing hardware algorithms applications",
        },
    }

    # Canned raw results, keyed by engine name
    google_hits = [
        {
            "title": "Quantum Computing Breakthrough: New Qubit Design Achieves 99.9% Fidelity",
            "url": "https://example.com/quantum-breakthrough",
            "snippet": "Researchers at MIT have developed a new qubit design that achieves 99.9% fidelity, a major step toward practical quantum computing.",
            "position": 1,
        },
        {
            "title": "IBM Unveils 1000-Qubit Quantum Computer",
            "url": "https://example.com/ibm-quantum",
            "snippet": "IBM has announced its latest quantum computer featuring 1000 qubits, significantly expanding computational capabilities.",
            "position": 2,
        },
    ]
    arxiv_hits = [
        {
            "title": "Quantum Error Correction Using Surface Codes",
            "url": "https://arxiv.org/abs/2301.12345",
            "snippet": "This paper presents a new approach to quantum error correction using surface codes that improves error tolerance by an order of magnitude.",
            "authors": ["Smith, J.", "Johnson, A."],
            "published_date": "2025-01-15",
            "position": 1,
        },
    ]
    mock_results = {"google": google_hits, "arxiv": arxiv_hits}

    # Run the canned results through the collector
    processed_results = ResultCollector().process_results(mock_results, dedup=True)

    total_results = len(processed_results)
    print(f"Found {total_results} mock results after deduplication")

    # Tally results per source; results without a "source" count as "unknown"
    results_by_source = {}
    for item in processed_results:
        origin = item.get("source", "unknown")
        results_by_source[origin] = results_by_source.get(origin, 0) + 1

    print("Results by source:")
    for origin, tally in results_by_source.items():
        print(f" {origin}: {tally}")

    # Show at most the first three processed results
    if processed_results:
        print("\nTop 3 results:")
        for rank, item in enumerate(processed_results[:3], start=1):
            print(f" {rank}. {item['title']}")
            print(f" URL: {item['url']}")
            print(f" Snippet: {item['snippet'][:100]}...")
            print()

    timing = {
        "query_processing": 0.5,
        "search_execution": 1.2,
        "total": 1.7,
    }
    summary = {
        "total_results": total_results,
        "results_by_source": results_by_source,
    }
    return {
        # Same text as the literal query the mock structured query carries.
        "query": structured_query["original_query"],
        "structured_query": structured_query,
        # The engines with canned results, in insertion order: google, arxiv.
        "search_engines": list(mock_results),
        "raw_results": mock_results,
        "processed_results": processed_results,
        "timing": timing,
        "summary": summary,
    }
|
|
|
|
|
|
def main():
    """Run each sample query through the search pipeline and save the results.

    Every query is passed to ``test_search_execution`` with no engine list,
    so all available engines are used. The results, keyed by query text,
    are written to ``search_execution_test_results.json``.
    """
    test_queries = [
        "What are the latest advancements in quantum computing?",
        "Compare blockchain and traditional databases for financial applications",
        "Explain the implications of blockchain technology in finance",
    ]
    separator = "=" * 80

    all_results = {}
    for query in test_queries:
        # Banner announcing the query
        print("\n" + separator)
        print(f"Testing query: {query}")
        print(separator)

        all_results[query] = test_search_execution(query)

        print("\n")

    save_test_results(all_results, "search_execution_test_results.json")


# Script entry point: run the tests only when executed directly.
if __name__ == "__main__":
    main()
|