""" Test script for the Jina Reranker integration. This script tests the reranker functionality by comparing results with and without reranking. """ import json import time from pathlib import Path from query.query_processor import QueryProcessor from execution.search_executor import SearchExecutor from execution.result_collector import ResultCollector from ranking.jina_reranker import get_jina_reranker def test_reranker(): """Test the reranker functionality.""" # Initialize components query_processor = QueryProcessor() search_executor = SearchExecutor() result_collector = ResultCollector() # Check if reranker is available try: reranker = get_jina_reranker() reranker_available = True print("Jina Reranker is available.") except ValueError: reranker_available = False print("Jina Reranker is not available. Will only test basic scoring.") # Process a test query query = "What are the latest advancements in quantum computing?" print(f"Processing query: {query}") processed_query = query_processor.process_query(query) print(f"Processed query: {processed_query}") # Execute the search available_engines = search_executor.get_available_search_engines() print(f"Available search engines: {available_engines}") if 'search_engines' not in processed_query: processed_query['search_engines'] = available_engines # Execute the search search_results = search_executor.execute_search( structured_query=processed_query, num_results=10 ) # Print which engines returned results for engine, results in search_results.items(): print(f"Engine {engine} returned {len(results)} results") # Add the query to each result for reranking for engine, results in search_results.items(): for result in results: result["query"] = processed_query.get("enhanced_query", processed_query.get("original_query", query)) # Process results without reranking print("\nProcessing results without reranking...") basic_results = result_collector.process_results( search_results, dedup=True, max_results=None, use_reranker=False ) print(f"Processed {len(basic_results)} results with basic scoring") # Save basic results results_dir = Path(__file__).parent / "results" results_dir.mkdir(exist_ok=True) timestamp = int(time.time()) basic_file = results_dir / f"basic_results_{timestamp}.json" with open(basic_file, "w") as f: json.dump(basic_results, f, indent=2) print(f"Basic results saved to {basic_file}") # Process results with reranking (if available) if reranker_available: print("\nProcessing results with reranking...") reranked_results = result_collector.process_results( search_results, dedup=True, max_results=None, use_reranker=True ) print(f"Processed {len(reranked_results)} results with reranking") # Save reranked results reranked_file = results_dir / f"reranked_results_{timestamp}.json" with open(reranked_file, "w") as f: json.dump(reranked_results, f, indent=2) print(f"Reranked results saved to {reranked_file}") # Compare top 5 results print("\nComparing top 5 results:") print("\nTop 5 results with basic scoring:") for i, result in enumerate(basic_results[:5]): print(f"{i+1}. {result.get('title')} (Score: {result.get('relevance_score')}, Source: {result.get('source')})") print("\nTop 5 results with reranking:") for i, result in enumerate(reranked_results[:5]): print(f"{i+1}. {result.get('title')} (Score: {result.get('relevance_score')}, Source: {result.get('source')})") if __name__ == "__main__": test_reranker()