# ira/examples/scripts/academic_search_example.py
#
# Listing metadata from the original file viewer: 88 lines, 3.2 KiB, Python

"""
Example script for using the academic search handlers.
"""
import asyncio
import sys
import os
from datetime import datetime
# Add the project root to the Python path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
from execution.search_executor import SearchExecutor
from query.query_processor import get_query_processor
from config.config import get_config
# Maximum number of snippet characters shown per result.
_SNIPPET_PREVIEW_CHARS = 200


def _format_snippet(result, limit=_SNIPPET_PREVIEW_CHARS):
    """Return a printable preview of a result's ``snippet`` field.

    Args:
        result: Mapping-like search result; only ``result.get('snippet')`` is read.
        limit: Maximum number of snippet characters to keep.

    Returns:
        ``"No snippet"`` when the field is missing, ``None`` or empty; otherwise
        the snippet text cut to *limit* characters, with ``"..."`` appended only
        when something was actually cut off.
    """
    # `.get(key, default)` only falls back when the key is absent, so a stored
    # None would reach the slice below and raise TypeError; `or` covers both.
    snippet = str(result.get('snippet') or 'No snippet')
    if len(snippet) > limit:
        return f"{snippet[:limit]}..."
    return snippet


async def main():
    """Run a sample academic search and print the outcome.

    Steps, in order:
      1. List the search engines reported by ``SearchExecutor`` and keep the
         academic ones (openalex, core, scholar, arxiv) that are available.
         If none is available, print a hint and return early.
      2. Prompt for a query on stdin (an empty answer selects a built-in
         default query).
      3. Process the query, flag it as academic, and generate search queries
         for each available academic engine.
      4. Execute the search (5 results requested) and print every
         result per engine, followed by the elapsed time measured from just
         after the prompt.

    Returns:
        None. All output goes to stdout.
    """
    # Initialize components
    query_processor = get_query_processor()
    search_executor = SearchExecutor()

    # Get a list of available search engines
    available_engines = search_executor.get_available_search_engines()
    print(f"Available search engines: {', '.join(available_engines)}")

    # Check if academic search engines are available
    academic_engines = ["openalex", "core", "scholar", "arxiv"]
    available_academic = [engine for engine in academic_engines if engine in available_engines]
    if not available_academic:
        print("No academic search engines are available. Please check your configuration.")
        return
    print(f"Available academic search engines: {', '.join(available_academic)}")

    # Prompt for the query; an empty answer falls back to the default query
    query = input("Enter your academic research query: ") or "What are the latest papers on large language model alignment?"
    print(f"\nProcessing query: {query}")

    # Process the query (timing starts after the prompt so typing time is excluded)
    start_time = datetime.now()
    structured_query = await query_processor.process_query(query)

    # Add academic query flag
    structured_query["is_academic"] = True

    # Generate search queries optimized for each engine
    structured_query = await query_processor.generate_search_queries(
        structured_query, available_academic
    )

    # Print the optimized queries
    print("\nOptimized queries for academic search:")
    for engine in available_academic:
        print(f"\n{engine.upper()} queries:")
        for i, q in enumerate(structured_query.get("search_queries", {}).get(engine, [])):
            print(f"{i+1}. {q}")

    # Execute the search
    results = await search_executor.execute_search_async(
        structured_query,
        search_engines=available_academic,
        num_results=5
    )

    # Print the results
    total_results = sum(len(engine_results) for engine_results in results.values())
    print(f"\nFound {total_results} academic results:")
    for engine, engine_results in results.items():
        print(f"\n--- {engine.upper()} Results ({len(engine_results)}) ---")
        for i, result in enumerate(engine_results):
            print(f"\n{i+1}. {result.get('title', 'No title')}")
            print(f"Authors: {result.get('authors', 'Unknown')}")
            print(f"Year: {result.get('year', 'Unknown')}")
            print(f"Access: {result.get('access_status', 'Unknown')}")
            print(f"URL: {result.get('url', 'No URL')}")
            print(f"Snippet: {_format_snippet(result)}")

    end_time = datetime.now()
    print(f"\nSearch completed in {(end_time - start_time).total_seconds():.2f} seconds")
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    demo = main()
    asyncio.run(demo)