88 lines
3.2 KiB
Python
88 lines
3.2 KiB
Python
"""
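Example script for using the academic search handlers.

Prompts for a research query, runs it against the available academic
search engines, and prints the results.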
"""
|
|
|
|
import asyncio
|
|
import sys
|
|
import os
|
|
from datetime import datetime
|
|
|
|
# Add the project root to the Python path
|
|
# '../..' climbs two directory levels from this script's own directory; the
# resulting absolute path is put first so project packages resolve from there.
_PROJECT_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '../..')
)
sys.path.insert(0, _PROJECT_ROOT)
|
|
|
|
from execution.search_executor import SearchExecutor
|
|
from query.query_processor import get_query_processor
|
|
from config.config import get_config
|
|
|
|
|
|
async def main():
    """Run a sample academic search from the command line.

    Workflow:
        1. Build the query processor and the search executor.
        2. List the available search engines and keep only the academic
           ones ("openalex", "core", "scholar", "arxiv"); return early if
           none of them is available.
        3. Prompt for a query; a built-in sample query is used when nothing
           (or only whitespace) is entered, or when stdin is closed.
        4. Process the query, flag it as academic, and generate search
           queries for each available academic engine.
        5. Execute the search with ``num_results=5`` and print every result
           followed by the elapsed time.

    Returns:
        None. All output goes to stdout.
    """
    # Initialize components
    query_processor = get_query_processor()
    search_executor = SearchExecutor()

    # Get a list of available search engines
    available_engines = search_executor.get_available_search_engines()
    print(f"Available search engines: {', '.join(available_engines)}")

    # Keep only the academic engines that are actually available
    academic_engines = ["openalex", "core", "scholar", "arxiv"]
    available_academic = [
        engine for engine in academic_engines if engine in available_engines
    ]

    # Guard clause: nothing to do without at least one academic engine.
    if not available_academic:
        print("No academic search engines are available. Please check your configuration.")
        return
    print(f"Available academic search engines: {', '.join(available_academic)}")

    # Prompt for the query. A closed stdin (EOFError) or a blank/whitespace-only
    # answer falls back to the sample query instead of crashing or searching
    # for an empty string.
    default_query = "What are the latest papers on large language model alignment?"
    try:
        entered = input("Enter your academic research query: ")
    except EOFError:
        entered = ""
    query = entered.strip() or default_query

    print(f"\nProcessing query: {query}")

    # Process the query; the timer covers processing, searching and printing.
    start_time = datetime.now()
    structured_query = await query_processor.process_query(query)

    # Add academic query flag
    structured_query["is_academic"] = True

    # Generate search queries optimized for each engine
    structured_query = await query_processor.generate_search_queries(
        structured_query, available_academic
    )

    _print_optimized_queries(structured_query, available_academic)

    # Execute the search
    results = await search_executor.execute_search_async(
        structured_query,
        search_engines=available_academic,
        num_results=5,
    )

    _print_results(results)

    end_time = datetime.now()
    print(f"\nSearch completed in {(end_time - start_time).total_seconds():.2f} seconds")


def _print_optimized_queries(structured_query, engines):
    """Print the per-engine queries stored under ``"search_queries"``."""
    print("\nOptimized queries for academic search:")
    # `or {}` / `or []` tolerate a key that is present but set to None.
    queries_by_engine = structured_query.get("search_queries") or {}
    for engine in engines:
        print(f"\n{engine.upper()} queries:")
        for number, text in enumerate(queries_by_engine.get(engine) or [], start=1):
            print(f"{number}. {text}")


def _print_results(results):
    """Print the total result count, then every result grouped by engine."""
    total_results = sum(len(engine_results) for engine_results in results.values())
    print(f"\nFound {total_results} academic results:")

    for engine, engine_results in results.items():
        print(f"\n--- {engine.upper()} Results ({len(engine_results)}) ---")
        for number, result in enumerate(engine_results, start=1):
            print(f"\n{number}. {_format_field(result.get('title'), 'No title')}")
            print(f"Authors: {_format_field(result.get('authors'), 'Unknown')}")
            print(f"Year: {_format_field(result.get('year'), 'Unknown')}")
            print(f"Access: {_format_field(result.get('access_status'), 'Unknown')}")
            print(f"URL: {_format_field(result.get('url'), 'No URL')}")
            print(f"Snippet: {_format_snippet(result.get('snippet'))}")


def _format_field(value, placeholder):
    """Return a printable form of a result field, or ``placeholder`` if None."""
    if value is None:
        return placeholder
    # NOTE(review): the result schema is not visible here; list-valued fields
    # (e.g. several authors) are assumed possible and joined for readability.
    if isinstance(value, (list, tuple)):
        return ", ".join(str(item) for item in value)
    return str(value)


def _format_snippet(snippet, limit=200):
    """Return the snippet cut to ``limit`` characters, marking truncation.

    A missing/None snippet becomes "No snippet"; non-string values are
    converted with ``str`` so slicing cannot raise.
    """
    if snippet is None:
        return "No snippet"
    text = snippet if isinstance(snippet, str) else str(snippet)
    # Only add the ellipsis when something was actually cut off.
    return f"{text[:limit]}..." if len(text) > limit else text
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: create the coroutine and let asyncio drive it to
    # completion on a fresh event loop.
    entry_coroutine = main()
    asyncio.run(entry_coroutine)