""" Google Scholar API handler. Uses the Serper API to access Google Scholar search results. """ import os import json import requests from typing import Dict, List, Any, Optional from .base_handler import BaseSearchHandler from config.config import get_config, get_api_key class ScholarSearchHandler(BaseSearchHandler): """Handler for Google Scholar Search using the Serper API.""" def __init__(self): """Initialize the Google Scholar search handler.""" self.config = get_config() self.api_key = get_api_key("serper") self.base_url = "https://google.serper.dev/scholar" self.available = self.api_key is not None def search(self, query: str, num_results: int = 10, **kwargs) -> List[Dict[str, Any]]: """ Execute a Google Scholar search query using Serper API. Args: query: The search query to execute num_results: Number of results to return **kwargs: Additional search parameters: - country: Country code (default: "us") - language: Language code (default: "en") - year_start: Start year for publication date filter - year_end: End year for publication date filter Returns: List of search results with standardized format """ if not self.available: raise ValueError("Google Scholar API is not available. API key is missing.") # Set up the request parameters params = { "q": query, "num": num_results, "type": "scholar" # Specify search type as scholar } # Add optional parameters if "country" in kwargs: params["gl"] = kwargs["country"] if "language" in kwargs: params["hl"] = kwargs["language"] # Add date range if specified date_range = "" if "year_start" in kwargs and "year_end" in kwargs: date_range = f"as_ylo={kwargs['year_start']}&as_yhi={kwargs['year_end']}" elif "year_start" in kwargs: date_range = f"as_ylo={kwargs['year_start']}" elif "year_end" in kwargs: date_range = f"as_yhi={kwargs['year_end']}" if date_range: params["tbs"] = date_range # Set up the headers headers = { "X-API-KEY": self.api_key, "Content-Type": "application/json" } try: # Make the request response = requests.post( self.base_url, headers=headers, json=params ) response.raise_for_status() # Parse the response data = response.json() # Process the results results = [] # Process organic results if "organic" in data: for item in data["organic"]: result = { "title": item.get("title", ""), "url": item.get("link", ""), "snippet": item.get("snippet", ""), "source": "scholar", "authors": item.get("authors", ""), "publication": item.get("publication", ""), "year": item.get("year", "") } results.append(result) return results except requests.exceptions.RequestException as e: print(f"Error executing Google Scholar search: {e}") return [] def get_name(self) -> str: """Get the name of the search handler.""" return "scholar" def is_available(self) -> bool: """Check if the Google Scholar API is available.""" return self.available def get_rate_limit_info(self) -> Dict[str, Any]: """Get information about the API's rate limits.""" # These are example values - adjust based on your Serper plan return { "requests_per_minute": 30, # Lower for Scholar due to its specialized nature "requests_per_day": 1000, "current_usage": None # Serper doesn't provide usage info in responses }