""" NewsAPI handler for current events searches. Provides access to recent news articles from various sources. """ import os import requests import datetime from typing import Dict, List, Any, Optional from .base_handler import BaseSearchHandler from config.config import get_config, get_api_key class NewsSearchHandler(BaseSearchHandler): """Handler for NewsAPI.org for current events searches.""" def __init__(self): """Initialize the NewsAPI search handler.""" self.config = get_config() self.api_key = get_api_key("newsapi") self.base_url = "https://newsapi.org/v2/everything" self.top_headlines_url = "https://newsapi.org/v2/top-headlines" self.available = self.api_key is not None def search(self, query: str, num_results: int = 10, **kwargs) -> List[Dict[str, Any]]: """ Execute a search query using NewsAPI. Args: query: The search query to execute num_results: Number of results to return **kwargs: Additional search parameters: - days_back: Number of days back to search (default: 7) - sort_by: Sort by criteria ("relevancy", "popularity", "publishedAt") - language: Language code (default: "en") - sources: Comma-separated list of news sources - domains: Comma-separated list of domains - use_headlines: Whether to use top headlines endpoint (default: False) - country: Country code for headlines (default: "us") - category: Category for headlines Returns: List of search results with standardized format """ if not self.available: raise ValueError("NewsAPI is not available. API key is missing.") # Determine which endpoint to use use_headlines = kwargs.get("use_headlines", False) url = self.top_headlines_url if use_headlines else self.base_url # Calculate date range days_back = kwargs.get("days_back", 7) end_date = datetime.datetime.now().strftime("%Y-%m-%d") start_date = (datetime.datetime.now() - datetime.timedelta(days=days_back)).strftime("%Y-%m-%d") # Set up the request parameters params = { "q": query, "pageSize": num_results, "apiKey": self.api_key, } # Add parameters for everything endpoint if not use_headlines: params["from"] = start_date params["to"] = end_date params["sortBy"] = kwargs.get("sort_by", "publishedAt") if "language" in kwargs: params["language"] = kwargs["language"] else: params["language"] = "en" # Default to English if "sources" in kwargs: params["sources"] = kwargs["sources"] if "domains" in kwargs: params["domains"] = kwargs["domains"] # Add parameters for top-headlines endpoint else: if "country" in kwargs: params["country"] = kwargs["country"] else: params["country"] = "us" # Default to US if "category" in kwargs: params["category"] = kwargs["category"] try: # Make the request response = requests.get(url, params=params) response.raise_for_status() # Parse the response data = response.json() # Check if the request was successful if data.get("status") != "ok": print(f"NewsAPI error: {data.get('message', 'Unknown error')}") return [] # Process the results results = [] for article in data.get("articles", []): # Get the publication date with proper formatting pub_date = article.get("publishedAt", "") if pub_date: try: date_obj = datetime.datetime.fromisoformat(pub_date.replace("Z", "+00:00")) formatted_date = date_obj.strftime("%Y-%m-%d %H:%M:%S") except ValueError: formatted_date = pub_date else: formatted_date = "" # Create a standardized result result = { "title": article.get("title", ""), "url": article.get("url", ""), "snippet": article.get("description", ""), "source": f"news:{article.get('source', {}).get('name', 'unknown')}", "published_date": formatted_date, "author": article.get("author", ""), "image_url": article.get("urlToImage", ""), "content": article.get("content", "") } results.append(result) return results except requests.exceptions.RequestException as e: print(f"Error executing NewsAPI search: {e}") return [] def get_name(self) -> str: """Get the name of the search handler.""" return "news" def is_available(self) -> bool: """Check if the NewsAPI is available.""" return self.available def get_rate_limit_info(self) -> Dict[str, Any]: """Get information about the API's rate limits.""" # These are based on NewsAPI's developer plan return { "requests_per_minute": 100, "requests_per_day": 500, # Free tier limit "current_usage": None # NewsAPI doesn't provide usage info in responses }