ira/execution/api_handlers/news_handler.py

152 lines
5.7 KiB
Python

"""
NewsAPI handler for current events searches.
Provides access to recent news articles from various sources.
"""
import os
import requests
import datetime
from typing import Dict, List, Any, Optional
from .base_handler import BaseSearchHandler
from config.config import get_config, get_api_key
class NewsSearchHandler(BaseSearchHandler):
    """Handler for NewsAPI.org for current events searches.

    Queries either the ``everything`` endpoint (default) or the
    ``top-headlines`` endpoint and converts every returned article into a
    flat dictionary with string-valued fields.
    """

    # Seconds to wait for NewsAPI before giving up. Without a timeout,
    # requests.get() may block indefinitely on a stalled connection.
    DEFAULT_TIMEOUT = 10

    # Largest pageSize accepted by NewsAPI; larger values are clamped.
    MAX_PAGE_SIZE = 100

    def __init__(self):
        """Initialize the NewsAPI search handler.

        Reads the project configuration and the "newsapi" API key; the
        handler is marked available only when a key was found.
        """
        self.config = get_config()
        self.api_key = get_api_key("newsapi")
        self.base_url = "https://newsapi.org/v2/everything"
        self.top_headlines_url = "https://newsapi.org/v2/top-headlines"
        self.available = self.api_key is not None

    def search(self, query: str, num_results: int = 10, **kwargs) -> List[Dict[str, Any]]:
        """
        Execute a search query using NewsAPI.

        Args:
            query: The search query to execute
            num_results: Number of results to request (clamped to
                1..MAX_PAGE_SIZE before being sent as ``pageSize``)
            **kwargs: Additional search parameters:
                - days_back: Number of days back to search (default: 7)
                - sort_by: Sort by criteria ("relevancy", "popularity", "publishedAt")
                - language: Language code (default: "en")
                - sources: Comma-separated list of news sources
                - domains: Comma-separated list of domains
                - use_headlines: Whether to use top headlines endpoint (default: False)
                - country: Country code for headlines (default: "us")
                - category: Category for headlines
                - timeout: Request timeout in seconds (default: DEFAULT_TIMEOUT)

        Returns:
            List of search results with standardized format. An empty list
            is returned when the request fails, the response is not valid
            JSON, or NewsAPI reports a non-"ok" status.

        Raises:
            ValueError: If no API key is configured.
        """
        if not self.available:
            raise ValueError("NewsAPI is not available. API key is missing.")

        use_headlines = kwargs.get("use_headlines", False)
        url = self.top_headlines_url if use_headlines else self.base_url

        params = {
            "q": query,
            "pageSize": max(1, min(num_results, self.MAX_PAGE_SIZE)),
        }

        if use_headlines:
            # Parameters specific to the top-headlines endpoint.
            params["country"] = kwargs.get("country", "us")
            if "category" in kwargs:
                params["category"] = kwargs["category"]
        else:
            # Parameters specific to the everything endpoint. Take a single
            # timestamp so both ends of the range derive from the same
            # instant even when the call straddles midnight.
            now = datetime.datetime.now()
            window = datetime.timedelta(days=kwargs.get("days_back", 7))
            params["from"] = (now - window).strftime("%Y-%m-%d")
            params["to"] = now.strftime("%Y-%m-%d")
            params["sortBy"] = kwargs.get("sort_by", "publishedAt")
            params["language"] = kwargs.get("language", "en")
            for optional in ("sources", "domains"):
                if optional in kwargs:
                    params[optional] = kwargs[optional]

        # The key travels in a header rather than the query string: request
        # errors printed below include the full URL, which would otherwise
        # expose the key in logs.
        headers = {"X-Api-Key": self.api_key}

        try:
            response = requests.get(
                url,
                params=params,
                headers=headers,
                timeout=kwargs.get("timeout", self.DEFAULT_TIMEOUT),
            )
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON on requests
            # versions whose JSON error is not a RequestException.
            print(f"Error executing NewsAPI search: {e}")
            return []

        if not isinstance(data, dict) or data.get("status") != "ok":
            message = data.get("message", "Unknown error") if isinstance(data, dict) else "Unknown error"
            print(f"NewsAPI error: {message}")
            return []

        # "articles" may be missing or null; treat both as no results.
        articles = data.get("articles") or []
        return [
            self._article_to_result(article)
            for article in articles
            if isinstance(article, dict)
        ]

    @staticmethod
    def _format_published_date(pub_date: Optional[str]) -> str:
        """Convert an ISO-8601 timestamp to "YYYY-MM-DD HH:MM:SS".

        Returns "" for a missing or non-string value, and the raw string
        unchanged when it cannot be parsed.
        """
        if not pub_date or not isinstance(pub_date, str):
            return ""
        try:
            # fromisoformat() on older Pythons rejects the "Z" suffix.
            parsed = datetime.datetime.fromisoformat(pub_date.replace("Z", "+00:00"))
        except ValueError:
            return pub_date
        return parsed.strftime("%Y-%m-%d %H:%M:%S")

    @classmethod
    def _article_to_result(cls, article: Dict[str, Any]) -> Dict[str, Any]:
        """Map one NewsAPI article object onto the standardized result dict.

        Fields that are absent or JSON null are normalized to "" (or
        "unknown" for the source name) so every value is a string.
        """
        source = article.get("source")
        source_name = (source.get("name") if isinstance(source, dict) else None) or "unknown"
        return {
            "title": article.get("title") or "",
            "url": article.get("url") or "",
            "snippet": article.get("description") or "",
            "source": f"news:{source_name}",
            "published_date": cls._format_published_date(article.get("publishedAt")),
            "author": article.get("author") or "",
            "image_url": article.get("urlToImage") or "",
            "content": article.get("content") or "",
        }

    def get_name(self) -> str:
        """Get the name of the search handler."""
        return "news"

    def is_available(self) -> bool:
        """Check if the NewsAPI is available (i.e. an API key was found)."""
        return self.available

    def get_rate_limit_info(self) -> Dict[str, Any]:
        """Get information about the API's rate limits.

        The figures are static values hard-coded here, not read from the API.
        """
        # These are based on NewsAPI's developer plan
        return {
            "requests_per_minute": 100,
            "requests_per_day": 500,  # Free tier limit
            "current_usage": None,  # NewsAPI doesn't provide usage info in responses
        }