"""
GitHub API handler for code search.

This module implements a search handler for GitHub's API,
allowing code searches across GitHub repositories.
"""

import base64
import os
from typing import Dict, List, Any, Optional

import requests

from config.config import get_config
from ..api_handlers.base_handler import BaseSearchHandler


class GitHubSearchHandler(BaseSearchHandler):
    """Handler for GitHub code search."""

    # Seconds to wait on any single HTTP call; without a timeout `requests`
    # blocks indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 10
    # Upper bound on results per search call. Every result triggers one extra
    # request for its snippet, so this is deliberately kept small.
    MAX_RESULTS_PER_REQUEST = 30
    # Maximum number of characters of file content returned as a snippet.
    SNIPPET_LENGTH = 500
    # Fallback figures reported when the rate-limit endpoint cannot be read.
    DEFAULT_REQUESTS_PER_MINUTE = 30
    DEFAULT_REQUESTS_PER_HOUR = 5000

    def __init__(self):
        """Initialize the GitHub search handler.

        The API key is taken from the ``GITHUB_API_KEY`` environment variable
        and, when that is unset or empty, from ``api_keys.github`` in the
        loaded configuration.
        """
        self.config = get_config()
        self.api_key = (
            os.environ.get('GITHUB_API_KEY')
            or self.config.config_data.get('api_keys', {}).get('github')
        )
        self.api_url = "https://api.github.com"
        self.search_endpoint = "/search/code"
        self.user_agent = "SimSearch-Research-Assistant"

    def _build_headers(self, accept: str = "application/vnd.github.v3+json") -> Dict[str, str]:
        """Build the authenticated request headers shared by every API call.

        Args:
            accept: Value for the ``Accept`` header (GitHub media type).

        Returns:
            Header dictionary with authorization, media type and user agent.
        """
        return {
            "Authorization": f"token {self.api_key}",
            "Accept": accept,
            "User-Agent": self.user_agent,
        }

    def search(self, query: str, num_results: int = 10, **kwargs) -> List[Dict[str, Any]]:
        """
        Execute a code search on GitHub.

        Args:
            query: The search query
            num_results: Number of results to return (capped at
                ``MAX_RESULTS_PER_REQUEST``)
            **kwargs: Additional search parameters
                - language: Filter by programming language
                - sort: Sort by (indexed, stars, forks, updated)
                - order: Sort order (asc, desc)

        Returns:
            List of search results; empty when the handler is not configured,
            ``num_results`` is not positive, or the API request fails.
        """
        # A non-positive page size is never a valid request, so skip the call.
        if not self.is_available() or num_results <= 0:
            return []

        # The language filter is a qualifier inside the query string itself.
        search_query = query
        if kwargs.get("language"):
            search_query += f" language:{kwargs['language']}"

        params = {
            "q": search_query,
            "per_page": min(num_results, self.MAX_RESULTS_PER_REQUEST),
            "page": 1,
        }
        for option in ("sort", "order"):
            if kwargs.get(option):
                params[option] = kwargs[option]

        try:
            response = requests.get(
                f"{self.api_url}{self.search_endpoint}",
                params=params,
                headers=self._build_headers(),
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on `requests` versions whose
            # JSON decode error is not a RequestException.
            print(f"GitHub API error: {e}")
            return []

        results = []
        # `or []` also covers an explicit `"items": null` in the payload.
        for item in data.get("items") or []:
            # For each code result, fetch a bit of the file content
            if item.get("url"):
                snippet = self._get_code_snippet(item)
            else:
                snippet = "Code snippet not available"

            # Construct a standardized result entry
            results.append({
                "title": item.get("name", "Unnamed"),
                "url": item.get("html_url", ""),
                "snippet": snippet,
                "source": "github",
                "metadata": {
                    "repository": item.get("repository", {}).get("full_name", ""),
                    "path": item.get("path", ""),
                    "language": kwargs.get("language", ""),
                    "score": item.get("score", 0),
                },
            })

        return results

    def _get_code_snippet(self, item: Dict[str, Any]) -> str:
        """
        Fetch a snippet of the code file.

        Args:
            item: The GitHub code search result item

        Returns:
            The first ``SNIPPET_LENGTH`` characters of the file (followed by
            "..." when truncated), or a placeholder message when the content
            is missing or cannot be fetched.
        """
        try:
            content_url = item.get("url")
            if not content_url:
                return "Content not available"

            # Request the JSON representation: it carries the file body as a
            # Base64 string in "content". The raw media type would return the
            # file bytes directly, which cannot be parsed with .json().
            response = requests.get(
                content_url,
                headers=self._build_headers(),
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()

            encoded = response.json().get("content", "")
            if not encoded:
                return "Content not available"

            # Undecodable bytes are replaced rather than discarding the
            # whole snippet.
            decoded = base64.b64decode(encoded).decode("utf-8", errors="replace")
            suffix = "..." if len(decoded) > self.SNIPPET_LENGTH else ""
            return decoded[:self.SNIPPET_LENGTH] + suffix
        except Exception as e:
            # Deliberate best-effort boundary: a failed snippet must never
            # abort the surrounding search.
            print(f"Error fetching code snippet: {e}")
            return "Error fetching code snippet"

    def get_name(self) -> str:
        """
        Get the name of the search handler.

        Returns:
            Name of the search handler
        """
        return "github"

    def is_available(self) -> bool:
        """
        Check if the GitHub API is available and properly configured.

        Returns:
            True if a non-empty API key is configured, False otherwise
        """
        return bool(self.api_key)

    def get_rate_limit_info(self) -> Dict[str, Any]:
        """
        Get information about GitHub API rate limits.

        Returns:
            Dictionary with rate limit information. On success it holds
            ``requests_per_minute``, ``requests_per_hour`` and
            ``current_usage`` (``remaining`` and ``reset_time``); on failure
            it holds ``error`` plus default limits.
        """
        if not self.is_available():
            return {"error": "GitHub API not configured"}

        try:
            response = requests.get(
                f"{self.api_url}/rate_limit",
                headers=self._build_headers(),
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()

            resources = response.json().get("resources", {})
            # Code search is metered under its own "code_search" resource;
            # fall back to the general "search" resource when it is absent.
            rate_limits = resources.get("code_search") or resources.get("search") or {}

            # GitHub reports search limits per one-minute window, so the
            # hourly figure is derived from it.
            per_minute = rate_limits.get("limit", self.DEFAULT_REQUESTS_PER_MINUTE)
            return {
                "requests_per_minute": per_minute,
                "requests_per_hour": per_minute * 60,
                "current_usage": {
                    "remaining": rate_limits.get("remaining", 0),
                    "reset_time": rate_limits.get("reset", 0),
                },
            }
        except Exception as e:
            # Best-effort: report the failure together with default limits.
            print(f"Error getting rate limit info: {e}")
            return {
                "error": str(e),
                "requests_per_minute": self.DEFAULT_REQUESTS_PER_MINUTE,
                "requests_per_hour": self.DEFAULT_REQUESTS_PER_HOUR,  # Default limit
            }