# pynamer/pynamer.py

#!/Volumes/SAM2/CODE/pynamer/.venv/bin/python

import argparse
import base64
import io
import os
import sys
from pathlib import Path
import yaml
from typing import Dict, List, Optional, Union

import litellm
from litellm import completion
import logging
from PIL import Image # Added for image processing

# Root logging setup: INFO and above, each record stamped with time,
# logger name and level.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Logger shared by the code in this module.
logger = logging.getLogger(name='pynamer')

class PyNamer:
    """A tool to generate descriptive filenames for images using LLMs.

    Settings come from a YAML file with optional ``llm``, ``image`` and
    ``prompt`` sections. A missing or empty section is treated as an empty
    mapping, so the defaults coded in this class apply.
    """

    def __init__(self, config_path: str = 'config.yaml'):
        """Initialize the PyNamer with configuration.

        Args:
            config_path: Path to the YAML configuration file
        """
        self.config = self._load_config(config_path)
        self._setup_llm()

    def _load_config(self, config_path: str) -> Dict:
        """Load configuration from YAML file.

        Logs an error and exits the process with status 1 when the file
        cannot be read or parsed, or when its top level is not a mapping.

        Args:
            config_path: Path to the configuration file

        Returns:
            Dict containing configuration (empty for an empty file)
        """
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                config = yaml.safe_load(f)
        except Exception as e:
            logger.error(f"Failed to load configuration: {e}")
            sys.exit(1)

        # safe_load yields None for an empty document; every later lookup
        # expects a mapping, so fall back to an empty one.
        if config is None:
            config = {}
        if not isinstance(config, dict):
            logger.error(
                f"Failed to load configuration: top level of {config_path} must be a mapping"
            )
            sys.exit(1)

        logger.info(f"Loaded configuration from {config_path}")
        return config

    def _section(self, name: str) -> Dict:
        """Return a top-level config section, or {} when absent or not a mapping.

        A key written with no children (e.g. a bare ``image:``) loads as
        None, which would break chained ``.get`` calls.
        """
        section = self.config.get(name)
        return section if isinstance(section, dict) else {}

    @staticmethod
    def _normalize_format(image_format) -> str:
        """Return an upper-case image format name, mapping the 'JPG' alias to 'JPEG'."""
        name = str(image_format or 'JPEG').strip().upper()
        return 'JPEG' if name == 'JPG' else name

    @staticmethod
    def _sanitize_filename(raw_name: str, extension: str = '') -> str:
        """Turn free-form LLM output into a snake_case file stem.

        Args:
            raw_name: Text returned by the model
            extension: Original file suffix (e.g. '.jpg'); when the model
                echoed it at the end of the name it is dropped so it is not
                duplicated in the final filename

        Returns:
            Lower-case stem containing only alphanumerics separated by single
            underscores; empty when nothing usable remains
        """
        name = raw_name.strip().strip('"\'`').lower()
        if extension and name.endswith(extension.lower()):
            name = name[:-len(extension)]
        cleaned = ''.join(c if c.isalnum() else '_' for c in name)
        # Split/join collapses underscore runs of any length and trims the
        # ends in one pass (a single replace('__', '_') leaves 'a___b' as 'a__b').
        return '_'.join(part for part in cleaned.split('_') if part)

    def _setup_llm(self) -> None:
        """Set up the LLM client and image settings based on configuration."""
        llm_config = self._section('llm')

        # Set API key if provided in config
        api_key = llm_config.get('api_key')
        if api_key:
            os.environ["OPENAI_API_KEY"] = api_key

        # Set custom endpoint if provided
        endpoint = llm_config.get('endpoint')
        if endpoint:
            os.environ["OPENAI_API_BASE"] = endpoint

        self.model = llm_config.get('model', 'gpt-4-vision-preview')
        self.max_tokens = llm_config.get('max_tokens', 100)
        self.temperature = llm_config.get('temperature', 0.7)

        # Image processing settings
        image_config = self._section('image')
        self.resize_max_dimension = image_config.get('resize_max_dimension', 1024)  # Default max dimension
        self.resize_format = self._normalize_format(image_config.get('resize_format', 'JPEG'))  # Default format after resize

        logger.info(f"LLM setup complete. Using model: {self.model}")
        logger.info(f"Image resize settings: max_dimension={self.resize_max_dimension}, format={self.resize_format}")

    def _resize_and_encode_image(self, image_path: str) -> str:
        """Resize image if necessary and encode to base64 for API submission.

        Args:
            image_path: Path to the image file

        Returns:
            Base64 encoded image string

        Raises:
            Exception: Any error from opening, resizing or saving the image is
                logged and re-raised for the caller to handle.
        """
        try:
            with Image.open(image_path) as img:
                width, height = img.size
                limit = self.resize_max_dimension
                # A falsy limit (None/0) means "do not resize".
                if limit and max(width, height) > limit:
                    # Scale the longer side to the limit, keeping aspect ratio;
                    # clamp to 1 so very elongated images never get a 0-pixel side.
                    if width > height:
                        new_width = limit
                        new_height = max(1, int(height * (limit / width)))
                    else:
                        new_height = limit
                        new_width = max(1, int(width * (limit / height)))

                    logger.debug(f"Resizing image from {width}x{height} to {new_width}x{new_height}")
                    img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
                else:
                    logger.debug("Image size is within limits, no resize needed.")

                # Save resized image to a bytes buffer
                buffer = io.BytesIO()
                # JPEG has no alpha or palette support; convert every mode
                # other than the plain L/RGB/CMYK ones (RGBA, LA, P, PA, I, ...).
                if self.resize_format.upper() == 'JPEG' and img.mode not in ('L', 'RGB', 'CMYK'):
                    img = img.convert('RGB')
                img.save(buffer, format=self.resize_format)
                img_bytes = buffer.getvalue()

            return base64.b64encode(img_bytes).decode('utf-8')
        except Exception as e:
            logger.error(f"Error processing image {image_path}: {e}")
            raise  # Re-raise the exception to be caught by the caller

    def _is_supported_format(self, file_path: str) -> bool:
        """Check if the file format is supported.

        Entries of ``image.supported_formats`` are compared case-insensitively
        and may be written with or without the leading dot.

        Args:
            file_path: Path to the file

        Returns:
            True if supported, False otherwise
        """
        configured = self._section('image').get('supported_formats') or []
        if isinstance(configured, str):
            configured = [configured]

        supported_formats = set()
        for entry in configured:
            ext = str(entry).strip().lower()
            if ext:
                supported_formats.add(ext if ext.startswith('.') else f'.{ext}')

        file_ext = os.path.splitext(file_path)[1].lower()
        return file_ext in supported_formats

    def generate_filename(self, image_path: str) -> Optional[str]:
        """Generate a descriptive filename for the image using LLM.

        Args:
            image_path: Path to the image file

        Returns:
            Generated filename (without extension) as returned by the model,
            or None when the file is missing, unsupported, or the request fails
        """
        if not os.path.exists(image_path):
            logger.error(f"Image not found: {image_path}")
            return None

        if not self._is_supported_format(image_path):
            logger.error(f"Unsupported file format: {image_path}")
            return None

        try:
            # Resize and encode image
            base64_image = self._resize_and_encode_image(image_path)

            # Determine the mime type based on the resize format
            mime_type = f"image/{self.resize_format.lower()}"

            # Prepare messages for LLM
            prompt_config = self._section('prompt')
            system_message = prompt_config.get('system_message', '')
            user_message = prompt_config.get('user_message', '')

            messages = [
                {"role": "system", "content": system_message},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": user_message},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}
                        }
                    ]
                }
            ]

            # Call LLM
            response = completion(
                model=self.model,
                messages=messages,
                max_tokens=self.max_tokens,
                temperature=self.temperature
            )

            # Extract filename from response; the content may be missing or blank
            content = response.choices[0].message.content
            filename = content.strip() if content else ''
            if not filename:
                logger.error("Error generating filename: LLM returned an empty response")
                return None

            logger.info(f"Generated filename: {filename}")
            return filename

        except Exception as e:
            logger.error(f"Error generating filename: {e}")
            return None

    def rename_image(self, image_path: str, dry_run: bool = False) -> Optional[str]:
        """Rename the image with a generated descriptive filename.

        The target name is resolved the same way for real and dry runs, so a
        dry run reports the path that would actually be used.

        Args:
            image_path: Path to the image file
            dry_run: If True, don't actually rename the file

        Returns:
            New path if successful (the unchanged path when the file already
            carries the generated name), None otherwise
        """
        # Generate filename
        raw_name = self.generate_filename(image_path)
        if not raw_name:
            return None

        # Get original path components
        path = Path(image_path)
        directory = path.parent
        extension = path.suffix

        # Clean up the filename (ensure snake_case, no special chars)
        new_filename = self._sanitize_filename(raw_name, extension)
        if not new_filename:
            logger.error(f"Generated filename for {image_path} contains no usable characters")
            return None

        # Pick the first free candidate; stop early if a candidate is the file
        # itself so an already well-named file is not given a numeric suffix.
        new_path = directory / f"{new_filename}{extension}"
        counter = 1
        while new_path != path and new_path.exists():
            new_path = directory / f"{new_filename}_{counter}{extension}"
            counter += 1

        if new_path == path:
            logger.info(f"File already has the generated name: {image_path}")
            return str(new_path)

        if dry_run:
            logger.info(f"[DRY RUN] Would rename: {image_path} -> {new_path}")
            return str(new_path)

        try:
            path.rename(new_path)
            logger.info(f"Renamed: {image_path} -> {new_path}")
        except OSError as e:
            logger.error(f"Error renaming file: {e}")
            return None

        return str(new_path)

def main():
    """Main entry point for the script.

    Parses the command line, resolves the image arguments to existing files
    and renames each one (or previews the rename with --dry-run).

    Argument resolution:
      1. If several arguments joined with spaces name an existing path, they
         are treated as one unquoted path containing spaces.
      2. Otherwise each argument that exists is used as-is; an argument that
         does not exist is used as a literal substring to search the current
         directory for matching names.

    Returns:
        0 when every selected image was processed, 1 when no image was found
        or at least one image failed.
    """
    import glob  # only needed for the substring fallback below

    parser = argparse.ArgumentParser(description='Generate descriptive filenames for images using LLMs')
    parser.add_argument('images', nargs='+', help='Paths to image files')
    parser.add_argument('-c', '--config', default='config.yaml', help='Path to configuration file')
    parser.add_argument('-d', '--dry-run', action='store_true', help='Preview changes without renaming files')
    parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output')

    args = parser.parse_args()

    # Set logging level
    if args.verbose:
        logger.setLevel(logging.DEBUG)

    # Initialize PyNamer
    namer = PyNamer(config_path=args.config)

    image_paths = []

    # First, try to handle the case where the entire argument list is a single path with spaces
    if len(args.images) > 1:
        combined_path = ' '.join(args.images)
        if os.path.exists(combined_path):
            image_paths = [combined_path]
            logger.info(f"Found image by combining all arguments: {combined_path}")

    # If that didn't work, try to handle each argument individually
    if not image_paths:
        for path in args.images:
            if os.path.exists(path):
                # Existing paths are taken literally; a repeated argument must
                # not fall through to the substring search below.
                image_paths.append(path)
                continue

            # Escape the argument so characters such as '[' or '?' are matched
            # literally rather than interpreted as glob wildcards.
            matching_files = sorted(glob.glob(f"*{glob.escape(path)}*"))
            image_paths.extend(matching_files)

            if not matching_files:
                logger.debug(f"Could not find any file matching '{path}'")

    if not image_paths:
        print("Error: No valid image files found to process.", file=sys.stderr)
        return 1

    # Remove duplicates while preserving order
    unique_image_paths = list(dict.fromkeys(image_paths))

    # Process each image
    failures = 0
    for image_path in unique_image_paths:
        # A file may have disappeared (e.g. renamed earlier in this run)
        if not os.path.exists(image_path):
            logger.warning(f"Skipping non-existent file: {image_path}")
            continue

        new_path = namer.rename_image(image_path, dry_run=args.dry_run)
        if new_path:
            print(f"{'[DRY RUN] ' if args.dry_run else ''}Renamed: {image_path} -> {new_path}")
        else:
            failures += 1
            print(f"Failed to process: {image_path}")

    return 1 if failures else 0

if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status;
    # a None return exits with status 0.
    sys.exit(main())