Update README with new features and improved config handling

2025-03-28 23:55:37 -05:00 · 2025-03-28 23:55:37 -05:00 · 11ea971542
parent 1a8cae62ca
commit 11ea971542
2 changed files with 79 additions and 13 deletions
--- a/README.md
+++ b/README.md
@ -7,8 +7,10 @@ PyNamer is a command-line tool that uses AI vision models to generate descriptiv
 - Uses LiteLLM to integrate with various vision-capable LLMs (default: GPT-4 Vision)
 - Configurable via YAML config file
 - Supports multiple image formats (jpg, jpeg, png, gif, webp)
 - Automatically resizes large images before processing (configurable max dimension)
 - Dry-run mode to preview changes without renaming files
 - Handles filename collisions automatically
 - Robust config file discovery (user config, package config, or explicit path)
 ## Installation
@ -47,17 +49,35 @@ You can customize the following settings:
 - LLM provider and model
 - API key and endpoint
 - Supported image formats
 - Image resizing parameters (max dimension, output format)
 - Prompt templates for filename generation
 Example configuration file:
 ```yaml
 # LLM API Configuration
 llm:
  provider: "openai"
  model: "gpt-4-vision-preview"
  api_key: "your-api-key-here"
  max_tokens: 100
  temperature: 0.7
 # Image Processing
 image:
  supported_formats:
    - ".jpg"
    - ".jpeg"
    - ".png"
    - ".gif"
    - ".webp"
  resize_max_dimension: 1024  # Longest side in pixels; larger images are scaled down to this
  resize_format: "JPEG"      # Format every image is re-encoded to before upload (resized or not)
 # Prompt Configuration
 prompt:
  system_message: "You are a helpful assistant that generates concise, descriptive filenames..."
  user_message: "Generate a descriptive filename for this image..."
 ```
 ## Usage
--- a/src/pynamer/core.py
+++ b/src/pynamer/core.py
@ -1,14 +1,18 @@
 """Core functionality for PyNamer."""
 import argparse
 import base64
 import io
 import os
 import sys
 from pathlib import Path
 import yaml
 from typing import Dict, List, Optional, Union
 import litellm
 from litellm import completion
 import logging
 from PIL import Image
 # Configure logging
 logging.basicConfig(
@ -24,7 +28,8 @@ class PyNamer:
        """Initialize the PyNamer with configuration.
        Args:
-            config_path: Path to the YAML configuration file
+            config_path: Optional path to the YAML configuration file.
                         If None, the user's config in the home directory
                         is tried first, then the package's config.yaml.
        """
        if config_path is None:
            # Look for config in user's home directory first
@ -34,8 +39,13 @@ class PyNamer:
                logger.info(f"Using user config from {user_config_path}")
            else:
                # Fall back to default config in package
-                config_path = os.path.join(os.path.dirname(__file__), 'config.yaml')
+                package_dir = os.path.dirname(os.path.abspath(__file__))
-                logger.info(f"Using default config from {config_path}")
+                config_path = os.path.join(package_dir, 'config.yaml')
                if os.path.exists(config_path):
                    logger.info(f"Using package config from {config_path}")
                else:
                    logger.error("No configuration file found in package directory")
                    sys.exit(1)
        self.config = self._load_config(config_path)
        self._setup_llm()
@ -76,10 +86,16 @@ class PyNamer:
        self.max_tokens = llm_config.get('max_tokens', 100)
        self.temperature = llm_config.get('temperature', 0.7)
-        logger.info(f"LLM setup complete. Using model: {self.model}")
+        # Image processing settings
        image_config = self.config.get('image', {})
        self.resize_max_dimension = image_config.get('resize_max_dimension', 1024)
        self.resize_format = image_config.get('resize_format', 'JPEG')
-    def _encode_image(self, image_path: str) -> str:
+        logger.info(f"LLM setup complete. Using model: {self.model}")
-        """Encode image to base64 for API submission.
+        logger.info(f"Image resize settings: max_dimension={self.resize_max_dimension}, format={self.resize_format}")
    def _resize_and_encode_image(self, image_path: str) -> str:
        """Resize image if necessary and encode to base64 for API submission.
        Args:
            image_path: Path to the image file
@ -87,8 +103,35 @@ class PyNamer:
        Returns:
            Base64 encoded image string
        """
-        with open(image_path, "rb") as image_file:
+        try:
-            return base64.b64encode(image_file.read()).decode('utf-8')
+            with Image.open(image_path) as img:
                # Calculate new size maintaining aspect ratio
                width, height = img.size
                if max(width, height) > self.resize_max_dimension:
                    if width > height:
                        new_width = self.resize_max_dimension
                        new_height = int(height * (self.resize_max_dimension / width))
                    else:
                        new_height = self.resize_max_dimension
                        new_width = int(width * (self.resize_max_dimension / height))
                    logger.debug(f"Resizing image from {width}x{height} to {new_width}x{new_height}")
                    img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
                else:
                    logger.debug("Image size is within limits, no resize needed.")
                # Save the (possibly resized) image to an in-memory bytes buffer
                buffer = io.BytesIO()
                # Handle potential transparency issues when saving as JPEG
                if self.resize_format.upper() == 'JPEG' and img.mode in ('RGBA', 'P'):
                     img = img.convert('RGB')
                img.save(buffer, format=self.resize_format)
                img_bytes = buffer.getvalue()
            return base64.b64encode(img_bytes).decode('utf-8')
        except Exception as e:
            logger.error(f"Error processing image {image_path}: {e}")
            raise
    def _is_supported_format(self, file_path: str) -> bool:
        """Check if the file format is supported.
@ -121,8 +164,11 @@ class PyNamer:
            return None
        try:
-            # Encode image
+            # Resize and encode image
-            base64_image = self._encode_image(image_path)
+            base64_image = self._resize_and_encode_image(image_path)
            # Determine the mime type based on the resize format
            mime_type = f"image/{self.resize_format.lower()}"
            # Prepare messages for LLM
            system_message = self.config.get('prompt', {}).get('system_message', '')
@ -136,7 +182,7 @@ class PyNamer:
                        {"type": "text", "text": user_message},
                        {
                            "type": "image_url",
-                            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
+                            "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}
                        }
                    ]
                }