Update README with new features and improved config handling

2025-03-28 23:55:37 -05:00 · 2025-03-28 23:55:37 -05:00 · 11ea971542
parent 1a8cae62ca
commit 11ea971542
2 changed files with 79 additions and 13 deletions
--- a/README.md
+++ b/README.md
@@ -7,8 +7,10 @@ PyNamer is a command-line tool that uses AI vision models to generate descriptiv
 - Uses LiteLLM to integrate with various vision-capable LLMs (default: GPT-4 Vision)
 - Configurable via YAML config file
 - Supports multiple image formats (jpg, jpeg, png, gif, webp)
+- Automatically resizes large images before processing (configurable max dimension)
 - Dry-run mode to preview changes without renaming files
 - Handles filename collisions automatically
+- Robust config file discovery (user config, package config, or explicit path)

 ## Installation

@@ -47,17 +49,35 @@ You can customize the following settings:
 - LLM provider and model
 - API key and endpoint
 - Supported image formats
+- Image resizing parameters (max dimension, output format)
 - Prompt templates for filename generation

 Example configuration file:

 ```yaml
+# LLM API Configuration
 llm:
  provider: "openai"
  model: "gpt-4-vision-preview"
  api_key: "your-api-key-here"
  max_tokens: 100
  temperature: 0.7
+
+# Image Processing
+image:
+  supported_formats:
+    - ".jpg"
+    - ".jpeg"
+    - ".png"
+    - ".gif"
+    - ".webp"
+  resize_max_dimension: 1024  # Max width/height before resizing
+  resize_format: "JPEG"      # Output format for resized images
+
+# Prompt Configuration
+prompt:
+  system_message: "You are a helpful assistant that generates concise, descriptive filenames..."
+  user_message: "Generate a descriptive filename for this image..."
 ```

 ## Usage
--- a/src/pynamer/core.py
+++ b/src/pynamer/core.py
@@ -1,14 +1,18 @@
 """Core functionality for PyNamer."""

+import argparse
 import base64
+import io
 import os
 import sys
 from pathlib import Path
 import yaml
 from typing import Dict, List, Optional, Union
+
 import litellm
 from litellm import completion
 import logging
+from PIL import Image

 # Configure logging
 logging.basicConfig(
@@ -24,7 +28,8 @@ class PyNamer:
        """Initialize the PyNamer with configuration.
        
        Args:
-            config_path: Path to the YAML configuration file
+            config_path: Optional path to the YAML configuration file.
+                         If None, will look in default locations.
        """
        if config_path is None:
            # Look for config in user's home directory first
@@ -34,8 +39,13 @@ class PyNamer:
                logger.info(f"Using user config from {user_config_path}")
            else:
                # Fall back to default config in package
-                config_path = os.path.join(os.path.dirname(__file__), 'config.yaml')
-                logger.info(f"Using default config from {config_path}")
+                package_dir = os.path.dirname(os.path.abspath(__file__))
+                config_path = os.path.join(package_dir, 'config.yaml')
+                if os.path.exists(config_path):
+                    logger.info(f"Using package config from {config_path}")
+                else:
+                    logger.error("No configuration file found in package directory")
+                    sys.exit(1)
        
        self.config = self._load_config(config_path)
        self._setup_llm()
@@ -75,21 +85,54 @@ class PyNamer:
        self.model = llm_config.get('model', 'gpt-4-vision-preview')
        self.max_tokens = llm_config.get('max_tokens', 100)
        self.temperature = llm_config.get('temperature', 0.7)
+
+        # Image processing settings
+        image_config = self.config.get('image', {})
+        self.resize_max_dimension = image_config.get('resize_max_dimension', 1024)
+        self.resize_format = image_config.get('resize_format', 'JPEG')
        
        logger.info(f"LLM setup complete. Using model: {self.model}")
-    
-    def _encode_image(self, image_path: str) -> str:
-        """Encode image to base64 for API submission.
-        
+        logger.info(f"Image resize settings: max_dimension={self.resize_max_dimension}, format={self.resize_format}")
+
+    def _resize_and_encode_image(self, image_path: str) -> str:
+        """Resize image if necessary and encode to base64 for API submission.
+
        Args:
            image_path: Path to the image file
            
        Returns:
            Base64 encoded image string
        """
-        with open(image_path, "rb") as image_file:
-            return base64.b64encode(image_file.read()).decode('utf-8')
-    
+        try:
+            with Image.open(image_path) as img:
+                # Calculate new size maintaining aspect ratio
+                width, height = img.size
+                if max(width, height) > self.resize_max_dimension:
+                    if width > height:
+                        new_width = self.resize_max_dimension
+                        new_height = int(height * (self.resize_max_dimension / width))
+                    else:
+                        new_height = self.resize_max_dimension
+                        new_width = int(width * (self.resize_max_dimension / height))
+                    
+                    logger.debug(f"Resizing image from {width}x{height} to {new_width}x{new_height}")
+                    img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
+                else:
+                    logger.debug("Image size is within limits, no resize needed.")
+
+                # Save resized image to a bytes buffer
+                buffer = io.BytesIO()
+                # Handle potential transparency issues when saving as JPEG
+                if self.resize_format.upper() == 'JPEG' and img.mode in ('RGBA', 'P'):
+                     img = img.convert('RGB')
+                img.save(buffer, format=self.resize_format)
+                img_bytes = buffer.getvalue()
+
+            return base64.b64encode(img_bytes).decode('utf-8')
+        except Exception as e:
+            logger.error(f"Error processing image {image_path}: {e}")
+            raise
+
    def _is_supported_format(self, file_path: str) -> bool:
        """Check if the file format is supported.
        
@@ -121,9 +164,12 @@ class PyNamer:
            return None
        
        try:
-            # Encode image
-            base64_image = self._encode_image(image_path)
+            # Resize and encode image
+            base64_image = self._resize_and_encode_image(image_path)
            
+            # Determine the mime type based on the resize format
+            mime_type = f"image/{self.resize_format.lower()}"
+
            # Prepare messages for LLM
            system_message = self.config.get('prompt', {}).get('system_message', '')
            user_message = self.config.get('prompt', {}).get('user_message', '')
@@ -136,7 +182,7 @@ class PyNamer:
                        {"type": "text", "text": user_message},
                        {
                            "type": "image_url",
-                            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
+                            "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}
                        }
                    ]
                }