Update README with new features and improved config handling

This commit is contained in:
Steve White 2025-03-28 23:55:37 -05:00
parent 1a8cae62ca
commit 11ea971542
2 changed files with 79 additions and 13 deletions

View File

@@ -7,8 +7,10 @@ PyNamer is a command-line tool that uses AI vision models to generate descriptiv
- Uses LiteLLM to integrate with various vision-capable LLMs (default: GPT-4 Vision) - Uses LiteLLM to integrate with various vision-capable LLMs (default: GPT-4 Vision)
- Configurable via YAML config file - Configurable via YAML config file
- Supports multiple image formats (jpg, jpeg, png, gif, webp) - Supports multiple image formats (jpg, jpeg, png, gif, webp)
- Automatically resizes large images before processing (configurable max dimension)
- Dry-run mode to preview changes without renaming files - Dry-run mode to preview changes without renaming files
- Handles filename collisions automatically - Handles filename collisions automatically
- Robust config file discovery (user config, package config, or explicit path)
## Installation ## Installation
@@ -47,17 +49,35 @@ You can customize the following settings:
- LLM provider and model - LLM provider and model
- API key and endpoint - API key and endpoint
- Supported image formats - Supported image formats
- Image resizing parameters (max dimension, output format)
- Prompt templates for filename generation - Prompt templates for filename generation
Example configuration file: Example configuration file:
```yaml ```yaml
# LLM API Configuration
llm: llm:
provider: "openai" provider: "openai"
model: "gpt-4-vision-preview" model: "gpt-4-vision-preview"
api_key: "your-api-key-here" api_key: "your-api-key-here"
max_tokens: 100 max_tokens: 100
temperature: 0.7 temperature: 0.7
# Image Processing
image:
supported_formats:
- ".jpg"
- ".jpeg"
- ".png"
- ".gif"
- ".webp"
resize_max_dimension: 1024 # Max width/height before resizing
resize_format: "JPEG" # Output format for resized images
# Prompt Configuration
prompt:
system_message: "You are a helpful assistant that generates concise, descriptive filenames..."
user_message: "Generate a descriptive filename for this image..."
``` ```
## Usage ## Usage

View File

@@ -1,14 +1,18 @@
"""Core functionality for PyNamer.""" """Core functionality for PyNamer."""
import argparse
import base64 import base64
import io
import os import os
import sys import sys
from pathlib import Path from pathlib import Path
import yaml import yaml
from typing import Dict, List, Optional, Union from typing import Dict, List, Optional, Union
import litellm import litellm
from litellm import completion from litellm import completion
import logging import logging
from PIL import Image
# Configure logging # Configure logging
logging.basicConfig( logging.basicConfig(
@@ -24,7 +28,8 @@ class PyNamer:
"""Initialize the PyNamer with configuration. """Initialize the PyNamer with configuration.
Args: Args:
config_path: Path to the YAML configuration file config_path: Optional path to the YAML configuration file.
If None, will look in default locations.
""" """
if config_path is None: if config_path is None:
# Look for config in user's home directory first # Look for config in user's home directory first
@@ -34,8 +39,13 @@ class PyNamer:
logger.info(f"Using user config from {user_config_path}") logger.info(f"Using user config from {user_config_path}")
else: else:
# Fall back to default config in package # Fall back to default config in package
config_path = os.path.join(os.path.dirname(__file__), 'config.yaml') package_dir = os.path.dirname(os.path.abspath(__file__))
logger.info(f"Using default config from {config_path}") config_path = os.path.join(package_dir, 'config.yaml')
if os.path.exists(config_path):
logger.info(f"Using package config from {config_path}")
else:
logger.error("No configuration file found in package directory")
sys.exit(1)
self.config = self._load_config(config_path) self.config = self._load_config(config_path)
self._setup_llm() self._setup_llm()
@@ -76,10 +86,16 @@ class PyNamer:
self.max_tokens = llm_config.get('max_tokens', 100) self.max_tokens = llm_config.get('max_tokens', 100)
self.temperature = llm_config.get('temperature', 0.7) self.temperature = llm_config.get('temperature', 0.7)
logger.info(f"LLM setup complete. Using model: {self.model}") # Image processing settings
image_config = self.config.get('image', {})
self.resize_max_dimension = image_config.get('resize_max_dimension', 1024)
self.resize_format = image_config.get('resize_format', 'JPEG')
def _encode_image(self, image_path: str) -> str: logger.info(f"LLM setup complete. Using model: {self.model}")
"""Encode image to base64 for API submission. logger.info(f"Image resize settings: max_dimension={self.resize_max_dimension}, format={self.resize_format}")
def _resize_and_encode_image(self, image_path: str) -> str:
"""Resize image if necessary and encode to base64 for API submission.
Args: Args:
image_path: Path to the image file image_path: Path to the image file
@@ -87,8 +103,35 @@ class PyNamer:
Returns: Returns:
Base64 encoded image string Base64 encoded image string
""" """
with open(image_path, "rb") as image_file: try:
return base64.b64encode(image_file.read()).decode('utf-8') with Image.open(image_path) as img:
# Calculate new size maintaining aspect ratio
width, height = img.size
if max(width, height) > self.resize_max_dimension:
if width > height:
new_width = self.resize_max_dimension
new_height = int(height * (self.resize_max_dimension / width))
else:
new_height = self.resize_max_dimension
new_width = int(width * (self.resize_max_dimension / height))
logger.debug(f"Resizing image from {width}x{height} to {new_width}x{new_height}")
img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
else:
logger.debug("Image size is within limits, no resize needed.")
# Save resized image to a bytes buffer
buffer = io.BytesIO()
# Handle potential transparency issues when saving as JPEG
if self.resize_format.upper() == 'JPEG' and img.mode in ('RGBA', 'P'):
img = img.convert('RGB')
img.save(buffer, format=self.resize_format)
img_bytes = buffer.getvalue()
return base64.b64encode(img_bytes).decode('utf-8')
except Exception as e:
logger.error(f"Error processing image {image_path}: {e}")
raise
def _is_supported_format(self, file_path: str) -> bool: def _is_supported_format(self, file_path: str) -> bool:
"""Check if the file format is supported. """Check if the file format is supported.
@@ -121,8 +164,11 @@ class PyNamer:
return None return None
try: try:
# Encode image # Resize and encode image
base64_image = self._encode_image(image_path) base64_image = self._resize_and_encode_image(image_path)
# Determine the mime type based on the resize format
mime_type = f"image/{self.resize_format.lower()}"
# Prepare messages for LLM # Prepare messages for LLM
system_message = self.config.get('prompt', {}).get('system_message', '') system_message = self.config.get('prompt', {}).get('system_message', '')
@@ -136,7 +182,7 @@ class PyNamer:
{"type": "text", "text": user_message}, {"type": "text", "text": user_message},
{ {
"type": "image_url", "type": "image_url",
"image_url": {"url": f"data:image/jpeg;base64,{base64_image}"} "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}
} }
] ]
} }