Update README with new features and improved config handling

This commit is contained in:
Steve White 2025-03-28 23:55:37 -05:00
parent 1a8cae62ca
commit 11ea971542
2 changed files with 79 additions and 13 deletions

View File

@ -7,8 +7,10 @@ PyNamer is a command-line tool that uses AI vision models to generate descriptiv
- Uses LiteLLM to integrate with various vision-capable LLMs (default: GPT-4 Vision)
- Configurable via YAML config file
- Supports multiple image formats (jpg, jpeg, png, gif, webp)
- Automatically resizes large images before processing (configurable max dimension)
- Dry-run mode to preview changes without renaming files
- Handles filename collisions automatically
- Robust config file discovery (user config, package config, or explicit path)
## Installation
@ -47,17 +49,35 @@ You can customize the following settings:
- LLM provider and model
- API key and endpoint
- Supported image formats
- Image resizing parameters (max dimension, output format)
- Prompt templates for filename generation
Example configuration file:
```yaml
# LLM API Configuration
llm:
  provider: "openai"
  model: "gpt-4-vision-preview"
  api_key: "your-api-key-here"
  max_tokens: 100
  temperature: 0.7
# Image Processing
image:
  supported_formats:
    - ".jpg"
    - ".jpeg"
    - ".png"
    - ".gif"
    - ".webp"
  resize_max_dimension: 1024  # Max width/height before resizing
  resize_format: "JPEG"  # Output format for resized images
# Prompt Configuration
prompt:
  system_message: "You are a helpful assistant that generates concise, descriptive filenames..."
  user_message: "Generate a descriptive filename for this image..."
```
## Usage

View File

@ -1,14 +1,18 @@
"""Core functionality for PyNamer."""
import argparse
import base64
import io
import os
import sys
from pathlib import Path
import yaml
from typing import Dict, List, Optional, Union
import litellm
from litellm import completion
import logging
from PIL import Image
# Configure logging
logging.basicConfig(
@ -24,7 +28,8 @@ class PyNamer:
"""Initialize the PyNamer with configuration.
Args:
config_path: Path to the YAML configuration file
config_path: Optional path to the YAML configuration file.
If None, will look in default locations.
"""
if config_path is None:
# Look for config in user's home directory first
@ -34,8 +39,13 @@ class PyNamer:
logger.info(f"Using user config from {user_config_path}")
else:
# Fall back to default config in package
config_path = os.path.join(os.path.dirname(__file__), 'config.yaml')
logger.info(f"Using default config from {config_path}")
package_dir = os.path.dirname(os.path.abspath(__file__))
config_path = os.path.join(package_dir, 'config.yaml')
if os.path.exists(config_path):
logger.info(f"Using package config from {config_path}")
else:
logger.error("No configuration file found in package directory")
sys.exit(1)
self.config = self._load_config(config_path)
self._setup_llm()
@ -75,21 +85,54 @@ class PyNamer:
self.model = llm_config.get('model', 'gpt-4-vision-preview')
self.max_tokens = llm_config.get('max_tokens', 100)
self.temperature = llm_config.get('temperature', 0.7)
# Image processing settings
image_config = self.config.get('image', {})
self.resize_max_dimension = image_config.get('resize_max_dimension', 1024)
self.resize_format = image_config.get('resize_format', 'JPEG')
logger.info(f"LLM setup complete. Using model: {self.model}")
def _encode_image(self, image_path: str) -> str:
"""Encode image to base64 for API submission.
logger.info(f"Image resize settings: max_dimension={self.resize_max_dimension}, format={self.resize_format}")
def _resize_and_encode_image(self, image_path: str) -> str:
"""Resize image if necessary and encode to base64 for API submission.
Args:
image_path: Path to the image file
Returns:
Base64 encoded image string
"""
with open(image_path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode('utf-8')
try:
with Image.open(image_path) as img:
# Calculate new size maintaining aspect ratio
width, height = img.size
if max(width, height) > self.resize_max_dimension:
if width > height:
new_width = self.resize_max_dimension
new_height = int(height * (self.resize_max_dimension / width))
else:
new_height = self.resize_max_dimension
new_width = int(width * (self.resize_max_dimension / height))
logger.debug(f"Resizing image from {width}x{height} to {new_width}x{new_height}")
img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
else:
logger.debug("Image size is within limits, no resize needed.")
# Save resized image to a bytes buffer
buffer = io.BytesIO()
# Handle potential transparency issues when saving as JPEG
if self.resize_format.upper() == 'JPEG' and img.mode in ('RGBA', 'P'):
img = img.convert('RGB')
img.save(buffer, format=self.resize_format)
img_bytes = buffer.getvalue()
return base64.b64encode(img_bytes).decode('utf-8')
except Exception as e:
logger.error(f"Error processing image {image_path}: {e}")
raise
def _is_supported_format(self, file_path: str) -> bool:
"""Check if the file format is supported.
@ -121,9 +164,12 @@ class PyNamer:
return None
try:
# Encode image
base64_image = self._encode_image(image_path)
# Resize and encode image
base64_image = self._resize_and_encode_image(image_path)
# Determine the mime type based on the resize format
mime_type = f"image/{self.resize_format.lower()}"
# Prepare messages for LLM
system_message = self.config.get('prompt', {}).get('system_message', '')
user_message = self.config.get('prompt', {}).get('user_message', '')
@ -136,7 +182,7 @@ class PyNamer:
{"type": "text", "text": user_message},
{
"type": "image_url",
"image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
"image_url": {"url": f"data:{mime_type};base64,{base64_image}"}
}
]
}