Update README with new features and improved config handling
This commit is contained in:
parent
1a8cae62ca
commit
11ea971542
20
README.md
20
README.md
|
@@ -7,8 +7,10 @@ PyNamer is a command-line tool that uses AI vision models to generate descriptiv
|
|||
- Uses LiteLLM to integrate with various vision-capable LLMs (default: GPT-4 Vision)
|
||||
- Configurable via YAML config file
|
||||
- Supports multiple image formats (jpg, jpeg, png, gif, webp)
|
||||
- Automatically resizes large images before processing (configurable max dimension)
|
||||
- Dry-run mode to preview changes without renaming files
|
||||
- Handles filename collisions automatically
|
||||
- Robust config file discovery (user config, package config, or explicit path)
|
||||
|
||||
## Installation
|
||||
|
||||
|
@@ -47,17 +49,35 @@ You can customize the following settings:
|
|||
- LLM provider and model
|
||||
- API key and endpoint
|
||||
- Supported image formats
|
||||
- Image resizing parameters (max dimension, output format)
|
||||
- Prompt templates for filename generation
|
||||
|
||||
Example configuration file:
|
||||
|
||||
```yaml
|
||||
# LLM API Configuration
|
||||
llm:
|
||||
provider: "openai"
|
||||
model: "gpt-4-vision-preview"
|
||||
api_key: "your-api-key-here"
|
||||
max_tokens: 100
|
||||
temperature: 0.7
|
||||
|
||||
# Image Processing
|
||||
image:
|
||||
supported_formats:
|
||||
- ".jpg"
|
||||
- ".jpeg"
|
||||
- ".png"
|
||||
- ".gif"
|
||||
- ".webp"
|
||||
resize_max_dimension: 1024 # Max width/height before resizing
|
||||
resize_format: "JPEG" # Output format for resized images
|
||||
|
||||
# Prompt Configuration
|
||||
prompt:
|
||||
system_message: "You are a helpful assistant that generates concise, descriptive filenames..."
|
||||
user_message: "Generate a descriptive filename for this image..."
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
|
|
@@ -1,14 +1,18 @@
|
|||
"""Core functionality for PyNamer."""
|
||||
|
||||
import argparse
|
||||
import base64
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import yaml
|
||||
from typing import Dict, List, Optional, Union
|
||||
|
||||
import litellm
|
||||
from litellm import completion
|
||||
import logging
|
||||
from PIL import Image
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
|
@@ -24,7 +28,8 @@ class PyNamer:
|
|||
"""Initialize the PyNamer with configuration.
|
||||
|
||||
Args:
|
||||
config_path: Path to the YAML configuration file
|
||||
config_path: Optional path to the YAML configuration file.
|
||||
If None, will look in default locations.
|
||||
"""
|
||||
if config_path is None:
|
||||
# Look for config in user's home directory first
|
||||
|
@@ -34,8 +39,13 @@ class PyNamer:
|
|||
logger.info(f"Using user config from {user_config_path}")
|
||||
else:
|
||||
# Fall back to default config in package
|
||||
config_path = os.path.join(os.path.dirname(__file__), 'config.yaml')
|
||||
logger.info(f"Using default config from {config_path}")
|
||||
package_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
config_path = os.path.join(package_dir, 'config.yaml')
|
||||
if os.path.exists(config_path):
|
||||
logger.info(f"Using package config from {config_path}")
|
||||
else:
|
||||
logger.error("No configuration file found in package directory")
|
||||
sys.exit(1)
|
||||
|
||||
self.config = self._load_config(config_path)
|
||||
self._setup_llm()
|
||||
|
@@ -75,21 +85,54 @@ class PyNamer:
|
|||
self.model = llm_config.get('model', 'gpt-4-vision-preview')
|
||||
self.max_tokens = llm_config.get('max_tokens', 100)
|
||||
self.temperature = llm_config.get('temperature', 0.7)
|
||||
|
||||
# Image processing settings
|
||||
image_config = self.config.get('image', {})
|
||||
self.resize_max_dimension = image_config.get('resize_max_dimension', 1024)
|
||||
self.resize_format = image_config.get('resize_format', 'JPEG')
|
||||
|
||||
logger.info(f"LLM setup complete. Using model: {self.model}")
|
||||
|
||||
def _encode_image(self, image_path: str) -> str:
|
||||
"""Encode image to base64 for API submission.
|
||||
|
||||
logger.info(f"Image resize settings: max_dimension={self.resize_max_dimension}, format={self.resize_format}")
|
||||
|
||||
def _resize_and_encode_image(self, image_path: str) -> str:
|
||||
"""Resize image if necessary and encode to base64 for API submission.
|
||||
|
||||
Args:
|
||||
image_path: Path to the image file
|
||||
|
||||
Returns:
|
||||
Base64 encoded image string
|
||||
"""
|
||||
with open(image_path, "rb") as image_file:
|
||||
return base64.b64encode(image_file.read()).decode('utf-8')
|
||||
|
||||
try:
|
||||
with Image.open(image_path) as img:
|
||||
# Calculate new size maintaining aspect ratio
|
||||
width, height = img.size
|
||||
if max(width, height) > self.resize_max_dimension:
|
||||
if width > height:
|
||||
new_width = self.resize_max_dimension
|
||||
new_height = int(height * (self.resize_max_dimension / width))
|
||||
else:
|
||||
new_height = self.resize_max_dimension
|
||||
new_width = int(width * (self.resize_max_dimension / height))
|
||||
|
||||
logger.debug(f"Resizing image from {width}x{height} to {new_width}x{new_height}")
|
||||
img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
|
||||
else:
|
||||
logger.debug("Image size is within limits, no resize needed.")
|
||||
|
||||
# Save resized image to a bytes buffer
|
||||
buffer = io.BytesIO()
|
||||
# Handle potential transparency issues when saving as JPEG
|
||||
if self.resize_format.upper() == 'JPEG' and img.mode in ('RGBA', 'P'):
|
||||
img = img.convert('RGB')
|
||||
img.save(buffer, format=self.resize_format)
|
||||
img_bytes = buffer.getvalue()
|
||||
|
||||
return base64.b64encode(img_bytes).decode('utf-8')
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing image {image_path}: {e}")
|
||||
raise
|
||||
|
||||
def _is_supported_format(self, file_path: str) -> bool:
|
||||
"""Check if the file format is supported.
|
||||
|
||||
|
@@ -121,9 +164,12 @@ class PyNamer:
|
|||
return None
|
||||
|
||||
try:
|
||||
# Encode image
|
||||
base64_image = self._encode_image(image_path)
|
||||
# Resize and encode image
|
||||
base64_image = self._resize_and_encode_image(image_path)
|
||||
|
||||
# Determine the mime type based on the resize format
|
||||
mime_type = f"image/{self.resize_format.lower()}"
|
||||
|
||||
# Prepare messages for LLM
|
||||
system_message = self.config.get('prompt', {}).get('system_message', '')
|
||||
user_message = self.config.get('prompt', {}).get('user_message', '')
|
||||
|
@@ -136,7 +182,7 @@ class PyNamer:
|
|||
{"type": "text", "text": user_message},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
|
||||
"image_url": {"url": f"data:{mime_type};base64,{base64_image}"}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue