Update README with new features and improved config handling
This commit is contained in:
parent
1a8cae62ca
commit
11ea971542
20
README.md
20
README.md
|
@@ -7,8 +7,10 @@ PyNamer is a command-line tool that uses AI vision models to generate descriptiv
|
||||||
- Uses LiteLLM to integrate with various vision-capable LLMs (default: GPT-4 Vision)
|
- Uses LiteLLM to integrate with various vision-capable LLMs (default: GPT-4 Vision)
|
||||||
- Configurable via YAML config file
|
- Configurable via YAML config file
|
||||||
- Supports multiple image formats (jpg, jpeg, png, gif, webp)
|
- Supports multiple image formats (jpg, jpeg, png, gif, webp)
|
||||||
|
- Automatically resizes large images before processing (configurable max dimension)
|
||||||
- Dry-run mode to preview changes without renaming files
|
- Dry-run mode to preview changes without renaming files
|
||||||
- Handles filename collisions automatically
|
- Handles filename collisions automatically
|
||||||
|
- Robust config file discovery (user config, package config, or explicit path)
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
|
@@ -47,17 +49,35 @@ You can customize the following settings:
|
||||||
- LLM provider and model
|
- LLM provider and model
|
||||||
- API key and endpoint
|
- API key and endpoint
|
||||||
- Supported image formats
|
- Supported image formats
|
||||||
|
- Image resizing parameters (max dimension, output format)
|
||||||
- Prompt templates for filename generation
|
- Prompt templates for filename generation
|
||||||
|
|
||||||
Example configuration file:
|
Example configuration file:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
|
# LLM API Configuration
|
||||||
llm:
|
llm:
|
||||||
provider: "openai"
|
provider: "openai"
|
||||||
model: "gpt-4-vision-preview"
|
model: "gpt-4-vision-preview"
|
||||||
api_key: "your-api-key-here"
|
api_key: "your-api-key-here"
|
||||||
max_tokens: 100
|
max_tokens: 100
|
||||||
temperature: 0.7
|
temperature: 0.7
|
||||||
|
|
||||||
|
# Image Processing
|
||||||
|
image:
|
||||||
|
supported_formats:
|
||||||
|
- ".jpg"
|
||||||
|
- ".jpeg"
|
||||||
|
- ".png"
|
||||||
|
- ".gif"
|
||||||
|
- ".webp"
|
||||||
|
resize_max_dimension: 1024 # Max width/height before resizing
|
||||||
|
resize_format: "JPEG" # Output format for resized images
|
||||||
|
|
||||||
|
# Prompt Configuration
|
||||||
|
prompt:
|
||||||
|
system_message: "You are a helpful assistant that generates concise, descriptive filenames..."
|
||||||
|
user_message: "Generate a descriptive filename for this image..."
|
||||||
```
|
```
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
|
@@ -1,14 +1,18 @@
|
||||||
"""Core functionality for PyNamer."""
|
"""Core functionality for PyNamer."""
|
||||||
|
|
||||||
|
import argparse
|
||||||
import base64
|
import base64
|
||||||
|
import io
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import yaml
|
import yaml
|
||||||
from typing import Dict, List, Optional, Union
|
from typing import Dict, List, Optional, Union
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
from litellm import completion
|
from litellm import completion
|
||||||
import logging
|
import logging
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
# Configure logging
|
# Configure logging
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
|
@@ -24,7 +28,8 @@ class PyNamer:
|
||||||
"""Initialize the PyNamer with configuration.
|
"""Initialize the PyNamer with configuration.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
config_path: Path to the YAML configuration file
|
config_path: Optional path to the YAML configuration file.
|
||||||
|
If None, will look in default locations.
|
||||||
"""
|
"""
|
||||||
if config_path is None:
|
if config_path is None:
|
||||||
# Look for config in user's home directory first
|
# Look for config in user's home directory first
|
||||||
|
@@ -34,8 +39,13 @@ class PyNamer:
|
||||||
logger.info(f"Using user config from {user_config_path}")
|
logger.info(f"Using user config from {user_config_path}")
|
||||||
else:
|
else:
|
||||||
# Fall back to default config in package
|
# Fall back to default config in package
|
||||||
config_path = os.path.join(os.path.dirname(__file__), 'config.yaml')
|
package_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
logger.info(f"Using default config from {config_path}")
|
config_path = os.path.join(package_dir, 'config.yaml')
|
||||||
|
if os.path.exists(config_path):
|
||||||
|
logger.info(f"Using package config from {config_path}")
|
||||||
|
else:
|
||||||
|
logger.error("No configuration file found in package directory")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
self.config = self._load_config(config_path)
|
self.config = self._load_config(config_path)
|
||||||
self._setup_llm()
|
self._setup_llm()
|
||||||
|
@@ -76,10 +86,16 @@ class PyNamer:
|
||||||
self.max_tokens = llm_config.get('max_tokens', 100)
|
self.max_tokens = llm_config.get('max_tokens', 100)
|
||||||
self.temperature = llm_config.get('temperature', 0.7)
|
self.temperature = llm_config.get('temperature', 0.7)
|
||||||
|
|
||||||
logger.info(f"LLM setup complete. Using model: {self.model}")
|
# Image processing settings
|
||||||
|
image_config = self.config.get('image', {})
|
||||||
|
self.resize_max_dimension = image_config.get('resize_max_dimension', 1024)
|
||||||
|
self.resize_format = image_config.get('resize_format', 'JPEG')
|
||||||
|
|
||||||
def _encode_image(self, image_path: str) -> str:
|
logger.info(f"LLM setup complete. Using model: {self.model}")
|
||||||
"""Encode image to base64 for API submission.
|
logger.info(f"Image resize settings: max_dimension={self.resize_max_dimension}, format={self.resize_format}")
|
||||||
|
|
||||||
|
def _resize_and_encode_image(self, image_path: str) -> str:
|
||||||
|
"""Resize image if necessary and encode to base64 for API submission.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
image_path: Path to the image file
|
image_path: Path to the image file
|
||||||
|
@@ -87,8 +103,35 @@ class PyNamer:
|
||||||
Returns:
|
Returns:
|
||||||
Base64 encoded image string
|
Base64 encoded image string
|
||||||
"""
|
"""
|
||||||
with open(image_path, "rb") as image_file:
|
try:
|
||||||
return base64.b64encode(image_file.read()).decode('utf-8')
|
with Image.open(image_path) as img:
|
||||||
|
# Calculate new size maintaining aspect ratio
|
||||||
|
width, height = img.size
|
||||||
|
if max(width, height) > self.resize_max_dimension:
|
||||||
|
if width > height:
|
||||||
|
new_width = self.resize_max_dimension
|
||||||
|
new_height = int(height * (self.resize_max_dimension / width))
|
||||||
|
else:
|
||||||
|
new_height = self.resize_max_dimension
|
||||||
|
new_width = int(width * (self.resize_max_dimension / height))
|
||||||
|
|
||||||
|
logger.debug(f"Resizing image from {width}x{height} to {new_width}x{new_height}")
|
||||||
|
img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
|
||||||
|
else:
|
||||||
|
logger.debug("Image size is within limits, no resize needed.")
|
||||||
|
|
||||||
|
# Save resized image to a bytes buffer
|
||||||
|
buffer = io.BytesIO()
|
||||||
|
# Handle potential transparency issues when saving as JPEG
|
||||||
|
if self.resize_format.upper() == 'JPEG' and img.mode in ('RGBA', 'P'):
|
||||||
|
img = img.convert('RGB')
|
||||||
|
img.save(buffer, format=self.resize_format)
|
||||||
|
img_bytes = buffer.getvalue()
|
||||||
|
|
||||||
|
return base64.b64encode(img_bytes).decode('utf-8')
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing image {image_path}: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
def _is_supported_format(self, file_path: str) -> bool:
|
def _is_supported_format(self, file_path: str) -> bool:
|
||||||
"""Check if the file format is supported.
|
"""Check if the file format is supported.
|
||||||
|
@@ -121,8 +164,11 @@ class PyNamer:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Encode image
|
# Resize and encode image
|
||||||
base64_image = self._encode_image(image_path)
|
base64_image = self._resize_and_encode_image(image_path)
|
||||||
|
|
||||||
|
# Determine the mime type based on the resize format
|
||||||
|
mime_type = f"image/{self.resize_format.lower()}"
|
||||||
|
|
||||||
# Prepare messages for LLM
|
# Prepare messages for LLM
|
||||||
system_message = self.config.get('prompt', {}).get('system_message', '')
|
system_message = self.config.get('prompt', {}).get('system_message', '')
|
||||||
|
@@ -136,7 +182,7 @@ class PyNamer:
|
||||||
{"type": "text", "text": user_message},
|
{"type": "text", "text": user_message},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "image_url",
|
||||||
"image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
|
"image_url": {"url": f"data:{mime_type};base64,{base64_image}"}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue