pynamer/pynamer.py

274 lines
9.4 KiB
Python
Executable File

#!/Volumes/SAM2/CODE/pynamer/.venv/bin/python
import argparse
import base64
import glob
import logging
import mimetypes
import os
import sys
from pathlib import Path
from typing import Dict, List, Optional, Union

import litellm
import yaml
from litellm import completion
# Logging setup: INFO threshold, records formatted as
# "<timestamp> - <logger name> - <level> - <message>".
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-wide logger used by PyNamer and main().
logger = logging.getLogger('pynamer')
class PyNamer:
    """A tool to generate descriptive filenames for images using LLMs.

    Settings come from a YAML file. The sections read by this class are
    ``llm`` (``api_key``, ``endpoint``, ``model``, ``max_tokens``,
    ``temperature``), ``image`` (``supported_formats``) and ``prompt``
    (``system_message``, ``user_message``). Every section and key is
    optional; a missing or null section is treated as empty.
    """

    def __init__(self, config_path: str = 'config.yaml'):
        """Initialize the PyNamer with configuration.

        Args:
            config_path: Path to the YAML configuration file
        """
        self.config = self._load_config(config_path)
        self._setup_llm()

    def _load_config(self, config_path: str) -> Dict:
        """Load configuration from YAML file.

        Logs an error and terminates the process with exit status 1 when
        the file cannot be read or parsed, or when its top level is not a
        mapping.

        Args:
            config_path: Path to the configuration file

        Returns:
            Dict containing configuration (empty for an empty file)
        """
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                config = yaml.safe_load(f)
        except Exception as e:
            # Deliberately broad: any load failure (I/O, decoding, YAML
            # syntax) is fatal for the tool and is reported the same way.
            logger.error(f"Failed to load configuration: {e}")
            sys.exit(1)

        # safe_load() yields None for an empty document; treat that as
        # "no settings" so the .get() lookups elsewhere keep working.
        if config is None:
            config = {}
        if not isinstance(config, dict):
            logger.error(
                "Failed to load configuration: "
                f"top level of {config_path} must be a mapping"
            )
            sys.exit(1)

        logger.info(f"Loaded configuration from {config_path}")
        return config

    def _section(self, name: str) -> Dict:
        """Return the named top-level config section, or {} if absent/null."""
        section = self.config.get(name)
        return section if isinstance(section, dict) else {}

    def _setup_llm(self) -> None:
        """Set up the LLM client based on configuration."""
        llm_config = self._section('llm')

        # Set API key if provided in config
        api_key = llm_config.get('api_key')
        if api_key:
            os.environ["OPENAI_API_KEY"] = api_key

        # Set custom endpoint if provided
        endpoint = llm_config.get('endpoint')
        if endpoint:
            os.environ["OPENAI_API_BASE"] = endpoint

        self.model = llm_config.get('model', 'gpt-4-vision-preview')
        self.max_tokens = llm_config.get('max_tokens', 100)
        self.temperature = llm_config.get('temperature', 0.7)
        logger.info(f"LLM setup complete. Using model: {self.model}")

    def _encode_image(self, image_path: str) -> str:
        """Encode image to base64 for API submission.

        Args:
            image_path: Path to the image file

        Returns:
            Base64 encoded image string
        """
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    def _is_supported_format(self, file_path: str) -> bool:
        """Check if the file format is supported.

        The comparison is case-insensitive, and configured entries may be
        written with or without the leading dot (``".jpg"`` or ``"jpg"``).

        Args:
            file_path: Path to the file

        Returns:
            True if supported, False otherwise
        """
        configured = self._section('image').get('supported_formats') or []
        supported = set()
        for entry in configured:
            ext = str(entry).strip().lower()
            if ext and not ext.startswith('.'):
                ext = f'.{ext}'
            supported.add(ext)
        file_ext = os.path.splitext(file_path)[1].lower()
        return file_ext in supported

    def generate_filename(self, image_path: str) -> Optional[str]:
        """Generate a descriptive filename for the image using LLM.

        Args:
            image_path: Path to the image file

        Returns:
            Generated filename (without extension), or None when the file
            is missing, has an unsupported format, or the LLM call fails
        """
        if not os.path.exists(image_path):
            logger.error(f"Image not found: {image_path}")
            return None
        if not self._is_supported_format(image_path):
            logger.error(f"Unsupported file format: {image_path}")
            return None

        prompt_config = self._section('prompt')
        try:
            # Encode image
            base64_image = self._encode_image(image_path)

            # Label the data URL with the real media type of the file;
            # fall back to JPEG when the extension is unknown.
            mime_type = mimetypes.guess_type(image_path)[0]
            if not mime_type or not mime_type.startswith('image/'):
                mime_type = 'image/jpeg'

            # Prepare messages for LLM
            messages = [
                {
                    "role": "system",
                    "content": prompt_config.get('system_message', ''),
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt_config.get('user_message', ''),
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{base64_image}"
                            },
                        },
                    ],
                },
            ]

            # Call LLM
            response = completion(
                model=self.model,
                messages=messages,
                max_tokens=self.max_tokens,
                temperature=self.temperature,
            )

            # Extract filename from response
            filename = response.choices[0].message.content.strip()
            logger.info(f"Generated filename: {filename}")
            return filename
        except Exception as e:
            # Broad on purpose: read errors, provider/network errors and a
            # malformed response are all reported as "no filename".
            logger.error(f"Error generating filename: {e}")
            return None

    @staticmethod
    def _sanitize_filename(name: str) -> str:
        """Turn free text into a snake_case file stem.

        Lower-cases the text, replaces every non-alphanumeric character
        with an underscore, collapses underscore runs of any length and
        trims underscores from both ends. May return an empty string.
        """
        replaced = ''.join(c if c.isalnum() else '_' for c in name.lower())
        return '_'.join(part for part in replaced.split('_') if part)

    @staticmethod
    def _available_path(source: Path, stem: str) -> Path:
        """Pick a destination next to *source* that does not clobber a file.

        Tries ``<stem><ext>``, then ``<stem>_1<ext>``, ``<stem>_2<ext>``...
        A candidate that is the source file itself counts as available, so
        a file that already carries the generated name keeps it.
        """
        directory = source.parent
        extension = source.suffix
        candidate = directory / f"{stem}{extension}"
        counter = 1
        while candidate.exists() and not candidate.samefile(source):
            candidate = directory / f"{stem}_{counter}{extension}"
            counter += 1
        return candidate

    def rename_image(self, image_path: str, dry_run: bool = False) -> Optional[str]:
        """Rename the image with a generated descriptive filename.

        The destination keeps the original directory and extension. Name
        collisions are resolved with a numeric suffix, in dry-run mode too,
        so the preview matches what a real run would do.

        Args:
            image_path: Path to the image file
            dry_run: If True, don't actually rename the file

        Returns:
            New path if successful, None otherwise
        """
        # Generate filename
        raw_name = self.generate_filename(image_path)
        if not raw_name:
            return None

        # Clean up the filename (ensure snake_case, no special chars)
        new_filename = self._sanitize_filename(raw_name)
        if not new_filename:
            # e.g. the model answered with punctuation only; renaming to a
            # bare extension would create a hidden file.
            logger.error(f"Generated filename is empty after cleanup: {raw_name!r}")
            return None

        path = Path(image_path)
        try:
            new_path = self._available_path(path, new_filename)
            if dry_run:
                logger.info(f"[DRY RUN] Would rename: {image_path} -> {new_path}")
                return str(new_path)
            # Nothing to do when the file already has the target name.
            if new_path != path:
                path.rename(new_path)
            logger.info(f"Renamed: {image_path} -> {new_path}")
        except OSError as e:
            logger.error(f"Error renaming file: {e}")
            return None
        return str(new_path)
def _resolve_image_paths(raw_args: List[str]) -> List[str]:
    """Turn the positional CLI arguments into an ordered, duplicate-free path list.

    If joining all arguments with single spaces names an existing path,
    that one path is returned (an unquoted path containing spaces).
    Otherwise each argument is used as-is when it exists, or expanded with
    the glob pattern ``*<arg>*`` relative to the current directory.
    """
    # First, try to handle the case where the entire argument list is a
    # single path with spaces
    if len(raw_args) > 1:
        combined_path = ' '.join(raw_args)
        if os.path.exists(combined_path):
            logger.info(f"Found image by combining all arguments: {combined_path}")
            return [combined_path]

    resolved: List[str] = []
    seen = set()
    for arg in raw_args:
        if os.path.exists(arg):
            # An existing path is never glob-expanded, even when it was
            # given twice; the repeat is dropped by the 'seen' check below.
            candidates = [arg]
        else:
            # Try to find files that match the pattern; sorted so the
            # processing order is deterministic.
            candidates = sorted(glob.glob(f"*{arg}*"))
            if not candidates:
                logger.debug(f"Could not find any file matching '{arg}'")
        for candidate in candidates:
            if candidate not in seen:
                seen.add(candidate)
                resolved.append(candidate)
    return resolved


def main():
    """Main entry point for the script.

    Parses the command line, builds a PyNamer from the chosen config file
    and renames (or, with --dry-run, previews) every resolved image.

    Returns:
        0 when every image was processed, 1 when no image was found or at
        least one image could not be processed.
    """
    parser = argparse.ArgumentParser(description='Generate descriptive filenames for images using LLMs')
    parser.add_argument('images', nargs='+', help='Paths to image files')
    parser.add_argument('-c', '--config', default='config.yaml', help='Path to configuration file')
    parser.add_argument('-d', '--dry-run', action='store_true', help='Preview changes without renaming files')
    parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output')
    args = parser.parse_args()

    # Set logging level
    if args.verbose:
        logger.setLevel(logging.DEBUG)

    # Initialize PyNamer
    namer = PyNamer(config_path=args.config)

    image_paths = _resolve_image_paths(args.images)
    if not image_paths:
        print("Error: No valid image files found to process.")
        return 1

    # Process each image
    failures = 0
    prefix = '[DRY RUN] ' if args.dry_run else ''
    for image_path in image_paths:
        # A glob match can disappear between resolution and processing.
        if not os.path.exists(image_path):
            logger.warning(f"Skipping non-existent file: {image_path}")
            failures += 1
            continue
        new_path = namer.rename_image(image_path, dry_run=args.dry_run)
        if new_path:
            print(f"{prefix}Renamed: {image_path} -> {new_path}")
        else:
            print(f"Failed to process: {image_path}")
            failures += 1
    return 1 if failures else 0


if __name__ == "__main__":
    # Propagate main()'s status so shells and scripts can detect failures.
    sys.exit(main())