feat(video): change layout to vertical

This commit is contained in:
Harshad 2026-02-04 22:07:05 +05:30
parent 2d73f0aa61
commit da2a247e97
2 changed files with 475 additions and 7 deletions

View file

@ -37,7 +37,7 @@ logger = logging.getLogger(__name__)
# Asset paths (relative to this file's directory)
SCRIPT_DIR = Path(__file__).parent
ASSET_DIR = SCRIPT_DIR / "assets"
BASE_IMG = ASSET_DIR / "1080x1080-bg.png"
BASE_IMG = f"{ASSET_DIR}/1080x1920-bg.png" # 1080x1920px portrait image
VINYL_TEMPLATE = ASSET_DIR / "736-x-736-record.png"
DEFAULT_RECORD = ASSET_DIR / "default-record.png"
NEEDLE_IMG = ASSET_DIR / "needle.png"
@ -45,7 +45,7 @@ NEEDLE_IMG = ASSET_DIR / "needle.png"
# Video settings
VIDEO_FORMAT = "mp4"
FINAL_VIDEO_WIDTH_PX = 720
FINAL_VIDEO_HEIGHT_PX = 720
FINAL_VIDEO_HEIGHT_PX = 1280 # 9:16 aspect ratio (720 * 16/9 = 1280)
FRAME_RATE = 15
# Composite image settings (for static record image export)
@ -62,17 +62,29 @@ SCALE_FACTOR = FINAL_VIDEO_WIDTH_PX / ORIGINAL_RESOLUTION
BASE_WIDTH = FINAL_VIDEO_WIDTH_PX
BASE_HEIGHT = FINAL_VIDEO_HEIGHT_PX
# Pet image settings
PET_SIZE_PX = int(360 * SCALE_FACTOR)
# Vinyl template settings
VINYL_SIZE_PX = int(736 * SCALE_FACTOR)
# Vinyl record positioning
# Centered horizontally, positioned vertically
VINYL_VERTICAL_ADJUST = int(95 * SCALE_FACTOR) # Adjust this: positive = down, negative = up
VINYL_OFFSET_X = (BASE_WIDTH - VINYL_SIZE_PX) // 2
VINYL_OFFSET_Y = (BASE_HEIGHT - VINYL_SIZE_PX) // 2 + int(100 * SCALE_FACTOR)
VINYL_OFFSET_Y = (BASE_HEIGHT - VINYL_SIZE_PX) // 2 + VINYL_VERTICAL_ADJUST
# Pet image positioning (relative to vinyl record center)
# Automatically centered within the vinyl record
PET_OFFSET_X = VINYL_OFFSET_X + (VINYL_SIZE_PX - PET_SIZE_PX) // 2
PET_OFFSET_Y = VINYL_OFFSET_Y + (VINYL_SIZE_PX - PET_SIZE_PX) // 2
NEEDLE_OFFSET_X = 0
NEEDLE_OFFSET_Y = 0
# Needle positioning
# Centered horizontally, positioned vertically
NEEDLE_VERTICAL_ADJUST = int(400 * SCALE_FACTOR) # Adjust this: positive = down, negative = up
NEEDLE_SIZE_PX = int(1080 * SCALE_FACTOR) # Scales proportionally with other elements
NEEDLE_OFFSET_X = (BASE_WIDTH - NEEDLE_SIZE_PX) // 2
NEEDLE_OFFSET_Y = 0 + NEEDLE_VERTICAL_ADJUST
def create_record_composite(pet_img_path: str, output_path: str) -> None:
@ -136,8 +148,8 @@ def load_and_resize_images(pet_img_path: str | None) -> tuple:
# Load needle image
needle_img = Image.open(NEEDLE_IMG).convert("RGBA")
if needle_img.size != (BASE_WIDTH, BASE_HEIGHT):
needle_img = needle_img.resize((BASE_WIDTH, BASE_HEIGHT), resize_mode)
if needle_img.size != (NEEDLE_SIZE_PX, NEEDLE_SIZE_PX):
needle_img = needle_img.resize((NEEDLE_SIZE_PX, NEEDLE_SIZE_PX), resize_mode)
logger.info(f"Images loaded - Base: {base_img.size}, Pet: {pet_img.size if pet_img else 'None'}, Vinyl: {vinyl_img.size}")
return base_img, pet_img, vinyl_img, needle_img

View file

@ -0,0 +1,456 @@
"""
Video Creation Script - Vinyl Record Animation
Creates an animated video with a rotating vinyl record effect by layering:
- Base background image (1080x1920 portrait)
- Pet/cover art image (centered in vinyl, rotates with record)
- Vinyl record template (transparent PNG, rotates)
- Needle overlay (stationary)
The script generates ONE complete rotation cycle and streams frames directly to
FFmpeg, which seamlessly loops the rotation to match the audio duration. This
approach is significantly faster and more efficient than generating all frames.
The vinyl rotates at a configurable RPM (default: 20 RPM for smooth animation).
Audio duration must be at least as long as one full rotation.
Dependency:
- ffmpeg should be installed on the OS and be present on the system $PATH
Usage:
main(pet_img_path="path/to/image.jpg", audio_track_path="path/to/audio.mp3")
Run script with uv:
$ uv run create-video.py
Configuration:
Only settings in the below categories should be modified:
- Asset paths
- Video settings
- Rotation settings
- Output settings
Do not modify the values calculated from them (scale factor, element sizes,
centering offsets); fine-tune placement only through the *_VERTICAL_ADJUST values.
"""
# /// script
# requires-python = ">=3.13"
# dependencies = [
# "mutagen",
# "pillow",
# ]
# ///
import shutil
import subprocess
import time
from datetime import datetime as dt
from math import ceil
from pathlib import Path
from mutagen.mp3 import MP3
from PIL import Image
# START Configuration

# Asset paths (resolved relative to the current working directory)
ASSET_DIR = "./assets"
BASE_IMG = f"{ASSET_DIR}/1080x1920-bg.png"  # portrait background, 1080x1920 px
VINYL_TEMPLATE = f"{ASSET_DIR}/736-x-736-record.png"  # record template, 736x736 px
NEEDLE_IMG = f"{ASSET_DIR}/needle.png"  # needle overlay, 1080x1080 px

# Video settings
VIDEO_FORMAT = "mp4"
FINAL_VIDEO_WIDTH_PX = 720  # lower this for faster generation
FINAL_VIDEO_HEIGHT_PX = 1280  # 9:16 aspect ratio (720 * 16/9 = 1280)
FRAME_RATE = 15

# Rotation settings
# Rotations per minute (33.33 = standard LP speed, 45 = single speed);
# use a lower value for a slower spin.
VINYL_RPM = 20

# Output settings
OUTPUT_DIR = "./output"
# The timestamp is taken once, when the module is imported.
_RUN_STAMP = dt.now().strftime('%Y%b%d_%H%M%S')
OUTPUT_VIDEO = f"{OUTPUT_DIR}/final_video-{_RUN_STAMP}.{VIDEO_FORMAT}"

# END Configuration

# ----------------------------------------------------------------------------
# Derived layout values
# ----------------------------------------------------------------------------
# The vinyl assets were drawn for a 1080 px wide canvas, so width is the
# reference dimension for scaling everything to the output resolution.
ORIGINAL_WIDTH = 1080
SCALE_FACTOR = FINAL_VIDEO_WIDTH_PX / ORIGINAL_WIDTH

BASE_WIDTH, BASE_HEIGHT = FINAL_VIDEO_WIDTH_PX, FINAL_VIDEO_HEIGHT_PX


def _scaled(design_px):
    """Convert a length on the 1080 px design canvas to output pixels (truncated)."""
    return int(design_px * SCALE_FACTOR)


# Element sizes
PET_SIZE_PX = _scaled(360)  # fits the vinyl's center circle
VINYL_SIZE_PX = _scaled(736)
NEEDLE_SIZE_PX = _scaled(1080)  # 720 px at the default resolution

# ============================================================================
# OFFSET ADJUSTMENT SECTION - tune the *_VERTICAL_ADJUST values to reposition
# ============================================================================
# Positive values move RIGHT (X) or DOWN (Y); negative values move LEFT / UP.
# Every element is centered first and then shifted by its adjustment.

# Vinyl record: centered horizontally, centered vertically plus an adjustment
VINYL_VERTICAL_ADJUST = _scaled(95)
VINYL_OFFSET_X = (BASE_WIDTH - VINYL_SIZE_PX) // 2
VINYL_OFFSET_Y = (BASE_HEIGHT - VINYL_SIZE_PX) // 2 + VINYL_VERTICAL_ADJUST

# Pet image: always centered inside the vinyl record
_PET_INSET = (VINYL_SIZE_PX - PET_SIZE_PX) // 2
PET_OFFSET_X = VINYL_OFFSET_X + _PET_INSET
PET_OFFSET_Y = VINYL_OFFSET_Y + _PET_INSET

# Needle: centered horizontally, pushed down from the top edge
NEEDLE_VERTICAL_ADJUST = _scaled(400)
NEEDLE_OFFSET_X = (BASE_WIDTH - NEEDLE_SIZE_PX) // 2
NEEDLE_OFFSET_Y = NEEDLE_VERTICAL_ADJUST
# ============================================================================
def ensure_directories():
    """Create OUTPUT_DIR, including any missing parent directories.

    Does nothing when the directory already exists.
    """
    # parents=True keeps this working when OUTPUT_DIR is configured as a
    # nested path (e.g. "./out/videos"); without it mkdir raises
    # FileNotFoundError if an intermediate directory is missing.
    Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
    print("✓ Output directories created/verified")
def get_audio_duration(audio_track_path):
    """Return the length of an MP3 track in whole seconds, rounded up.

    Args:
        audio_track_path: Path to the MP3 file to inspect.
    """
    print("Reading audio track duration...")
    # Any fractional second is rounded up to the next whole second.
    seconds = ceil(MP3(audio_track_path).info.length)
    minutes = seconds / 60
    print(f"✓ Audio duration: {seconds:.2f} seconds ({minutes:.2f} minutes)")
    return seconds
def _open_rgba(image_path):
    """Read an image file fully into memory as RGBA and close the file."""
    # Image.open() is lazy and keeps the file open; the context manager
    # releases the handle once convert() has produced an in-memory copy.
    with Image.open(image_path) as source:
        return source.convert("RGBA")


def load_and_resize_images(pet_img_path):
    """Load every layer as RGBA and scale it to its configured output size.

    Args:
        pet_img_path: Path to the pet/cover image placed in the record center.

    Returns:
        tuple: (base_img, pet_img, vinyl_img, needle_img) as RGBA images sized
        (BASE_WIDTH, BASE_HEIGHT), PET_SIZE_PX square, VINYL_SIZE_PX square and
        NEEDLE_SIZE_PX square respectively.
    """
    print("Loading and resizing images...")
    # LANCZOS gives the best quality and this resize happens only once per run.
    resize_mode = Image.Resampling.LANCZOS

    def fit(image, size):
        """Resize only when the image is not already at the target size."""
        return image if image.size == size else image.resize(size, resize_mode)

    # Background, scaled to the final video frame size.
    base_img = fit(_open_rgba(BASE_IMG), (BASE_WIDTH, BASE_HEIGHT))

    # Pet image is always resized to a square; a non-square input is
    # stretched, not cropped.
    pet_img = _open_rgba(pet_img_path).resize((PET_SIZE_PX, PET_SIZE_PX), resize_mode)

    # Vinyl record template (transparent PNG).
    vinyl_img = fit(_open_rgba(VINYL_TEMPLATE), (VINYL_SIZE_PX, VINYL_SIZE_PX))

    # Needle overlay (transparent PNG), scaled like the other elements.
    needle_img = fit(_open_rgba(NEEDLE_IMG), (NEEDLE_SIZE_PX, NEEDLE_SIZE_PX))

    print("✓ Images loaded and resized")
    print(f" - Base: {base_img.size}")
    print(f" - Pet: {pet_img.size}")
    print(f" - Vinyl: {vinyl_img.size}")
    print(f" - Needle: {needle_img.size}")
    return base_img, pet_img, vinyl_img, needle_img
def create_composite_frame(base_img, pet_img, vinyl_img, needle_img, rotation_angle=0):
    """Build one RGB video frame from the four image layers.

    Layers are stacked bottom to top as: base, pet image, vinyl template,
    needle. The pet image and the vinyl turn together; the needle never moves.

    Args:
        base_img: Background image; it is copied, never modified.
        pet_img: Pet/cover image drawn in the record center.
        vinyl_img: Vinyl record template drawn over the pet image.
        needle_img: Needle overlay drawn last.
        rotation_angle: Angle in degrees to rotate vinyl and pet image (clockwise)

    Returns:
        The composited frame converted to RGB for FFmpeg.
    """
    canvas = base_img.copy()

    # Rotating layers in paint order, each with its top-left paste position.
    spinning_layers = (
        (pet_img, (PET_OFFSET_X, PET_OFFSET_Y)),
        (vinyl_img, (VINYL_OFFSET_X, VINYL_OFFSET_Y)),
    )
    for layer, position in spinning_layers:
        if rotation_angle != 0:
            # This runs once per frame, so the cheaper BILINEAR filter is used.
            # The angle is negated to turn the layer clockwise about its center.
            layer = layer.rotate(
                -rotation_angle, resample=Image.Resampling.BILINEAR, expand=False
            )
        # The layer doubles as its own mask so transparent pixels stay clear.
        canvas.paste(layer, position, layer)

    # Stationary needle on top of everything.
    canvas.paste(needle_img, (NEEDLE_OFFSET_X, NEEDLE_OFFSET_Y), needle_img)

    # Drop the alpha channel before handing the frame to FFmpeg.
    return canvas.convert("RGB")
def calculate_rotation_duration():
    """Return how many seconds one full 360° turn takes at VINYL_RPM."""
    seconds_per_minute = 60.0
    return seconds_per_minute / VINYL_RPM
def calculate_rotation_angle(frame_num, total_frames, include_last_frame=False):
    """Return the rotation angle in degrees for one frame of a rotation cycle.

    The angle is the fraction ``frame_num / total_frames`` of a full turn.

    Args:
        frame_num: Current frame number (0-indexed)
        total_frames: Total number of frames in one rotation
        include_last_frame: If True the raw angle is returned, so
            ``frame_num == total_frames`` yields 360°. If False the angle is
            wrapped into [0°, 360°), so that same frame yields 0° and a looped
            cycle never repeats its first frame. For frames
            ``0 .. total_frames - 1`` both settings give the same result.
    """
    angle = frame_num / total_frames * 360
    return angle if include_last_frame else angle % 360
def _read_ffmpeg_log(log_file):
    """Return everything FFmpeg wrote to its redirected stderr, as text."""
    log_file.seek(0)
    return log_file.read().decode(errors="replace")


def generate_and_stream_frames(
    base_img, pet_img, vinyl_img, needle_img, audio_duration, audio_track_path
):
    """Render one rotation cycle, pipe it to FFmpeg and mux it with the audio.

    Only the frames of a single rotation are generated. They are written as
    PNGs to FFmpeg's stdin; FFmpeg's ``loop`` filter repeats them and
    ``-shortest`` stops the output when the audio ends. The result is written
    to OUTPUT_VIDEO.

    Args:
        base_img: Background layer.
        pet_img: Pet/cover layer (rotates).
        vinyl_img: Vinyl template layer (rotates).
        needle_img: Needle overlay layer (stationary).
        audio_duration: Duration of audio track in seconds
        audio_track_path: Path to audio file

    Returns:
        float: Wall-clock seconds spent generating frames and encoding.

    Raises:
        subprocess.CalledProcessError: FFmpeg exited with a non-zero status.
        BrokenPipeError: FFmpeg stopped reading before all frames were sent.
    """
    # Local import: nothing else in this module needs tempfile.
    import tempfile

    step_start = time.time()

    # Frames needed for one full rotation.
    rotation_duration = calculate_rotation_duration()
    frames_per_rotation = int(rotation_duration * FRAME_RATE)

    # How many rotations fit into the audio track.
    total_rotations = audio_duration / rotation_duration
    print(
        f"Generating 1 rotation cycle ({frames_per_rotation} frames at {FRAME_RATE} fps)..."
    )
    print(f" Rotation duration: {rotation_duration:.2f} seconds at {VINYL_RPM} RPM")
    print(f" Video will loop {total_rotations:.1f} times to match audio duration")
    print(f" Streaming frames directly to FFmpeg (no temp files)...")

    # Loop count handed to FFmpeg's loop filter.
    num_loops = int(audio_duration / rotation_duration)

    # fmt: off
    ffmpeg_cmd = [
        "ffmpeg",
        "-y",  # Overwrite output file
        "-f", "image2pipe",  # Read images from pipe
        "-framerate", str(FRAME_RATE),
        "-vcodec", "png",  # Explicitly tell FFmpeg the input codec
        "-i", "pipe:0",  # Read from stdin
        "-i", audio_track_path,  # Audio input
        "-filter_complex", f"[0:v]loop=loop={num_loops}:size={frames_per_rotation}:start=0[outv]",  # Loop video
        "-map", "[outv]",  # Use looped video
        "-map", "1:a",  # Use audio from second input
        "-preset", "faster",  # Trade encoding time for file size (ultrafast/superfast/veryfast/faster/fast/medium)
        "-c:v", "libx264",
        "-crf", "28",  # Constant Rate Factor: 18=high quality, 28=good quality/smaller, 32+=lower quality
        "-pix_fmt", "yuv420p",  # Convert pixel format (standard for MP4)
        "-c:a", "aac",  # Encode audio using AAC codec
        "-b:a", "192k",
        "-r", str(FRAME_RATE),
        "-movflags", "+faststart",  # Optimize for web streaming (metadata at beginning)
        "-shortest",  # Stop when audio ends
        OUTPUT_VIDEO,
    ]
    # fmt: on

    # FFmpeg's stderr goes to a temporary file instead of a pipe. Nothing
    # reads the child's output while frames are being written, so a pipe
    # could fill up and block FFmpeg, which would then stop draining stdin
    # and hang this process. stdout carries nothing useful and is discarded.
    with tempfile.TemporaryFile() as ffmpeg_log:
        ffmpeg_process = subprocess.Popen(
            ffmpeg_cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.DEVNULL,
            stderr=ffmpeg_log,
            bufsize=10**8,  # Large buffer for performance
        )
        try:
            for frame_num in range(frames_per_rotation):
                # The 360° frame is excluded because it duplicates the 0° frame.
                rotation_angle = calculate_rotation_angle(
                    frame_num, frames_per_rotation, include_last_frame=False
                )
                composite_frame = create_composite_frame(
                    base_img, pet_img, vinyl_img, needle_img, rotation_angle
                )
                # Encode the frame as PNG straight into FFmpeg's stdin.
                composite_frame.save(ffmpeg_process.stdin, format="PNG")

                # Progress indicator
                if (frame_num + 1) % 50 == 0 or frame_num == frames_per_rotation - 1:
                    print(f" Progress: {frame_num + 1}/{frames_per_rotation} frames")

            # Closing stdin flushes the buffered frames and signals end of input.
            ffmpeg_process.stdin.close()
            ffmpeg_process.wait()

            if ffmpeg_process.returncode != 0:
                print("✗ Error creating video with ffmpeg:")
                print(_read_ffmpeg_log(ffmpeg_log))
                raise subprocess.CalledProcessError(
                    ffmpeg_process.returncode, ffmpeg_cmd
                )

            step_time = time.time() - step_start
            print(f"✓ Video created successfully: {OUTPUT_VIDEO}")
            print(
                f" Time taken: {step_time:.2f} seconds ({step_time / 60:.2f} minutes)"
            )
        except BrokenPipeError:
            print("✗ FFmpeg process terminated unexpectedly")
            # Give FFmpeg a moment to exit so its log is complete, then show
            # it: the reason for the early exit is in FFmpeg's stderr.
            try:
                ffmpeg_process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                pass  # still running; the finally block below kills it
            print(_read_ffmpeg_log(ffmpeg_log))
            raise
        finally:
            # Never leave an FFmpeg child behind, whatever went wrong.
            if ffmpeg_process.poll() is None:
                ffmpeg_process.kill()
                ffmpeg_process.wait()
            try:
                ffmpeg_process.stdin.close()  # no-op if already closed
            except OSError:
                pass  # flushing into a dead process fails; nothing left to do

    return step_time
def check_dependencies():
    """Confirm that ffmpeg is reachable on the system PATH.

    Raises:
        SystemExit: With exit code 1, after printing install hints, when
            ffmpeg cannot be found.
    """
    if shutil.which("ffmpeg") is not None:
        print("✓ Dependency check passed (ffmpeg found)")
        return

    divider = "=" * 60
    report = (
        divider,
        "✗ ERROR: 'ffmpeg' was not found on your system PATH.",
        "This tool is required to convert image frames into a video.",
        "\nTo install it:",
        " - macOS: brew install ffmpeg",
        " - Ubuntu/Debian: sudo apt-get install ffmpeg",
        " - Windows: https://ffmpeg.org/download.html",
        divider,
    )
    for line in report:
        print(line)
    raise SystemExit(1)
def validate_audio_duration(audio_duration):
    """Abort the run when the audio is shorter than one full rotation.

    Args:
        audio_duration: Duration of audio in seconds

    Raises:
        SystemExit: With exit code 1 if the audio is too short.
    """
    minimum_seconds = calculate_rotation_duration()
    if not (audio_duration < minimum_seconds):
        return

    divider = "=" * 60
    print(divider)
    print(
        f"✗ ERROR: Audio duration ({audio_duration:.2f}s) is shorter than one full rotation ({minimum_seconds:.2f}s at {VINYL_RPM} RPM)"
    )
    print("Please use a longer audio track or increase the VINYL_RPM setting.")
    print(divider)
    raise SystemExit(1)
def main(pet_img_path, audio_track_path, output_path=None):
    """Run the whole pipeline: checks, image loading, rendering and summary.

    Args:
        pet_img_path: Path to pet image file
        audio_track_path: Path to audio track file
        output_path: Optional custom output path for the video file. When
            given, it replaces the module-level OUTPUT_VIDEO and its parent
            directory is created. Otherwise the auto-generated path inside
            OUTPUT_DIR is used.
    """
    global OUTPUT_VIDEO
    if output_path:
        OUTPUT_VIDEO = output_path
        # Make sure the custom destination's folder exists.
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    divider = "=" * 60
    started = time.time()
    started_at = dt.now()

    print(divider)
    print(f"Start time: {started_at.strftime('%Y-%m-%d %H:%M:%S')}")
    print(divider)
    print(f"Pet image: {pet_img_path}")
    print(f"Audio track: {audio_track_path}")
    print(divider)

    check_dependencies()

    # Step 0: read the audio length and make sure it covers one rotation
    audio_duration = get_audio_duration(audio_track_path)
    validate_audio_duration(audio_duration)

    # Step 1: output directory
    ensure_directories()

    # Step 2: load and scale the image layers
    layers = load_and_resize_images(pet_img_path)
    print(divider)

    # Step 3: render one rotation and let FFmpeg loop it over the audio
    generate_and_stream_frames(*layers, audio_duration, audio_track_path)

    finished = time.time()
    finished_at = dt.now()
    elapsed = finished - started
    size_mb = Path(OUTPUT_VIDEO).stat().st_size / (1024 * 1024)

    print(divider)
    print(f"✓ COMPLETE! Video saved to: {OUTPUT_VIDEO}")
    print(f"End time: {finished_at.strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Total time taken: {elapsed:.2f} seconds ({elapsed / 60:.2f} minutes)")
    print(f"Video file size: {size_mb:.1f} MB")
    print(divider)
if __name__ == "__main__":
    # Sample inputs from the assets folder, used for quick local test runs.
    PET_IMG, AUDIO_TRACK = (
        f"{ASSET_DIR}/dog_upload.jpg",
        f"{ASSET_DIR}/my-track.mp3",
    )
    main(PET_IMG, AUDIO_TRACK)