diff --git a/backend/video_generator/create_video.py b/backend/video_generator/create_video.py
index 0a485e2..263ef19 100644
--- a/backend/video_generator/create_video.py
+++ b/backend/video_generator/create_video.py
@@ -37,7 +37,7 @@ logger = logging.getLogger(__name__)
 # Asset paths (relative to this file's directory)
 SCRIPT_DIR = Path(__file__).parent
 ASSET_DIR = SCRIPT_DIR / "assets"
-BASE_IMG = ASSET_DIR / "1080x1080-bg.png"
+BASE_IMG = ASSET_DIR / "1080x1920-bg.png"  # 1080x1920px portrait image
 VINYL_TEMPLATE = ASSET_DIR / "736-x-736-record.png"
 DEFAULT_RECORD = ASSET_DIR / "default-record.png"
 NEEDLE_IMG = ASSET_DIR / "needle.png"
@@ -45,7 +45,7 @@ NEEDLE_IMG = ASSET_DIR / "needle.png"
 # Video settings
 VIDEO_FORMAT = "mp4"
 FINAL_VIDEO_WIDTH_PX = 720
-FINAL_VIDEO_HEIGHT_PX = 720
+FINAL_VIDEO_HEIGHT_PX = 1280  # 9:16 aspect ratio (720 * 16/9 = 1280)
 FRAME_RATE = 15
 
 # Composite image settings (for static record image export)
@@ -62,17 +62,29 @@ SCALE_FACTOR = FINAL_VIDEO_WIDTH_PX / ORIGINAL_RESOLUTION
 BASE_WIDTH = FINAL_VIDEO_WIDTH_PX
 BASE_HEIGHT = FINAL_VIDEO_HEIGHT_PX
 
+# Pet image settings
 PET_SIZE_PX = int(360 * SCALE_FACTOR)
+# Vinyl template settings
 VINYL_SIZE_PX = int(736 * SCALE_FACTOR)
 
+# Vinyl record positioning
+# Centered horizontally, positioned vertically
+VINYL_VERTICAL_ADJUST = int(95 * SCALE_FACTOR)  # Adjust this: positive = down, negative = up
 VINYL_OFFSET_X = (BASE_WIDTH - VINYL_SIZE_PX) // 2
-VINYL_OFFSET_Y = (BASE_HEIGHT - VINYL_SIZE_PX) // 2 + int(100 * SCALE_FACTOR)
+VINYL_OFFSET_Y = (BASE_HEIGHT - VINYL_SIZE_PX) // 2 + VINYL_VERTICAL_ADJUST
 
+# Pet image positioning (relative to vinyl record center)
+# Automatically centered within the vinyl record
 PET_OFFSET_X = VINYL_OFFSET_X + (VINYL_SIZE_PX - PET_SIZE_PX) // 2
 PET_OFFSET_Y = VINYL_OFFSET_Y + (VINYL_SIZE_PX - PET_SIZE_PX) // 2
 
-NEEDLE_OFFSET_X = 0
-NEEDLE_OFFSET_Y = 0
+# Needle positioning
+# Centered horizontally, positioned vertically
+NEEDLE_VERTICAL_ADJUST = int(400 * SCALE_FACTOR)  # Adjust this: positive = down, negative = up
+NEEDLE_SIZE_PX = int(1080 * SCALE_FACTOR)  # Scales proportionally with other elements
+
+NEEDLE_OFFSET_X = (BASE_WIDTH - NEEDLE_SIZE_PX) // 2
+NEEDLE_OFFSET_Y = 0 + NEEDLE_VERTICAL_ADJUST
 
 
 def create_record_composite(pet_img_path: str, output_path: str) -> None:
@@ -136,8 +148,8 @@ def load_and_resize_images(pet_img_path: str | None) -> tuple:
 
     # Load needle image
     needle_img = Image.open(NEEDLE_IMG).convert("RGBA")
-    if needle_img.size != (BASE_WIDTH, BASE_HEIGHT):
-        needle_img = needle_img.resize((BASE_WIDTH, BASE_HEIGHT), resize_mode)
+    if needle_img.size != (NEEDLE_SIZE_PX, NEEDLE_SIZE_PX):
+        needle_img = needle_img.resize((NEEDLE_SIZE_PX, NEEDLE_SIZE_PX), resize_mode)
 
     logger.info(f"Images loaded - Base: {base_img.size}, Pet: {pet_img.size if pet_img else 'None'}, Vinyl: {vinyl_img.size}")
     return base_img, pet_img, vinyl_img, needle_img
diff --git a/video_generator_example/create-video-portrait.py b/video_generator_example/create-video-portrait.py
new file mode 100644
index 0000000..fd62f52
--- /dev/null
+++ b/video_generator_example/create-video-portrait.py
@@ -0,0 +1,496 @@
+"""
+Video Creation Script - Vinyl Record Animation
+
+Creates an animated video with a rotating vinyl record effect by layering:
+- Base background image (1080x1920 portrait)
+- Pet/cover art image (centered in vinyl, rotates with record)
+- Vinyl record template (transparent PNG, rotates)
+- Needle overlay (stationary)
+
+The script generates ONE complete rotation cycle and streams frames directly to
+FFmpeg, which seamlessly loops the rotation to match the audio duration. This
+approach is significantly faster and more efficient than generating all frames.
+
+The vinyl rotates at a configurable RPM (default: 20 RPM for smooth animation).
+Audio duration must be at least as long as one full rotation.
+
+Dependency:
+    - ffmpeg should be installed on the OS and be present on the system $PATH
+
+Usage:
+    main(pet_img_path="path/to/image.jpg", audio_track_path="path/to/audio.mp3")
+
+    Run script with uv:
+    $ uv run create-video-portrait.py
+
+Configuration:
+    Only settings in the below categories should be modified:
+    - Asset paths
+    - Video settings
+    - Rotation settings
+    - Output settings
+    - Offset adjustments (the *_VERTICAL_ADJUST values)
+
+    Do not modify calculated values (scale factor, element sizes, and the
+    *_OFFSET_X / *_OFFSET_Y positions derived from them).
+"""
+
+# /// script
+# requires-python = ">=3.13"
+# dependencies = [
+#     "mutagen",
+#     "pillow",
+# ]
+# ///
+
+import shutil
+import subprocess
+import tempfile
+import time
+from datetime import datetime as dt
+from math import ceil
+from pathlib import Path
+
+from mutagen.mp3 import MP3
+from PIL import Image
+
+# START Configuration
+
+# Asset paths
+ASSET_DIR = "./assets"
+# BASE_IMG = f"{ASSET_DIR}/1080x1080-bg.png"  # 1080 sq. px. image
+BASE_IMG = f"{ASSET_DIR}/1080x1920-bg.png"  # 1080x1920 px portrait image
+VINYL_TEMPLATE = f"{ASSET_DIR}/736-x-736-record.png"  # 736 sq. px. image
+NEEDLE_IMG = f"{ASSET_DIR}/needle.png"  # 1080 sq. px. image
+
+
+# Video settings
+VIDEO_FORMAT = "mp4"
+FINAL_VIDEO_WIDTH_PX = 720  # lower this for faster generation
+# FINAL_VIDEO_HEIGHT_PX = 720  # lower this for faster generation
+FINAL_VIDEO_HEIGHT_PX = 1280  # 9:16 aspect ratio (720 * 16/9 = 1280)
+FRAME_RATE = 15
+
+# Rotation settings
+# Rotations per minute (33.33 = standard LP speed, 45 = single speed)
+# Lower value for slower rotation/spin
+VINYL_RPM = 20
+
+# Output settings
+OUTPUT_DIR = "./output"
+OUTPUT_VIDEO = (
+    f"{OUTPUT_DIR}/final_video-{dt.now().strftime('%Y%b%d_%H%M%S')}.{VIDEO_FORMAT}"
+)
+
+# END Configuration
+
+# Image positioning offsets (centered on base image)
+# These can be adjusted for fine-tuning
+
+# We'll use width as our reference dimension since the vinyl assets
+# are designed for a 1080x1080 square canvas
+ORIGINAL_WIDTH = 1080
+
+# Scale factor based on target resolution
+SCALE_FACTOR = FINAL_VIDEO_WIDTH_PX / ORIGINAL_WIDTH
+
+BASE_WIDTH = FINAL_VIDEO_WIDTH_PX
+BASE_HEIGHT = FINAL_VIDEO_HEIGHT_PX
+
+# Pet image settings
+PET_SIZE_PX = int(360 * SCALE_FACTOR)  # Size to fit in vinyl center circle
+
+# Vinyl template settings
+VINYL_SIZE_PX = int(736 * SCALE_FACTOR)
+
+# Needle settings (scales proportionally with other elements)
+NEEDLE_SIZE_PX = int(1080 * SCALE_FACTOR)  # 720px at current resolution
+
+# ============================================================================
+# OFFSET ADJUSTMENT SECTION - Modify these values to fine-tune positioning
+# ============================================================================
+# Note: Positive values move RIGHT (X) or DOWN (Y), negative values move LEFT/UP
+# All offsets are calculated to center elements, then adjusted as needed
+
+# Vinyl record positioning
+# Centered horizontally, positioned vertically (adjust VINYL_VERTICAL_ADJUST to move up/down)
+VINYL_VERTICAL_ADJUST = int(95 * SCALE_FACTOR)  # Adjust this: positive = down, negative = up
+
+VINYL_OFFSET_X = (BASE_WIDTH - VINYL_SIZE_PX) // 2
+VINYL_OFFSET_Y = (BASE_HEIGHT - VINYL_SIZE_PX) // 2 + VINYL_VERTICAL_ADJUST
+
+# Pet image positioning (relative to vinyl record center)
+# Automatically centered within the vinyl record
+PET_OFFSET_X = VINYL_OFFSET_X + (VINYL_SIZE_PX - PET_SIZE_PX) // 2
+PET_OFFSET_Y = VINYL_OFFSET_Y + (VINYL_SIZE_PX - PET_SIZE_PX) // 2
+
+# Needle positioning
+# Centered horizontally, positioned vertically (adjust NEEDLE_VERTICAL_ADJUST to move up/down)
+NEEDLE_VERTICAL_ADJUST = int(400 * SCALE_FACTOR)  # Adjust this: positive = down, negative = up
+
+NEEDLE_OFFSET_X = (BASE_WIDTH - NEEDLE_SIZE_PX) // 2
+NEEDLE_OFFSET_Y = 0 + NEEDLE_VERTICAL_ADJUST
+
+# ============================================================================
+
+
+def ensure_directories():
+    """Create the output directory (OUTPUT_DIR) if it doesn't exist."""
+    Path(OUTPUT_DIR).mkdir(exist_ok=True)
+    print("✓ Output directories created/verified")
+
+
+def get_audio_duration(audio_track_path):
+    """Return the MP3 track's duration in whole seconds, rounded up."""
+    print("Reading audio track duration...")
+    audio = MP3(audio_track_path)
+    duration = ceil(audio.info.length)  # round up any fractional seconds
+    print(f"✓ Audio duration: {duration:.2f} seconds ({duration / 60:.2f} minutes)")
+    return duration
+
+
+def load_and_resize_images(pet_img_path):
+    """Load all four layers as RGBA and resize them to their target sizes.
+
+    Args:
+        pet_img_path: Path to the pet/cover art image
+
+    Returns:
+        Tuple of (base_img, pet_img, vinyl_img, needle_img) PIL images
+    """
+    print("Loading and resizing images...")
+
+    # Use LANCZOS for initial resize (better quality, only done once)
+    resize_mode = Image.Resampling.LANCZOS
+
+    # Load base image and scale it to the output frame size
+    base_img = Image.open(BASE_IMG).convert("RGBA")
+    if base_img.size != (BASE_WIDTH, BASE_HEIGHT):
+        base_img = base_img.resize((BASE_WIDTH, BASE_HEIGHT), resize_mode)
+
+    # Load pet image and force it to a PET_SIZE_PX square (the source aspect
+    # ratio is not preserved)
+    pet_img = Image.open(pet_img_path).convert("RGBA")
+    pet_img = pet_img.resize((PET_SIZE_PX, PET_SIZE_PX), resize_mode)
+
+    # Load vinyl template (transparent PNG) and scale it to VINYL_SIZE_PX
+    vinyl_img = Image.open(VINYL_TEMPLATE).convert("RGBA")
+    if vinyl_img.size != (VINYL_SIZE_PX, VINYL_SIZE_PX):
+        vinyl_img = vinyl_img.resize((VINYL_SIZE_PX, VINYL_SIZE_PX), resize_mode)
+
+    # Load needle overlay (transparent PNG) and scale it to NEEDLE_SIZE_PX
+    needle_img = Image.open(NEEDLE_IMG).convert("RGBA")
+    if needle_img.size != (NEEDLE_SIZE_PX, NEEDLE_SIZE_PX):
+        needle_img = needle_img.resize((NEEDLE_SIZE_PX, NEEDLE_SIZE_PX), resize_mode)
+
+    print("✓ Images loaded and resized")
+    print(f" - Base: {base_img.size}")
+    print(f" - Pet: {pet_img.size}")
+    print(f" - Vinyl: {vinyl_img.size}")
+    print(f" - Needle: {needle_img.size}")
+
+    return base_img, pet_img, vinyl_img, needle_img
+
+
+def create_composite_frame(base_img, pet_img, vinyl_img, needle_img, rotation_angle=0):
+    """Composite all layers into a single frame.
+
+    Args:
+        base_img: Background image; it is copied, never modified
+        pet_img: Pet/cover art image, pasted at PET_OFFSET_X/Y
+        vinyl_img: Vinyl record template, pasted over the pet image
+        needle_img: Needle overlay, pasted last and never rotated
+        rotation_angle: Angle in degrees to rotate vinyl and pet image (clockwise)
+
+    Returns:
+        The composited frame converted to RGB
+    """
+    # Start with a copy of the base image
+    frame = base_img.copy()
+
+    # This is called once per frame, so resampling cost matters:
+    # BILINEAR is 2-3x faster with minimal quality loss in video
+    resample_mode = Image.Resampling.BILINEAR
+
+    # Layers 2 and 3 spin together; a zero angle skips the resampling pass
+    if rotation_angle != 0:
+        # Negated because PIL's rotate() turns counter-clockwise
+        pet_layer = pet_img.rotate(
+            -rotation_angle, resample=resample_mode, expand=False
+        )
+        vinyl_layer = vinyl_img.rotate(
+            -rotation_angle, resample=resample_mode, expand=False
+        )
+    else:
+        pet_layer, vinyl_layer = pet_img, vinyl_img
+
+    # Layer 2: Paste pet image (each layer doubles as its own alpha mask)
+    frame.paste(pet_layer, (PET_OFFSET_X, PET_OFFSET_Y), pet_layer)
+
+    # Layer 3: Paste vinyl template
+    frame.paste(vinyl_layer, (VINYL_OFFSET_X, VINYL_OFFSET_Y), vinyl_layer)
+
+    # Layer 4: Paste needle on top
+    frame.paste(needle_img, (NEEDLE_OFFSET_X, NEEDLE_OFFSET_Y), needle_img)
+
+    # Convert to RGB for FFmpeg (removes alpha channel, faster encoding)
+    return frame.convert("RGB")
+
+
+def calculate_rotation_duration():
+    """Calculate duration in seconds for one full 360° rotation based on RPM."""
+    rotation_duration = 60.0 / VINYL_RPM  # seconds per rotation
+    return rotation_duration
+
+
+def calculate_rotation_angle(frame_num, total_frames, include_last_frame=False):
+    """Calculate rotation angle for a given frame.
+
+    Args:
+        frame_num: Current frame number (0-indexed)
+        total_frames: Total number of frames in one rotation
+        include_last_frame: If True, goes to 360°. If False, stops just before 360°
+                          to avoid duplicate frames when looping
+
+    Returns:
+        Rotation angle in degrees
+    """
+    progress = frame_num / total_frames  # 0.0 to 1.0
+
+    if include_last_frame:
+        return progress * 360  # 0° to 360°
+
+    # For seamless looping, we don't include the 360° frame (same as 0°)
+    return (progress * 360) % 360  # 0° to just under 360°
+
+
+def generate_and_stream_frames(
+    base_img, pet_img, vinyl_img, needle_img, audio_duration, audio_track_path
+):
+    """Generate frames for one full rotation and stream to FFmpeg via stdin.
+
+    Args:
+        base_img: Background image for every frame
+        pet_img: Pet/cover art image (rotates)
+        vinyl_img: Vinyl record template (rotates)
+        needle_img: Needle overlay (stationary)
+        audio_duration: Duration of audio track in seconds
+        audio_track_path: Path to audio file
+
+    Returns:
+        float: Seconds spent generating frames and encoding the video
+
+    Raises:
+        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status
+        BrokenPipeError: If FFmpeg exits while frames are still being written
+    """
+    step_start = time.time()
+
+    # Calculate frames needed for one full rotation
+    rotation_duration = calculate_rotation_duration()
+    frames_per_rotation = int(rotation_duration * FRAME_RATE)
+
+    # Calculate how many times to loop the rotation
+    total_rotations = audio_duration / rotation_duration
+
+    print(
+        f"Generating 1 rotation cycle ({frames_per_rotation} frames at {FRAME_RATE} fps)..."
+    )
+    print(f" Rotation duration: {rotation_duration:.2f} seconds at {VINYL_RPM} RPM")
+    print(f" Video will loop {total_rotations:.1f} times to match audio duration")
+    print(" Streaming frames directly to FFmpeg (no temp files)...")
+
+    # The loop filter replays the buffered cycle num_loops extra times after the
+    # first pass, so the video always outlasts the audio; -shortest trims it.
+    num_loops = int(audio_duration / rotation_duration)
+
+    # fmt: off
+    ffmpeg_cmd = [
+        "ffmpeg",
+        "-y",  # Overwrite output file
+        "-f", "image2pipe",  # Read images from pipe
+        "-framerate", str(FRAME_RATE),
+        "-vcodec", "png",  # Explicitly tell FFmpeg the input codec
+        "-i", "pipe:0",  # Read from stdin
+        "-i", audio_track_path,  # Audio input
+        "-filter_complex", f"[0:v]loop=loop={num_loops}:size={frames_per_rotation}:start=0[outv]",  # Loop video
+        "-map", "[outv]",  # Use looped video
+        "-map", "1:a",  # Use audio from second input
+        "-preset", "faster",  # Trade encoding time for file size (ultrafast/superfast/veryfast/faster/fast/medium)
+        "-c:v", "libx264",
+        "-crf", "28",  # Constant Rate Factor: 18=high quality, 28=good quality/smaller, 32+=lower quality
+        "-pix_fmt", "yuv420p",  # Convert pixel format (standard for MP4)
+        "-c:a", "aac",  # Encode audio using AAC codec
+        "-b:a", "192k",
+        "-r", str(FRAME_RATE),
+        "-movflags", "+faststart",  # Optimize for web streaming (metadata at beginning)
+        "-shortest",  # Stop when audio ends
+        OUTPUT_VIDEO,
+    ]
+    # fmt: on
+
+    # FFmpeg's log goes to an anonymous temp file rather than a pipe: nothing
+    # reads a pipe while frames are being written, so a full pipe buffer would
+    # block FFmpeg and, in turn, the stdin writes below. Frames themselves are
+    # still never written to disk.
+    with tempfile.TemporaryFile() as ffmpeg_log:
+        ffmpeg_process = subprocess.Popen(
+            ffmpeg_cmd,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.DEVNULL,
+            stderr=ffmpeg_log,
+            bufsize=10**8,  # Large buffer for performance
+        )
+
+        try:
+            # Generate and stream frames for one full rotation
+            for frame_num in range(frames_per_rotation):
+                # Calculate rotation angle (excluding 360° to avoid duplicate with 0°)
+                rotation_angle = calculate_rotation_angle(
+                    frame_num, frames_per_rotation, include_last_frame=False
+                )
+
+                # Create composite frame
+                composite_frame = create_composite_frame(
+                    base_img, pet_img, vinyl_img, needle_img, rotation_angle
+                )
+
+                # Convert PIL Image to PNG bytes and write to FFmpeg stdin
+                composite_frame.save(ffmpeg_process.stdin, format="PNG")
+
+                # Progress indicator
+                if (frame_num + 1) % 50 == 0 or frame_num == frames_per_rotation - 1:
+                    print(f" Progress: {frame_num + 1}/{frames_per_rotation} frames")
+
+            # Close stdin to signal end of input
+            ffmpeg_process.stdin.close()
+        except BrokenPipeError:
+            # FFmpeg quit early; reap it and show its log, which says why
+            ffmpeg_process.wait()
+            ffmpeg_log.seek(0)
+            print("✗ FFmpeg process terminated unexpectedly")
+            print(ffmpeg_log.read().decode(errors="replace"))
+            raise
+        except BaseException:
+            # Don't leave an orphaned encoder behind (e.g. on Ctrl+C)
+            ffmpeg_process.kill()
+            ffmpeg_process.wait()
+            raise
+
+        # Wait for FFmpeg to finish encoding
+        returncode = ffmpeg_process.wait()
+
+        if returncode != 0:
+            ffmpeg_log.seek(0)
+            print("✗ Error creating video with ffmpeg:")
+            print(ffmpeg_log.read().decode(errors="replace"))
+            raise subprocess.CalledProcessError(returncode, ffmpeg_cmd)
+
+    step_time = time.time() - step_start
+    print(f"✓ Video created successfully: {OUTPUT_VIDEO}")
+    print(f" Time taken: {step_time:.2f} seconds ({step_time / 60:.2f} minutes)")
+
+    return step_time
+
+
+def check_dependencies():
+    """Verify that required system tools are installed."""
+    if shutil.which("ffmpeg") is None:
+        print("=" * 60)
+        print("✗ ERROR: 'ffmpeg' was not found on your system PATH.")
+        print("This tool is required to convert image frames into a video.")
+        print("\nTo install it:")
+        print(" - macOS: brew install ffmpeg")
+        print(" - Ubuntu/Debian: sudo apt-get install ffmpeg")
+        print(" - Windows: https://ffmpeg.org/download.html")
+        print("=" * 60)
+        raise SystemExit(1)
+    print("✓ Dependency check passed (ffmpeg found)")
+
+
+def validate_audio_duration(audio_duration):
+    """Validate that audio duration is sufficient for at least one rotation.
+
+    Args:
+        audio_duration: Duration of audio in seconds
+
+    Raises:
+        SystemExit: If audio is too short
+    """
+    rotation_duration = calculate_rotation_duration()
+    if audio_duration < rotation_duration:
+        print("=" * 60)
+        print(
+            f"✗ ERROR: Audio duration ({audio_duration:.2f}s) is shorter than one full rotation ({rotation_duration:.2f}s at {VINYL_RPM} RPM)"
+        )
+        print("Please use a longer audio track or increase the VINYL_RPM setting.")
+        print("=" * 60)
+        raise SystemExit(1)
+
+
+def main(pet_img_path, audio_track_path, output_path=None):
+    """Main execution function.
+
+    Args:
+        pet_img_path: Path to pet image file
+        audio_track_path: Path to audio track file
+        output_path: Optional custom output path for the video file.
+                    If not provided, uses auto-generated path in OUTPUT_DIR.
+    """
+    # generate_and_stream_frames() reads the destination from this module global
+    global OUTPUT_VIDEO
+    if output_path:
+        OUTPUT_VIDEO = output_path
+        # Ensure parent directory exists
+        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+
+    start_time = time.time()
+    start_datetime = dt.now()
+
+    print("=" * 60)
+    print(f"Start time: {start_datetime.strftime('%Y-%m-%d %H:%M:%S')}")
+    print("=" * 60)
+    print(f"Pet image: {pet_img_path}")
+    print(f"Audio track: {audio_track_path}")
+    print("=" * 60)
+
+    check_dependencies()
+
+    # Step 0: Get audio duration and validate
+    audio_duration = get_audio_duration(audio_track_path)
+
+    # Validate audio duration
+    validate_audio_duration(audio_duration)
+
+    # Step 1: Setup
+    ensure_directories()
+
+    # Step 2: Load and prepare images
+    base_img, pet_img, vinyl_img, needle_img = load_and_resize_images(pet_img_path)
+    print("=" * 60)
+
+    # Step 3: Generate frames for one rotation and stream to FFmpeg with looping
+    generate_and_stream_frames(
+        base_img, pet_img, vinyl_img, needle_img, audio_duration, audio_track_path
+    )
+
+    end_time = time.time()
+    end_datetime = dt.now()
+    total_time = end_time - start_time
+
+    print("=" * 60)
+    print(f"✓ COMPLETE! Video saved to: {OUTPUT_VIDEO}")
+    print(f"End time: {end_datetime.strftime('%Y-%m-%d %H:%M:%S')}")
+    print(f"Total time taken: {total_time:.2f} seconds ({total_time / 60:.2f} minutes)")
+    print(
+        f"Video file size: {Path(OUTPUT_VIDEO).stat().st_size / (1024 * 1024):.1f} MB"
+    )
+    print("=" * 60)
+
+
+if __name__ == "__main__":
+    # Default parameters for testing
+    PET_IMG = f"{ASSET_DIR}/dog_upload.jpg"
+    AUDIO_TRACK = f"{ASSET_DIR}/my-track.mp3"
+
+    main(pet_img_path=PET_IMG, audio_track_path=AUDIO_TRACK)