feat(pause-insert): adaptive buffer, forward-snap, timeline drag + share link fix

Backend (Phase A):
- A1: Adaptive silence buffer — natural_gap_ms persisted per cue; renderer computes
  per-cue silence_before/silence_after instead of fixed 500ms; per-cue silence files
- A2: Forward-preferred snap — snap_pause_point prefers boundaries up to 4s ahead
  over boundaries within 1.5s behind, reducing mid-scene cuts
- A3: Min-gap validation — pause points with < 200ms gap trigger forward search
  to the next acceptable gap
- natural_gap_ms added to PausePointData model and api.ts type
- New config fields: whisper_snap_forward_window, whisper_snap_backward_window,
  ad_silence_buffer_default, ad_silence_buffer_min_after, ad_min_acceptable_gap
- Tests: test_whisper_snap.py (13 tests), test_video_renderer_buffers.py

Frontend (Phase B):
- B1: Drag pause-point markers — pointer state machine with 3px move threshold,
  clamp to min/max bounds, click-without-move still opens PausePointEditor
- B2: Drag freeze blocks — orange blocks translate with linked pause point
- B3: Time tooltip visible during drag, hidden on release
- Tests: TimelinePreview.drag.test.tsx (10 tests)

Fixes:
- Share link pointed to ai-sandbox.oliver.solutions — added app_url to Settings
  with correct optical-dev.oliver.solutions default; share_url now configurable
  via APP_URL env var
- Removed all ai-sandbox.oliver.solutions references from docker-compose,
  apache config, docs, and scripts

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Vadym Samoilenko 2026-05-01 16:09:09 +01:00
parent 31d631f70d
commit 2f4925353a
18 changed files with 998 additions and 119 deletions

View file

@ -1,6 +1,6 @@
# =============================================================================
# Apache config fragment — Accessible Video Platform
# Inject into: /etc/apache2/sites-available/ai-sandbox.oliver.solutions-ssl.conf
# Inject into: /etc/apache2/sites-available/optical-dev.oliver.solutions-ssl.conf
#
# Required modules:
# sudo a2enmod proxy proxy_http proxy_wstunnel rewrite headers
@ -70,7 +70,7 @@ Alias /video-accessibility /var/www/html/video-accessibility
# =============================================================================
#
# <VirtualHost *:443>
# ServerName ai-sandbox.oliver.solutions
# ServerName optical-dev.oliver.solutions
# DocumentRoot /var/www/html
#
# SSLEngine on
@ -82,8 +82,8 @@ Alias /video-accessibility /var/www/html/video-accessibility
#
# # — paste the block above here —
#
# ErrorLog ${APACHE_LOG_DIR}/ai-sandbox-error.log
# CustomLog ${APACHE_LOG_DIR}/ai-sandbox-access.log combined
# ErrorLog ${APACHE_LOG_DIR}/optical-dev-error.log
# CustomLog ${APACHE_LOG_DIR}/optical-dev-access.log combined
# </VirtualHost>
# =============================================================================
@ -91,6 +91,6 @@ Alias /video-accessibility /var/www/html/video-accessibility
# =============================================================================
# sudo apache2ctl configtest
# sudo systemctl reload apache2
# curl -I https://ai-sandbox.oliver.solutions/video-accessibility/
# curl https://ai-sandbox.oliver.solutions/video-accessibility/api/v1/health
# wscat -c wss://ai-sandbox.oliver.solutions/video-accessibility/api/v1/ws/job-list
# curl -I https://optical-dev.oliver.solutions/video-accessibility/
# curl https://optical-dev.oliver.solutions/video-accessibility/api/v1/health
# wscat -c wss://optical-dev.oliver.solutions/video-accessibility/api/v1/ws/job-list

View file

@ -21,8 +21,7 @@ _JOBS = "jobs"
def _share_url(token: str) -> str:
base = getattr(settings, "app_url", "https://ai-sandbox.oliver.solutions/video-accessibility")
return f"{base}/share/{token}"
return f"{settings.app_url}/share/{token}"
# ── Request schemas ───────────────────────────────────────────────────────────

View file

@ -6,6 +6,7 @@ class Settings(BaseSettings):
# App
app_env: str = "dev"
api_base_url: str = "http://localhost:8000"
app_url: str = "https://optical-dev.oliver.solutions/video-accessibility"
# Auth
jwt_secret: str
@ -247,6 +248,14 @@ class Settings(BaseSettings):
whisper_sentence_gap_threshold: float = 0.5 # Gap duration to classify as sentence boundary
whisper_phrase_gap_threshold: float = 0.3 # Gap duration to classify as phrase boundary
whisper_min_gap_threshold: float = 0.15 # Minimum gap duration to consider
# Forward-preferred snap windows (A2)
whisper_snap_forward_window: float = 4.0 # Prefer boundary up to N seconds ahead of Gemini point
whisper_snap_backward_window: float = 1.5 # Fall back to boundary up to N seconds behind
# Adaptive silence buffer (A1)
ad_silence_buffer_default: float = 0.5 # Base silence duration (s) before/after AD audio
ad_silence_buffer_min_after: float = 0.1 # Minimum silence after AD audio
# Minimum gap required at the chosen pause point (A3)
ad_min_acceptable_gap: float = 0.2 # Seconds; points with shorter gaps trigger forward search
# Cloud Run Service URLs (empty = use local processing)
# When set, CPU-intensive work is offloaded to Cloud Run with autoscaling

View file

@ -90,6 +90,7 @@ class PausePointData(BaseModel):
adjusted_ms: float | None = None # User-adjusted timestamp (ms), None = use original
min_bound_ms: float # Minimum allowed value (end of previous AD segment)
max_bound_ms: float # Maximum allowed value (start of next AD segment)
natural_gap_ms: float = 0.0 # Natural silence already present at pause point (ms); used to size silence buffers
class VideoSegmentMetadata(BaseModel):

View file

@ -116,8 +116,7 @@ def _qc_recipients(
def _deep_link(job_id: str, lang: str) -> str:
from ..core.config import settings
base = getattr(settings, "app_url", "https://ai-sandbox.oliver.solutions/video-accessibility")
return f"{base}/admin/qc/{job_id}#lang-{lang}"
return f"{settings.app_url}/admin/qc/{job_id}#lang-{lang}"
# ── Auto-assignment ───────────────────────────────────────────────────────────

View file

@ -54,6 +54,9 @@ class VideoRendererService:
# Audio ducking settings
self.duck_level = getattr(settings, 'accessible_video_duck_level', 0.3)
self.duck_fade_ms = getattr(settings, 'accessible_video_duck_fade_ms', 200)
# Adaptive silence buffer settings (A1)
self._silence_buffer_default = getattr(settings, 'ad_silence_buffer_default', 0.5)
self._silence_buffer_min_after = getattr(settings, 'ad_silence_buffer_min_after', 0.1)
# Cloud Run support
self._gcs_client: storage.Client | None = None
# Source video caching for Cloud Run (uploaded once, reused across operations)
@ -599,18 +602,38 @@ class VideoRendererService:
final_segment_needed = final_segment_start < source_duration
# ============================================================
# PARALLEL PHASE 1: Generate shared silence + extract all frames + all video segments
# PARALLEL PHASE 1: Generate per-cue silence files + extract all frames + video segments
# ============================================================
logger.info(f"Phase 1: Parallel extraction of {len(valid_placements)} frames and video segments")
silence_duration = 0.5 # 500ms shared by all
silence_path = temp_dir_path / "silence_shared.m4a"
# Compute adaptive silence buffers per cue (A1):
# natural_gap_ms already present at the pause point reduces how much silence we add.
_buf_default = self._silence_buffer_default
_buf_min_after = self._silence_buffer_min_after
silence_pre_paths: dict[int, str] = {}
silence_post_paths: dict[int, str] = {}
for p in valid_placements:
i = p["index"]
natural_gap = (p.get("natural_gap_ms") or 0.0) / 1000.0
silence_before = max(0.05, _buf_default - natural_gap * 0.5)
silence_after = max(_buf_min_after, _buf_default - natural_gap * 0.3)
p["silence_before"] = silence_before
p["silence_after"] = silence_after
silence_pre_paths[i] = str(temp_dir_path / f"silence_pre_{i}.m4a")
silence_post_paths[i] = str(temp_dir_path / f"silence_post_{i}.m4a")
logger.debug(
f"Cue {p['cue_index']}: natural_gap={natural_gap*1000:.0f}ms → "
f"silence_before={silence_before*1000:.0f}ms silence_after={silence_after*1000:.0f}ms"
)
# Build tasks for phase 1
phase1_tasks = []
# Task: Generate silence (just once, shared by all)
phase1_tasks.append(self._generate_silence(silence_duration, str(silence_path), video_props))
# Tasks: Generate per-cue silence files
for p in valid_placements:
i = p["index"]
phase1_tasks.append(self._generate_silence(p["silence_before"], silence_pre_paths[i], video_props))
phase1_tasks.append(self._generate_silence(p["silence_after"], silence_post_paths[i], video_props))
# Tasks: Extract all video segments
video_segment_paths = {}
@ -667,7 +690,7 @@ class VideoRendererService:
combined_audio_path = temp_dir_path / f"combined_audio_{i}.m4a"
combined_audio_paths[i] = str(combined_audio_path)
phase2_tasks.append(self._concatenate_audio(
[str(silence_path), p["ad_mp3_path"], str(silence_path)],
[silence_pre_paths[i], p["ad_mp3_path"], silence_post_paths[i]],
str(combined_audio_path),
video_props
))
@ -686,11 +709,14 @@ class VideoRendererService:
i = p["index"]
cue_index = p["cue_index"]
ad_duration = p["ad_duration"]
total_freeze_duration = ad_duration + (2 * silence_duration)
silence_before = p["silence_before"]
silence_after = p["silence_after"]
total_freeze_duration = ad_duration + silence_before + silence_after
logger.info(
f"Cue {cue_index}: Freeze segment with silence buffers - "
f"500ms + AD={ad_duration:.2f}s + 500ms = {total_freeze_duration:.2f}s"
f"Cue {cue_index}: Freeze segment — "
f"pre={silence_before*1000:.0f}ms + AD={ad_duration:.2f}s + "
f"post={silence_after*1000:.0f}ms = {total_freeze_duration:.2f}s"
)
freeze_segment_path = temp_dir_path / f"freeze_segment_{i}.mp4"
@ -720,7 +746,7 @@ class VideoRendererService:
p["actual_freeze_duration"] = actual_duration
# Log any discrepancy between expected and actual duration
expected = p["ad_duration"] + (2 * silence_duration)
expected = p["ad_duration"] + p["silence_before"] + p["silence_after"]
discrepancy = actual_duration - expected
if abs(discrepancy) > 0.01: # 10ms threshold
logger.warning(
@ -1535,7 +1561,7 @@ class VideoRendererService:
"""
Generate a silent audio file of specified duration.
Used to create 500ms silence buffers before/after AD audio.
Used to create adaptive silence buffers before/after AD audio.
"""
if self._use_cloud_run:
await self._generate_silence_cloud_run(duration, output_path, props)

View file

@ -97,8 +97,12 @@ class WhisperService:
self.phrase_gap_threshold = _get_setting('whisper_phrase_gap_threshold', 0.3)
self.min_gap_threshold = _get_setting('whisper_min_gap_threshold', 0.15)
# Snapping configuration
# Snapping configuration (A2: forward-preferred snap)
self.max_search_window = _get_setting('whisper_max_search_window', 30.0)
self.snap_forward_window = _get_setting('whisper_snap_forward_window', 4.0)
self.snap_backward_window = _get_setting('whisper_snap_backward_window', 1.5)
# Minimum gap duration to accept as a pause insertion point (A3: gap validation)
self.min_acceptable_gap = _get_setting('ad_min_acceptable_gap', 0.2)
@property
def model(self) -> WhisperModel:
@ -367,20 +371,18 @@ class WhisperService:
gaps: list[SpeechGap],
boundaries: list[SentenceBoundary],
speaking_threshold: float = 2.0
) -> tuple[float, float, str | None]:
) -> tuple[float, float, str | None, float]:
"""
Snap a Gemini pause point to the nearest sentence boundary.
Snap a Gemini pause point to an appropriate sentence boundary.
Simplified algorithm:
1. Check if "during speaking" (words within ±threshold)
- If NO Use Gemini's exact pause point
2. If during speaking, find nearest sentence gap and snap to MIDPOINT
3. Edge cases:
- Case A: First sentence in video pause at video start (0.0)
- Case B: Last sentence in video pause at video end
The video renderer adds 500ms silence buffers before/after AD audio,
so no overlap or catch-up logic is needed here.
Algorithm:
1. If NOT during speaking → use Gemini's exact point (already in a natural gap).
2. Forward-preferred boundary selection: prefer boundaries ahead of gemini_pause
within snap_forward_window, fall back to backward within snap_backward_window.
3. Edge cases A/B/C handled identically to before; each now also returns
natural_gap_ms so the renderer can shrink silence buffers accordingly.
4. Gap validation (A3): if the chosen pause_point has no acceptable gap nearby,
search forward for the next usable gap.
Args:
gemini_pause: Original pause point from Gemini (seconds)
@ -390,70 +392,133 @@ class WhisperService:
speaking_threshold: Max distance to consider "during speaking" (default: 2.0s)
Returns:
Tuple of (pause_point, resume_from, warning_message_or_none)
Note: resume_from always equals pause_point with the simplified algorithm
Tuple of (pause_point, resume_from, warning_or_none, natural_gap_ms)
natural_gap_ms: ms of natural silence already present at the chosen point
"""
# Step 1: Check if "during speaking" (words within ±threshold)
# Step 1: Not during speaking → use Gemini's exact point
if not self._is_during_speaking(gemini_pause, words, speaking_threshold):
# Not during speaking - use Gemini's exact pause point
natural_gap_ms = self._gap_duration_at(gemini_pause, gaps) * 1000.0
logger.info(
f"Pause point {gemini_pause:.2f}s is NOT during speaking "
f"(no words within ±{speaking_threshold}s), using Gemini's exact point"
f"Pause {gemini_pause:.2f}s not during speaking "
f"(no words within ±{speaking_threshold}s) → using exact point, "
f"natural_gap={natural_gap_ms:.0f}ms"
)
return gemini_pause, gemini_pause, None
return gemini_pause, gemini_pause, None, natural_gap_ms
# Step 2: During speaking - find nearest sentence boundary
# Step 2: During speaking — forward-preferred boundary selection (A2)
if not boundaries:
# No boundaries found at all - use Gemini's point with warning
logger.warning(f"No sentence boundaries found, using Gemini's exact point {gemini_pause:.2f}s")
return gemini_pause, gemini_pause, "No sentence boundaries found in transcript"
return gemini_pause, gemini_pause, "No sentence boundaries found in transcript", 0.0
# Find the boundary closest to the Gemini pause point
closest_boundary = min(boundaries, key=lambda b: abs(b.time - gemini_pause))
forward = [b for b in boundaries if 0 < b.time - gemini_pause <= self.snap_forward_window]
backward = [b for b in boundaries if 0 < gemini_pause - b.time <= self.snap_backward_window]
if forward:
closest_boundary = min(forward, key=lambda b: b.time - gemini_pause)
logger.debug(
f"Nearest boundary to {gemini_pause:.2f}s: {closest_boundary.boundary_type} "
f"at {closest_boundary.time:.2f}s (distance: {abs(closest_boundary.time - gemini_pause):.2f}s)"
f"Forward snap: Gemini={gemini_pause:.2f}s → boundary "
f"{closest_boundary.boundary_type}@{closest_boundary.time:.2f}s "
f"(+{closest_boundary.time - gemini_pause:.2f}s)"
)
elif backward:
closest_boundary = min(backward, key=lambda b: gemini_pause - b.time)
logger.debug(
f"Backward snap (no forward boundary): Gemini={gemini_pause:.2f}s → "
f"{closest_boundary.boundary_type}@{closest_boundary.time:.2f}s "
f"(-{gemini_pause - closest_boundary.time:.2f}s)"
)
else:
logger.warning(
f"No boundary within fwd={self.snap_forward_window}s / "
f"bwd={self.snap_backward_window}s of {gemini_pause:.2f}s → using exact point"
)
return (
gemini_pause, gemini_pause,
f"No boundary within snap windows of {gemini_pause:.2f}s",
0.0
)
# Case A: First sentence in video (no previous sentence) → snap to video start
# Case A: First sentence in video → snap to video start
if closest_boundary.boundary_type == "sentence_start" and not closest_boundary.has_previous_sentence:
pause_point = 0.0
logger.info(
f"Case A (first sentence): pause_point={pause_point:.2f}s "
f"(snapped to video start)"
)
return pause_point, pause_point, None
natural_gap_ms = self._gap_duration_at(pause_point, gaps) * 1000.0
logger.info("Case A (first sentence): pause_point=0.00s")
return pause_point, pause_point, None, natural_gap_ms
# Case B: Last sentence in video (no next sentence) → snap to boundary time
# Case B: Last sentence in video → snap to boundary time
if closest_boundary.boundary_type == "sentence_end" and not closest_boundary.has_next_sentence:
pause_point = closest_boundary.time
logger.info(
f"Case B (last sentence): pause_point={pause_point:.2f}s "
f"(snapped to video end at sentence boundary)"
)
return pause_point, pause_point, None
natural_gap_ms = self._gap_duration_at(pause_point, gaps) * 1000.0
logger.info(f"Case B (last sentence): pause_point={pause_point:.2f}s")
return pause_point, pause_point, None, natural_gap_ms
# Case C: Gap between two sentences → snap to MIDPOINT of the gap
# Case C: Gap between sentences → snap to midpoint
if closest_boundary.gap:
gap = closest_boundary.gap
# Calculate midpoint between end of previous sentence and start of next
midpoint = (gap.start + gap.end) / 2.0
pause_point = (gap.start + gap.end) / 2.0
natural_gap_ms = gap.duration * 1000.0
logger.info(
f"Case C (between sentences): gap={gap.start:.2f}s-{gap.end:.2f}s, "
f"midpoint={midpoint:.2f}s (resume from same point)"
f"Case C: gap {gap.start:.2f}s{gap.end:.2f}s → "
f"midpoint={pause_point:.2f}s, natural_gap={natural_gap_ms:.0f}ms"
)
return midpoint, midpoint, None
# A3: gap too short → look for next acceptable gap
if gap.duration < self.min_acceptable_gap:
adjusted, adjusted_gap_ms, warn = self._find_next_acceptable_gap(pause_point, gaps)
if warn is None:
logger.info(
f"A3: gap {gap.duration:.3f}s < threshold {self.min_acceptable_gap}s, "
f"snapped forward to {adjusted:.2f}s (gap={adjusted_gap_ms:.0f}ms)"
)
return adjusted, adjusted, None, adjusted_gap_ms
return pause_point, pause_point, None, natural_gap_ms
# Fallback: No gap associated with boundary - use the boundary time directly
# This shouldn't normally happen but handles edge cases
# Fallback: no gap → boundary time, then A3 validation
pause_point = closest_boundary.time
natural_gap_ms = self._gap_duration_at(pause_point, gaps) * 1000.0
logger.info(f"Fallback: boundary at {pause_point:.2f}s, natural_gap={natural_gap_ms:.0f}ms")
if natural_gap_ms < self.min_acceptable_gap * 1000.0:
adjusted, adjusted_gap_ms, warn = self._find_next_acceptable_gap(pause_point, gaps)
if warn is None:
logger.info(
f"Fallback: Using boundary at {closest_boundary.time:.2f}s, "
f"pause_point={pause_point:.2f}s (no gap available)"
f"A3: no acceptable gap at {pause_point:.2f}s, "
f"snapped forward to {adjusted:.2f}s"
)
return pause_point, pause_point, None
return (
adjusted, adjusted,
f"Snapped forward to next acceptable gap (no gap at boundary {pause_point:.2f}s)",
adjusted_gap_ms
)
logger.warning(f"A3: no acceptable gap found near {pause_point:.2f}s in forward window")
return pause_point, pause_point, "No acceptable gap found near pause_point", 0.0
return pause_point, pause_point, None, natural_gap_ms
def _gap_duration_at(self, time: float, gaps: list[SpeechGap], epsilon: float = 0.1) -> float:
"""Return duration in seconds of any gap whose window contains `time`, or 0.0."""
for gap in gaps:
if gap.start - epsilon <= time <= gap.end + epsilon:
return gap.duration
return 0.0
def _find_next_acceptable_gap(
self, from_time: float, gaps: list[SpeechGap]
) -> tuple[float, float, str | None]:
"""Find the nearest forward gap with duration >= min_acceptable_gap.
Returns (midpoint_s, duration_ms, warning_or_none).
"""
candidates = [
g for g in gaps
if g.start > from_time
and g.start - from_time <= self.snap_forward_window
and g.duration >= self.min_acceptable_gap
]
if not candidates:
return from_time, 0.0, "No acceptable gap in forward window"
next_gap = min(candidates, key=lambda g: g.start)
midpoint = (next_gap.start + next_gap.end) / 2.0
return midpoint, next_gap.duration * 1000.0, None
def refine_all_pause_points(
self,
@ -496,13 +561,14 @@ class WhisperService:
if placement.get("pause_point") is not None:
original = placement["pause_point"]
pause_point, resume_from, warning = self.snap_pause_point(
pause_point, resume_from, warning, natural_gap_ms = self.snap_pause_point(
original, words, gaps, boundaries
)
refined["pause_point"] = pause_point
refined["resume_from"] = resume_from
refined["original_pause_point"] = original # Preserve for debugging
refined["natural_gap_ms"] = natural_gap_ms # For adaptive silence buffer
if warning:
warnings.append(f"Cue {placement['ad_cue_index']}: {warning}")

View file

@ -0,0 +1,90 @@
"""Tests for adaptive silence buffer formula in video_renderer.py (A1).
The renderer lives behind heavy GCP + FFmpeg deps only available in Docker.
These tests cover the pure arithmetic used inside _render_pause_insert_method;
they do not import VideoRendererService to stay runnable locally via pytest.
"""
import pytest
# ── Pure formula tests (no FFmpeg, no GCS) ───────────────────────────────────
#
# Mirrors the exact formula in _render_pause_insert_method:
# natural_gap = natural_gap_ms / 1000.0
# silence_before = max(0.05, default_buf - natural_gap * 0.5)
# silence_after = max(min_after, default_buf - natural_gap * 0.3)
def _buffers(
natural_gap_ms: float,
default_buf: float = 0.5,
min_after: float = 0.1,
) -> tuple[float, float]:
natural_gap = natural_gap_ms / 1000.0
silence_before = max(0.05, default_buf - natural_gap * 0.5)
silence_after = max(min_after, default_buf - natural_gap * 0.3)
return silence_before, silence_after
@pytest.mark.parametrize("natural_gap_ms,exp_before,exp_after", [
    # No natural gap → full default buffers
    (0, 0.50, 0.50),
    # 200 ms gap: before = 0.5 - 0.1 = 0.40; after = 0.5 - 0.06 = 0.44
    (200, 0.40, 0.44),
    # 500 ms gap: before = 0.5 - 0.25 = 0.25; after = 0.5 - 0.15 = 0.35
    (500, 0.25, 0.35),
    # 1000 ms gap: before = max(0.05, 0.5-0.5)=0.05; after = max(0.1, 0.5-0.3)=0.20
    (1000, 0.05, 0.20),
    # 1500 ms gap: before=0.05 (floor); after = max(0.1, 0.5-0.45)=0.10 (floor)
    (1500, 0.05, 0.10),
    # 2000 ms gap: both at their floors
    (2000, 0.05, 0.10),
])
def test_buffer_formula(natural_gap_ms, exp_before, exp_after):
    """Each tabulated natural gap yields the expected pre/post buffers (±1 ms)."""
    tolerance = 0.001
    actual_before, actual_after = _buffers(natural_gap_ms)
    assert actual_before == pytest.approx(exp_before, abs=tolerance)
    assert actual_after == pytest.approx(exp_after, abs=tolerance)
def test_total_freeze_duration_uses_adaptive_buffers():
    """total_freeze_duration = ad_duration + silence_before + silence_after.

    With an 800 ms natural gap the buffers shrink to 0.10 s and 0.26 s, so a
    5.0 s AD clip freezes for 5.36 s instead of the former fixed 6.0 s.
    """
    ad_duration = 5.0
    natural_gap_ms = 800.0  # 800ms natural gap

    before, after = _buffers(natural_gap_ms)
    # before = max(0.05, 0.5 - 0.4) = 0.10; after = max(0.1, 0.5 - 0.24) = 0.26
    assert before == pytest.approx(0.10, abs=0.001)
    assert after == pytest.approx(0.26, abs=0.001)

    # Pin the total to a literal: comparing it with the very expression that
    # produced it could never fail.
    total = ad_duration + before + after
    assert total == pytest.approx(5.36, abs=0.001)

    # Sanity: less than the old constant 1.0s overhead when there's a natural gap
    assert (before + after) < 1.0
def test_buffers_never_below_floor():
    """Neither buffer drops under its floor (0.05 s before, 0.10 s after) for any sampled gap."""
    floor_before, floor_after = 0.05, 0.10
    sampled_gaps_ms = (0, 100, 500, 1000, 5000, 10000)
    for gap_ms in sampled_gaps_ms:
        before, after = _buffers(gap_ms)
        assert before >= floor_before, f"silence_before={before} below floor for gap={gap_ms}ms"
        assert after >= floor_after, f"silence_after={after} below floor for gap={gap_ms}ms"
def test_large_natural_gap_has_less_total_overhead_than_small_gap():
    """A 900 ms natural gap must add less combined silence than a 100 ms one."""
    overhead_small_gap = sum(_buffers(100))
    overhead_large_gap = sum(_buffers(900))
    assert overhead_small_gap > overhead_large_gap
def test_renderer_config_defaults_match_formula():
    """The formula honours the default buffer and the trailing floor it is given.

    Nothing here reads config.py, so the two constants below are hand-copied
    and must be kept in sync with its defaults:
        ad_silence_buffer_default: float = 0.5
        ad_silence_buffer_min_after: float = 0.1
    """
    DEFAULT_BUF = 0.5
    MIN_AFTER = 0.1

    # No natural gap → both buffers equal the configured default.
    before, after = _buffers(0, DEFAULT_BUF, MIN_AFTER)
    assert before == pytest.approx(DEFAULT_BUF)
    assert after == pytest.approx(DEFAULT_BUF)

    # Saturated gap → both buffers sit on their floors.
    before, after = _buffers(10_000, DEFAULT_BUF, MIN_AFTER)
    assert before == pytest.approx(0.05)
    assert after == pytest.approx(MIN_AFTER)

View file

@ -0,0 +1,231 @@
"""Tests for the improved snap_pause_point algorithm (A1/A2/A3)."""
import sys
from unittest.mock import MagicMock
# faster_whisper ships only in the Docker image; stub it so pytest can run locally.
if 'faster_whisper' not in sys.modules:
sys.modules['faster_whisper'] = MagicMock()
import pytest
from app.services.whisper_service import (
WhisperService,
WordTimestamp,
SpeechGap,
SentenceBoundary,
)
# ── fixtures ────────────────────────────────────────────────────────────────
@pytest.fixture
def svc():
    """Provide a WhisperService constructed with no arguments."""
    service = WhisperService()
    return service
def _word(start: float, end: float, text: str = "word") -> WordTimestamp:
    """Build a WordTimestamp spanning ``start``..``end`` with the given text."""
    fields = {"word": text, "start": start, "end": end}
    return WordTimestamp(**fields)
def _gap(start: float, end: float, gap_type: str = "sentence") -> SpeechGap:
    """Build a SpeechGap whose duration is derived as ``end - start``."""
    span = end - start
    return SpeechGap(start=start, end=end, duration=span, gap_type=gap_type)
def _boundary(
    time: float,
    btype: str = "sentence_end",
    has_prev: bool = True,
    has_next: bool = True,
    gap: SpeechGap | None = None,
) -> SentenceBoundary:
    """Build a SentenceBoundary at ``time``; ``word_index`` is always 0."""
    fields = {
        "time": time,
        "boundary_type": btype,
        "word_index": 0,
        "has_previous_sentence": has_prev,
        "has_next_sentence": has_next,
        "gap": gap,
    }
    return SentenceBoundary(**fields)
# ── A2: forward-preferred snap ───────────────────────────────────────────────
class TestForwardPreferredSnap:
    """A2: a boundary ahead of the Gemini point is preferred over one behind it."""

    def test_picks_forward_over_equidistant_backward(self, svc):
        """Gemini=10.5s; forward boundary@11.2s and backward@9.8s — must pick forward."""
        gap = _gap(11.2, 11.8)
        boundaries = [
            _boundary(9.8, gap=_gap(9.8, 10.0)),
            _boundary(11.2, gap=gap),
        ]
        words = [_word(9.0, 9.5), _word(10.0, 10.5), _word(11.0, 11.2)]
        gaps = [_gap(9.8, 10.0), gap]

        pause, _, warning, _ = svc.snap_pause_point(10.5, words, gaps, boundaries)

        assert pause == pytest.approx(11.5, abs=0.01)  # midpoint of 11.2–11.8
        assert warning is None

    def test_forward_boundary_within_window_is_preferred(self, svc):
        """Even a slightly farther forward boundary beats a closer backward one."""
        gap_fwd = _gap(12.0, 12.6)
        gap_bwd = _gap(10.1, 10.4)
        boundaries = [
            _boundary(10.1, gap=gap_bwd),
            _boundary(12.0, gap=gap_fwd),
        ]
        words = [_word(9.0, 10.1), _word(10.5, 12.0)]
        gaps = [gap_bwd, gap_fwd]

        pause, _, _, _ = svc.snap_pause_point(10.5, words, gaps, boundaries)

        assert pause == pytest.approx(12.3, abs=0.01)  # midpoint of 12.0–12.6

    def test_falls_back_to_backward_when_no_forward_within_window(self, svc):
        """No forward boundary within snap_forward_window → use backward (within 1.5s)."""
        # Boundary at 9.0s: distance = 10.5 - 9.0 = 1.5s, i.e. exactly on the
        # inclusive edge of snap_backward_window (1.5s), so it still qualifies.
        gap = _gap(9.0, 9.4)
        boundaries = [_boundary(9.0, gap=gap)]
        words = [_word(7.0, 9.0), _word(9.4, 10.5)]
        gaps = [gap]

        pause, _, warning, _ = svc.snap_pause_point(10.5, words, gaps, boundaries)

        assert pause == pytest.approx(9.2, abs=0.01)  # midpoint of 9.0–9.4
        # A 0.4s gap is above the min-gap threshold, so no warning is expected.
        assert warning is None

    def test_no_boundary_in_any_window_returns_gemini_with_warning(self, svc):
        """Boundary exists but outside both windows → exact Gemini point + warning."""
        # Boundaries sit 10.4s behind and 39.5s ahead of the Gemini point,
        # far beyond either snap window.
        boundaries = [
            _boundary(0.1, gap=_gap(0.0, 0.5)),
            _boundary(50.0, gap=_gap(49.0, 50.0)),
        ]
        words = [_word(9.0, 12.0)]
        gaps = []

        pause, resume, warning, _ = svc.snap_pause_point(10.5, words, gaps, boundaries)

        assert pause == pytest.approx(10.5)
        assert resume == pytest.approx(10.5)  # resume point is returned unchanged too
        assert warning is not None
        assert "snap windows" in warning.lower()

    def test_not_during_speaking_uses_exact_point(self, svc):
        """Pause point far from all words → no snap, exact point returned."""
        boundaries = [_boundary(5.0, gap=_gap(4.8, 5.3))]
        words = [_word(0.0, 3.0)]  # speech ends at 3s; pause at 7s
        # Gap covers 3.0–10.0; pause at 7.0 is inside it
        gaps = [_gap(3.0, 10.0)]

        pause, _, warning, natural_gap_ms = svc.snap_pause_point(7.0, words, gaps, boundaries)

        assert pause == pytest.approx(7.0)
        assert warning is None
        # natural_gap covers the pause (7.0 is inside gap 3.0–10.0)
        assert natural_gap_ms > 0
# ── A1: natural_gap_ms returned correctly ───────────────────────────────────
class TestNaturalGapMs:
    """A1: the fourth element returned by snap_pause_point is the natural gap in ms."""

    def test_case_c_returns_gap_duration(self, svc):
        """Snapping to a gap midpoint reports that gap's duration × 1000."""
        silence = _gap(10.0, 11.2)
        spoken = [_word(9.0, 10.0), _word(11.2, 12.0)]

        _, _, _, reported_ms = svc.snap_pause_point(
            10.5, spoken, [silence], [_boundary(10.0, gap=silence)]
        )

        assert reported_ms == pytest.approx(1200.0, abs=1.0)  # 1.2s gap

    def test_no_gap_returns_zero(self, svc):
        """A boundary without a gap, and an empty gaps list, reports exactly 0."""
        gapless_boundary = _boundary(10.0, gap=None)  # no gap attached
        spoken = [_word(9.0, 10.1), _word(10.1, 11.0)]

        _, _, _, reported_ms = svc.snap_pause_point(10.5, spoken, [], [gapless_boundary])

        assert reported_ms == 0.0

    def test_not_during_speaking_reads_gap_from_gaps_list(self, svc):
        """With no speech near the pause, the gap is looked up in the gaps list."""
        silence = _gap(5.0, 6.0)  # covers pause at 5.5s
        spoken = [_word(0.0, 3.0)]  # all speech before 3s

        _, _, _, reported_ms = svc.snap_pause_point(5.5, spoken, [silence], [])

        assert reported_ms == pytest.approx(1000.0, abs=1.0)
# ── A3: minimum gap validation ───────────────────────────────────────────────
class TestMinGapValidation:
    """A3: gaps shorter than min_acceptable_gap trigger a forward search."""

    def test_short_gap_triggers_forward_search(self, svc):
        """Case C gap < min_acceptable_gap → searches forward for a better gap."""
        short_gap = _gap(10.0, 10.1)  # 0.1s < 0.2s threshold
        good_gap = _gap(11.5, 12.0)  # 0.5s — acceptable
        boundaries = [_boundary(10.0, gap=short_gap)]
        words = [_word(9.0, 10.0), _word(10.2, 11.5)]
        gaps = [short_gap, good_gap]

        pause, _, warning, natural_gap_ms = svc.snap_pause_point(10.5, words, gaps, boundaries)

        # Should snap forward to midpoint of good_gap (11.5+12.0)/2 = 11.75
        assert pause == pytest.approx(11.75, abs=0.01)
        assert natural_gap_ms == pytest.approx(500.0, abs=1.0)
        # The gap-midpoint path moves forward silently (no warning).
        assert warning is None

    def test_short_gap_no_forward_alternative_keeps_original(self, svc):
        """Short gap, no acceptable gap ahead → stays at the short gap's midpoint, no warning."""
        short_gap = _gap(10.0, 10.1)
        boundaries = [_boundary(10.0, gap=short_gap)]
        words = [_word(9.0, 10.0), _word(10.2, 14.0)]
        gaps = [short_gap]  # no other gap

        pause, _, warning, _ = svc.snap_pause_point(10.5, words, gaps, boundaries)

        # Falls back to midpoint of short_gap since no alternative
        assert pause == pytest.approx(10.05, abs=0.01)
        assert warning is None  # no warning for "stayed at original"

    def test_fallback_no_gap_triggers_forward_search(self, svc):
        """Fallback case (no gap on boundary) with no nearby gap → searches forward."""
        b = _boundary(10.0, gap=None)
        good_gap = _gap(11.0, 11.8)
        words = [_word(9.0, 10.0), _word(10.1, 11.0)]
        gaps = [good_gap]

        pause, _, warning, natural_gap_ms = svc.snap_pause_point(10.5, words, gaps, [b])

        assert pause == pytest.approx(11.4, abs=0.01)  # midpoint of 11.0–11.8
        assert natural_gap_ms == pytest.approx(800.0, abs=1.0)
        assert warning is not None  # warns that it snapped forward
# ── refine_all_pause_points integration ─────────────────────────────────────
class TestRefineAllPausePointsIntegration:
    """refine_all_pause_points exercised end to end on single-cue inputs."""

    def test_stores_natural_gap_ms_on_placement(self, svc):
        """Each refined placement carries a natural_gap_ms entry."""
        silence = _gap(10.0, 11.0)
        spoken = [_word(8.0, 10.0), _word(11.0, 12.0)]
        cue = {"ad_cue_index": 0, "pause_point": 10.5, "ad_duration": 3.0}

        refined, _ = svc.refine_all_pause_points([cue], spoken, [silence])

        first = refined[0]
        assert "natural_gap_ms" in first
        assert first["natural_gap_ms"] == pytest.approx(1000.0, abs=1.0)

    def test_no_whisper_data_returns_original_with_zero_gap(self, svc):
        """With no words and no gaps the pause point is kept, the gap is 0 and nothing is warned."""
        cue = {"ad_cue_index": 0, "pause_point": 5.0, "ad_duration": 2.0}

        refined, warnings = svc.refine_all_pause_points([cue], [], [])

        first = refined[0]
        assert first["pause_point"] == pytest.approx(5.0)
        assert first.get("natural_gap_ms", 0) == 0.0
        # No words → expected to take the not-during-speaking path → no snap → no warning
        assert len(warnings) == 0

View file

@ -94,7 +94,7 @@ services:
JWT_ALG: ${JWT_ALG:-HS256}
JWT_ACCESS_TTL_MIN: ${JWT_ACCESS_TTL_MIN:-240}
JWT_REFRESH_TTL_DAYS: ${JWT_REFRESH_TTL_DAYS:-7}
COOKIE_DOMAIN: ${COOKIE_DOMAIN:-ai-sandbox.oliver.solutions}
COOKIE_DOMAIN: ${COOKIE_DOMAIN:-optical-dev.oliver.solutions}
COOKIE_SECURE: ${COOKIE_SECURE:-true}
COOKIE_SAMESITE: ${COOKIE_SAMESITE:-Lax}
@ -119,8 +119,8 @@ services:
# Email
SENDGRID_API_KEY: ${SENDGRID_API_KEY:-}
EMAIL_FROM: ${EMAIL_FROM:-noreply@ai-sandbox.oliver.solutions}
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://ai-sandbox.oliver.solutions/video-accessibility}
EMAIL_FROM: ${EMAIL_FROM:-noreply@optical-dev.oliver.solutions}
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://optical-dev.oliver.solutions/video-accessibility}
# Microsoft Authentication
AZURE_CLIENT_ID: ${AZURE_CLIENT_ID:-}
@ -189,7 +189,7 @@ services:
JWT_ALG: ${JWT_ALG:-HS256}
JWT_ACCESS_TTL_MIN: ${JWT_ACCESS_TTL_MIN:-240}
JWT_REFRESH_TTL_DAYS: ${JWT_REFRESH_TTL_DAYS:-7}
COOKIE_DOMAIN: ${COOKIE_DOMAIN:-ai-sandbox.oliver.solutions}
COOKIE_DOMAIN: ${COOKIE_DOMAIN:-optical-dev.oliver.solutions}
COOKIE_SECURE: ${COOKIE_SECURE:-true}
COOKIE_SAMESITE: ${COOKIE_SAMESITE:-Lax}
@ -215,8 +215,8 @@ services:
# Email
SENDGRID_API_KEY: ${SENDGRID_API_KEY:-}
EMAIL_FROM: ${EMAIL_FROM:-noreply@ai-sandbox.oliver.solutions}
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://ai-sandbox.oliver.solutions/video-accessibility}
EMAIL_FROM: ${EMAIL_FROM:-noreply@optical-dev.oliver.solutions}
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://optical-dev.oliver.solutions/video-accessibility}
# Microsoft Authentication
AZURE_CLIENT_ID: ${AZURE_CLIENT_ID:-}
@ -276,7 +276,7 @@ services:
JWT_ALG: ${JWT_ALG:-HS256}
JWT_ACCESS_TTL_MIN: ${JWT_ACCESS_TTL_MIN:-240}
JWT_REFRESH_TTL_DAYS: ${JWT_REFRESH_TTL_DAYS:-7}
COOKIE_DOMAIN: ${COOKIE_DOMAIN:-ai-sandbox.oliver.solutions}
COOKIE_DOMAIN: ${COOKIE_DOMAIN:-optical-dev.oliver.solutions}
COOKIE_SECURE: ${COOKIE_SECURE:-true}
COOKIE_SAMESITE: ${COOKIE_SAMESITE:-Lax}
@ -302,8 +302,8 @@ services:
# Email
SENDGRID_API_KEY: ${SENDGRID_API_KEY:-}
EMAIL_FROM: ${EMAIL_FROM:-noreply@ai-sandbox.oliver.solutions}
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://ai-sandbox.oliver.solutions/video-accessibility}
EMAIL_FROM: ${EMAIL_FROM:-noreply@optical-dev.oliver.solutions}
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://optical-dev.oliver.solutions/video-accessibility}
# Microsoft Authentication
AZURE_CLIENT_ID: ${AZURE_CLIENT_ID:-}
@ -369,7 +369,7 @@ services:
JWT_ALG: ${JWT_ALG:-HS256}
JWT_ACCESS_TTL_MIN: ${JWT_ACCESS_TTL_MIN:-240}
JWT_REFRESH_TTL_DAYS: ${JWT_REFRESH_TTL_DAYS:-7}
COOKIE_DOMAIN: ${COOKIE_DOMAIN:-ai-sandbox.oliver.solutions}
COOKIE_DOMAIN: ${COOKIE_DOMAIN:-optical-dev.oliver.solutions}
COOKIE_SECURE: ${COOKIE_SECURE:-true}
COOKIE_SAMESITE: ${COOKIE_SAMESITE:-Lax}
@ -395,8 +395,8 @@ services:
# Email
SENDGRID_API_KEY: ${SENDGRID_API_KEY:-}
EMAIL_FROM: ${EMAIL_FROM:-noreply@ai-sandbox.oliver.solutions}
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://ai-sandbox.oliver.solutions/video-accessibility}
EMAIL_FROM: ${EMAIL_FROM:-noreply@optical-dev.oliver.solutions}
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://optical-dev.oliver.solutions/video-accessibility}
# Microsoft Authentication
AZURE_CLIENT_ID: ${AZURE_CLIENT_ID:-}
@ -466,7 +466,7 @@ services:
JWT_ALG: ${JWT_ALG:-HS256}
JWT_ACCESS_TTL_MIN: ${JWT_ACCESS_TTL_MIN:-240}
JWT_REFRESH_TTL_DAYS: ${JWT_REFRESH_TTL_DAYS:-7}
COOKIE_DOMAIN: ${COOKIE_DOMAIN:-ai-sandbox.oliver.solutions}
COOKIE_DOMAIN: ${COOKIE_DOMAIN:-optical-dev.oliver.solutions}
COOKIE_SECURE: ${COOKIE_SECURE:-true}
COOKIE_SAMESITE: ${COOKIE_SAMESITE:-Lax}
@ -492,8 +492,8 @@ services:
# Email
SENDGRID_API_KEY: ${SENDGRID_API_KEY:-}
EMAIL_FROM: ${EMAIL_FROM:-noreply@ai-sandbox.oliver.solutions}
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://ai-sandbox.oliver.solutions/video-accessibility}
EMAIL_FROM: ${EMAIL_FROM:-noreply@optical-dev.oliver.solutions}
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://optical-dev.oliver.solutions/video-accessibility}
# Microsoft Authentication
AZURE_CLIENT_ID: ${AZURE_CLIENT_ID:-}

View file

@ -35,7 +35,7 @@
| Environment | URL |
|-------------|-----|
| Production | `https://ai-sandbox.oliver.solutions/video-accessibility-back` |
| Production | `https://optical-dev.oliver.solutions/video-accessibility-back` |
| Local (Docker) | `http://localhost:8012` |
| OpenAPI (Swagger) | `{base_url}/docs` |

View file

@ -38,8 +38,8 @@
| `optical-web-1` | Production host — runs all Docker services | Production |
| Local machine | Developer workstation — Docker Compose local stack | Development |
**Production URL:** `https://ai-sandbox.oliver.solutions/video-accessibility`
**Production API URL:** `https://ai-sandbox.oliver.solutions/video-accessibility-back`
**Production URL:** `https://optical-dev.oliver.solutions/video-accessibility`
**Production API URL:** `https://optical-dev.oliver.solutions/video-accessibility-back`
---
@ -68,7 +68,7 @@ All services are defined in `docker-compose.yml` and share the `accessible-video
| `redis` | 6379 | — | Internal only |
| Workers | — | — | No HTTP port |
Production: nginx reverse-proxies `optical-web-1:8012` → `https://ai-sandbox.oliver.solutions/video-accessibility-back`.
Production: nginx reverse-proxies `optical-web-1:8012` → `https://optical-dev.oliver.solutions/video-accessibility-back`.
---

View file

@ -116,7 +116,7 @@ Production uses the `.env` file on optical-web-1. Key differences from `.env.exa
|----------|-----------------|
| `APP_ENV` | `production` |
| `COOKIE_SECURE` | `true` |
| `COOKIE_DOMAIN` | `ai-sandbox.oliver.solutions` |
| `COOKIE_DOMAIN` | `optical-dev.oliver.solutions` |
| All API keys | Real secret values |
---
@ -279,7 +279,7 @@ Copy from `.env.example`. All variables are required unless marked optional.
| `JWT_ALG` | `HS256` | No | JWT signing algorithm |
| `JWT_ACCESS_TTL_MIN` | `240` | No | Access token TTL (minutes) |
| `JWT_REFRESH_TTL_DAYS` | `7` | No | Refresh token TTL (days) |
| `COOKIE_DOMAIN` | `ai-sandbox.oliver.solutions` | Yes | Refresh cookie domain |
| `COOKIE_DOMAIN` | `optical-dev.oliver.solutions` | Yes | Refresh cookie domain |
| `COOKIE_SECURE` | `true` | No | Set `false` for local HTTP |
| `COOKIE_SAMESITE` | `Lax` | No | |
| `MONGODB_URI` | — | Yes | MongoDB connection string |
@ -295,7 +295,7 @@ Copy from `.env.example`. All variables are required unless marked optional.
| `ELEVENLABS_API_KEY` | — | No | ElevenLabs API key |
| `GOOGLE_TTS_CREDENTIALS` | `/secrets/gcp-credentials.json` | No | Separate TTS credentials if needed |
| `SENDGRID_API_KEY` | — | No | SendGrid API key |
| `EMAIL_FROM` | `noreply@ai-sandbox.oliver.solutions` | No | Sender address |
| `EMAIL_FROM` | `noreply@optical-dev.oliver.solutions` | No | Sender address |
| `CLIENT_BASE_URL` | — | No | Frontend URL for email links |
| `AZURE_CLIENT_ID` | — | No | Microsoft SSO client ID |
| `AZURE_AUTHORITY` | — | No | Microsoft tenant authority URL |

View file

@ -41,13 +41,83 @@ export function TimelinePreview({
const [contextMenu, setContextMenu] = useState<ContextMenuState | null>(null);
const timelineRef = useRef<HTMLDivElement>(null);
// ── Drag state (B1 marker drag, B2 freeze-block drag) ─────────────────────
// State drives rendering (tooltip, cursor, position); refs allow event handlers
// to read current values synchronously without waiting for a React re-render
// (pointer events can fire faster than React batches state commits).
const [draggingCueIndex, setDraggingCueIndex] = useState<number | null>(null);
const [dragMs, setDragMs] = useState<number | null>(null);
const draggingCueIndexRef = useRef<number | null>(null);
const dragMsRef = useRef<number | null>(null);
const dragStartXRef = useRef<number>(0);
const movedRef = useRef<boolean>(false);
// Expresses a time in ms as a percentage of totalDurationMs (used for CSS left/width).
// Returns 0 whenever the total duration is not a positive number.
const getPositionPercent = useCallback(
  (ms: number) => {
    if (!(totalDurationMs > 0)) return 0;
    return (ms / totalDurationMs) * 100;
  },
  [totalDurationMs]
);
// Converts a pointer clientX coordinate into a time (ms) for the given pause point.
// The x position is taken as a fraction of the timeline element's width, scaled to
// totalDurationMs, rounded, and clamped to the pause point's [min_bound_ms, max_bound_ms].
// Falls back to the pause point's current position (adjusted_ms, else original_ms) when
// the timeline element is missing, the total duration is not positive, or the timeline
// has no measurable width.
const clientXToMs = useCallback(
  (clientX: number, pp: PausePointData): number => {
    const currentMs = pp.adjusted_ms ?? pp.original_ms;
    const timelineEl = timelineRef.current;
    if (!timelineEl || totalDurationMs <= 0) return currentMs;

    const rect = timelineEl.getBoundingClientRect();
    // A zero-width rect would turn the division below into NaN or ±Infinity, which
    // either propagates NaN to the caller or snaps the marker to one of its bounds.
    if (!(rect.width > 0)) return currentMs;

    const fraction = Math.max(0, Math.min(1, (clientX - rect.left) / rect.width));
    const raw = Math.round(fraction * totalDurationMs);
    return Math.max(pp.min_bound_ms, Math.min(pp.max_bound_ms, raw));
  },
  [totalDurationMs]
);
// ── Pointer handlers wired to each pause-point marker / freeze block ───────
// Arms a potential drag for the given pause point on a primary-button press.
// Captures the pointer on the pressed element, records the starting x, and seeds both
// the refs (read synchronously by the move/up handlers) and the render state with the
// pause point's current position.
const handleDragPointerDown = (e: React.PointerEvent, pp: PausePointData) => {
  const isPrimaryButton = e.button === 0;
  if (!isPrimaryButton) {
    // Other buttons (e.g. right-click) are left alone so onContextMenu can handle them.
    return;
  }

  e.stopPropagation();
  const target = e.currentTarget as HTMLElement;
  target.setPointerCapture(e.pointerId);

  const cueIndex = pp.cue_index;
  const startMs = pp.adjusted_ms ?? pp.original_ms;

  // Refs are updated first so handlers firing before the next render see fresh values.
  dragStartXRef.current = e.clientX;
  movedRef.current = false;
  draggingCueIndexRef.current = cueIndex;
  dragMsRef.current = startMs;

  setDraggingCueIndex(cueIndex);
  setDragMs(startMs);
};
// Updates the in-progress drag position for the given pause point.
// Ignored unless this pause point is the one being dragged. Movement only counts as a
// drag once the pointer has travelled more than 3px from the press position; after that
// every move recomputes the clamped time from the pointer's clientX.
const handleDragPointerMove = (e: React.PointerEvent, pp: PausePointData) => {
  const isActiveDrag = draggingCueIndexRef.current === pp.cue_index;
  if (!isActiveDrag) return;

  const travelledPx = Math.abs(e.clientX - dragStartXRef.current);
  if (travelledPx > 3) {
    movedRef.current = true;
  }

  if (movedRef.current) {
    const nextMs = clientXToMs(e.clientX, pp);
    dragMsRef.current = nextMs;
    setDragMs(nextMs);
  }
};
// Finishes a press/drag on the given pause point.
// Releases pointer capture, clears the drag refs and state, then either:
//   - calls openEditorFn when the pointer never crossed the move threshold (a click), or
//   - calls onPausePointUpdate with the released position when it was dragged to a
//     time different from the pause point's current one.
// A drag that ends on the current time does nothing.
// NOTE(review): the freeze-block element also has an onClick that calls
// handleSegmentClick when movedRef is false, and passes handleSegmentClick as
// openEditorFn here — a plain click on a freeze block therefore looks like it invokes
// handleSegmentClick twice. Confirm and de-duplicate at the call site if so.
const handleDragPointerUp = (
  e: React.PointerEvent,
  pp: PausePointData,
  openEditorFn: () => void
) => {
  if (draggingCueIndexRef.current !== pp.cue_index) return;

  const target = e.currentTarget as HTMLElement;
  target.releasePointerCapture(e.pointerId);

  // Snapshot the drag outcome before the refs are reset below.
  const wasDragged = movedRef.current;
  const releasedAtMs = dragMsRef.current;

  draggingCueIndexRef.current = null;
  dragMsRef.current = null;
  setDraggingCueIndex(null);
  setDragMs(null);

  if (!wasDragged) {
    openEditorFn();
    return;
  }

  const previousMs = pp.adjusted_ms ?? pp.original_ms;
  if (releasedAtMs !== null && releasedAtMs !== previousMs) {
    onPausePointUpdate(pp.cue_index, releasedAtMs);
  }
};
// ── Existing click / editor handlers ──────────────────────────────────────
const handlePausePointMarkerClick = (
e: React.MouseEvent,
e: React.MouseEvent | React.PointerEvent,
pausePoint: PausePointData
) => {
e.stopPropagation();
@ -60,7 +130,6 @@ export function TimelinePreview({
const handleSegmentClick = (segment: VideoSegmentMetadata) => {
onSegmentClick(segment);
if (segment.is_freeze_frame && segment.cue_index !== null) {
// Highlight the AD cue
const pausePoint = pausePoints.find(pp => pp.cue_index === segment.cue_index);
if (pausePoint) {
onPausePointClick(pausePoint);
@ -128,7 +197,19 @@ export function TimelinePreview({
>
{/* Segments */}
{segments.map((segment) => {
const leftPercent = getPositionPercent(segment.start_ms);
// During drag of this segment's cue, offset the block visually by the drag delta
const linkedPP = segment.is_freeze_frame && segment.cue_index !== null
? pausePoints.find(pp => pp.cue_index === segment.cue_index) ?? null
: null;
const isDraggingBlock = linkedPP !== null && draggingCueIndex === linkedPP.cue_index;
let leftMs = segment.start_ms;
if (isDraggingBlock && dragMs !== null && linkedPP) {
const origPpMs = linkedPP.adjusted_ms ?? linkedPP.original_ms;
leftMs = segment.start_ms + (dragMs - origPpMs);
}
const leftPercent = getPositionPercent(leftMs);
const widthPercent = getPositionPercent(segment.duration_ms);
const isRegenerationQueued =
segment.is_freeze_frame &&
@ -138,21 +219,30 @@ export function TimelinePreview({
return (
<div
key={segment.segment_index}
className={`absolute top-0 h-full cursor-pointer transition-all hover:opacity-90 ${
className={`absolute top-0 h-full transition-all hover:opacity-90 ${
segment.is_freeze_frame
? isRegenerationQueued
? 'bg-amber-400'
: 'bg-orange-400'
: 'bg-blue-400'
: isDraggingBlock
? 'bg-orange-500 cursor-grabbing'
: 'bg-orange-400 cursor-grab'
: 'bg-blue-400 cursor-pointer'
}`}
style={{
left: `${leftPercent}%`,
width: `${Math.max(widthPercent, 0.5)}%`,
}}
onClick={() => handleSegmentClick(segment)}
onClick={() => {
if (!movedRef.current) handleSegmentClick(segment);
}}
onPointerDown={linkedPP ? (e) => handleDragPointerDown(e, linkedPP) : undefined}
onPointerMove={linkedPP ? (e) => handleDragPointerMove(e, linkedPP) : undefined}
onPointerUp={linkedPP
? (e) => handleDragPointerUp(e, linkedPP, () => handleSegmentClick(segment))
: undefined}
title={
segment.is_freeze_frame
? `AD Cue ${segment.cue_index !== null ? segment.cue_index + 1 : ''}${isRegenerationQueued ? ' (Regenerate queued)' : ''}`
? `AD Cue ${segment.cue_index !== null ? segment.cue_index + 1 : ''}${isRegenerationQueued ? ' (Regenerate queued)' : ''} — drag to move`
: `Video segment ${segment.segment_index}`
}
>
@ -170,21 +260,45 @@ export function TimelinePreview({
{/* Pause point markers */}
{pausePoints.map((pausePoint) => {
const effectiveMs = pausePoint.adjusted_ms ?? pausePoint.original_ms;
const leftPercent = getPositionPercent(effectiveMs);
const isDraggingThis = draggingCueIndex === pausePoint.cue_index;
const displayMs = isDraggingThis && dragMs !== null
? dragMs
: (pausePoint.adjusted_ms ?? pausePoint.original_ms);
const leftPercent = getPositionPercent(displayMs);
const isAdjusted = pausePoint.adjusted_ms !== null;
return (
<div
key={`pause-${pausePoint.cue_index}`}
className={`absolute top-0 w-2 h-full cursor-pointer z-10 hover:w-3 transition-all ${
isAdjusted ? 'bg-purple-600' : 'bg-red-600'
className={`absolute top-0 w-2 h-full z-10 transition-[width] select-none ${
isDraggingThis
? 'cursor-grabbing w-3 ' + (isAdjusted ? 'bg-purple-700' : 'bg-red-700')
: 'hover:w-3 ' + (isAdjusted ? 'bg-purple-600 cursor-grab' : 'bg-red-600 cursor-grab')
}`}
style={{ left: `${leftPercent}%` }}
onClick={(e) => { e.stopPropagation(); handlePausePointMarkerClick(e, pausePoint); }}
onContextMenu={(e) => { e.preventDefault(); e.stopPropagation(); handleContextMenuPauseOpen(pausePoint); }}
title={`Pause point ${pausePoint.cue_index + 1}: ${formatTime(effectiveMs)}${isAdjusted ? ' (adjusted)' : ''} — click to edit`}
/>
onPointerDown={(e) => handleDragPointerDown(e, pausePoint)}
onPointerMove={(e) => handleDragPointerMove(e, pausePoint)}
onPointerUp={(e) =>
handleDragPointerUp(e, pausePoint, () =>
handlePausePointMarkerClick(e, pausePoint)
)
}
onContextMenu={(e) => {
e.preventDefault();
e.stopPropagation();
handleContextMenuPauseOpen(pausePoint);
}}
title={`Pause point ${pausePoint.cue_index + 1}: ${formatTime(pausePoint.adjusted_ms ?? pausePoint.original_ms)}${isAdjusted ? ' (adjusted)' : ''} — drag to move`}
>
{/* Drag time tooltip (B3) */}
{isDraggingThis && dragMs !== null && (
<div
className="absolute -top-7 left-1/2 -translate-x-1/2 bg-gray-800 text-white text-xs rounded px-1.5 py-0.5 whitespace-nowrap pointer-events-none shadow"
>
{formatTime(dragMs)}
</div>
)}
</div>
);
})}
@ -209,7 +323,7 @@ export function TimelinePreview({
</div>
<div className="flex items-center gap-1">
<div className="w-3 h-3 bg-orange-400 rounded" />
<span>AD Audio</span>
<span>AD Audio (drag to move)</span>
</div>
<div className="flex items-center gap-1">
<div className="w-3 h-3 bg-amber-400 rounded" />
@ -217,7 +331,7 @@ export function TimelinePreview({
</div>
<div className="flex items-center gap-1">
<div className="w-1 h-3 bg-red-600" />
<span>Pause Point</span>
<span>Pause Point (drag to move)</span>
</div>
<div className="flex items-center gap-1">
<div className="w-1 h-3 bg-purple-600" />

View file

@ -0,0 +1,342 @@
import { describe, it, expect, vi, beforeEach } from 'vitest'
import { act } from 'react'
import { render, fireEvent } from '../../../test/utils'
import { TimelinePreview } from '../TimelinePreview'
import type { PausePointData, VideoSegmentMetadata } from '../../../types/api'
// ── helpers ──────────────────────────────────────────────────────────────────
// Builds a PausePointData fixture: cue 0 at 5000ms, draggable within [1000, 9000]ms,
// not adjusted. Any field can be replaced through `overrides`.
function makePausePoint(overrides: Partial<PausePointData> = {}): PausePointData {
  const base: PausePointData = {
    cue_index: 0,
    original_ms: 5000,
    source_ms: null,
    adjusted_ms: null,
    min_bound_ms: 1000,
    max_bound_ms: 9000,
    natural_gap_ms: 0,
  }
  return { ...base, ...overrides }
}
// Builds a VideoSegmentMetadata fixture: a 5000ms non-freeze video segment starting at 0
// with no linked cue. Any field can be replaced through `overrides`.
function makeSegment(overrides: Partial<VideoSegmentMetadata> = {}): VideoSegmentMetadata {
  const base: VideoSegmentMetadata = {
    segment_index: 0,
    start_ms: 0,
    end_ms: 5000,
    gcs_uri: 'gs://test/segment.mp4',
    duration_ms: 5000,
    is_freeze_frame: false,
    cue_index: null,
  }
  return { ...base, ...overrides }
}
// Builds a 6000ms freeze-frame segment (segment_index 1) linked to `cueIndex` and
// starting at `startMs`.
function makeFreezeSegment(cueIndex: number, startMs: number): VideoSegmentMetadata {
  const durationMs = 6000
  const freezeFields: Partial<VideoSegmentMetadata> = {
    segment_index: 1,
    is_freeze_frame: true,
    cue_index: cueIndex,
    start_ms: startMs,
    end_ms: startMs + durationMs,
    duration_ms: durationMs,
  }
  return makeSegment(freezeFields)
}
// Default props used in most tests
// Baseline TimelinePreview props: an empty 10s timeline with fresh vi.fn() callbacks.
// Entries in `overrides` replace the matching baseline props.
function defaultProps(overrides: Record<string, unknown> = {}) {
  const baseline = {
    segments: [],
    pausePoints: [],
    totalDurationMs: 10000,
    currentTimeMs: 0,
    onSegmentClick: vi.fn(),
    onPausePointClick: vi.fn(),
    onPausePointUpdate: vi.fn(),
    onRegenerateTTS: vi.fn(),
    regenerationQueue: [],
  }
  return { ...baseline, ...overrides }
}
// Mock pointer capture (jsdom does not implement setPointerCapture).
// The component calls setPointerCapture on pointer-down and releasePointerCapture on
// pointer-up, so both are replaced with fresh vi.fn() stubs on Element.prototype before
// every test. The stubs are assigned directly on the prototype and are not restored
// anywhere in the visible part of this file.
beforeEach(() => {
  Element.prototype.setPointerCapture = vi.fn()
  Element.prototype.releasePointerCapture = vi.fn()
})
// jsdom's PointerEvent doesn't expose MouseEvent properties (button, clientX) from init —
// they come back as undefined, breaking `if (e.button !== 0)` and delta math.
// Work-around: dispatch MouseEvents typed as pointer events. MouseEvent correctly handles
// all MouseEventInit fields; React 19 routes by event.type, not constructor type.
// Dispatches a 'pointerdown' on `el`, built as a MouseEvent so `button` and `clientX`
// survive in jsdom. `button` defaults to 0 (primary button).
function ptrDown(el: HTMLElement, init: { button?: number; clientX: number }) {
  const button = init.button ?? 0
  const event = new MouseEvent('pointerdown', {
    bubbles: true,
    cancelable: true,
    button,
    clientX: init.clientX,
  })
  fireEvent(el, event)
}
// Dispatches a 'pointermove' on `el` at the given clientX (MouseEvent-backed, see ptrDown).
function ptrMove(el: HTMLElement, init: { clientX: number }) {
  const event = new MouseEvent('pointermove', {
    bubbles: true,
    cancelable: true,
    clientX: init.clientX,
  })
  fireEvent(el, event)
}
// Dispatches a 'pointerup' on `el` at the given clientX (MouseEvent-backed, see ptrDown).
function ptrUp(el: HTMLElement, init: { clientX: number }) {
  const event = new MouseEvent('pointerup', {
    bubbles: true,
    cancelable: true,
    clientX: init.clientX,
  })
  fireEvent(el, event)
}
// Helper to mock the timeline container rect so clientX→ms math works.
// Stubs getBoundingClientRect on the timeline element (matched by '.relative.h-16') to a
// 1000px-wide, 64px-high box at the origin, so with totalDurationMs = 10000 each pixel of
// clientX corresponds to 10ms. Returns the timeline element.
// Throws when the timeline element cannot be found: continuing silently would leave the
// real (zero-size in jsdom) rect in place and make drag assertions fail for an unrelated
// and hard-to-diagnose reason.
function mockTimelineRect(container: HTMLElement): HTMLElement {
  const timelineDiv = container.querySelector<HTMLElement>('.relative.h-16')
  if (!timelineDiv) {
    throw new Error("mockTimelineRect: timeline element '.relative.h-16' not found")
  }
  vi.spyOn(timelineDiv, 'getBoundingClientRect').mockReturnValue({
    left: 0, right: 1000, width: 1000, top: 0, bottom: 64, height: 64, x: 0, y: 0,
    toJSON: () => {},
  } as DOMRect)
  return timelineDiv
}
// ── Marker drag tests (B1) ────────────────────────────────────────────────────
// Marker drag behaviour (B1). All tests use a 10s timeline mocked to 1000px wide, so a
// clientX of N pixels maps to N * 10 ms before clamping to the pause point's bounds.
describe('TimelinePreview — marker drag', () => {
  it('calls onPausePointUpdate when marker is dragged and released', async () => {
    const onPausePointUpdate = vi.fn()
    const pp = makePausePoint({ cue_index: 0, original_ms: 5000 })
    const { container } = render(
      <TimelinePreview {...defaultProps({ pausePoints: [pp], onPausePointUpdate })} />
    )
    mockTimelineRect(container)

    const marker = container.querySelector('[title*="Pause point 1"]') as HTMLElement
    expect(marker).toBeTruthy()

    // Drag: down at x=500 (5s), move to x=700 (7s), up at x=700
    await act(async () => {
      ptrDown(marker, { button: 0, clientX: 500 })
      ptrMove(marker, { clientX: 504 }) // >3px threshold
      ptrMove(marker, { clientX: 700 })
      ptrUp(marker, { clientX: 700 })
    })

    expect(onPausePointUpdate).toHaveBeenCalledOnce()
    const [cueIndex, adjustedMs] = onPausePointUpdate.mock.calls[0]
    expect(cueIndex).toBe(0)
    // 700/1000 * 10000 = 7000ms, within bounds [1000, 9000]
    expect(adjustedMs).toBe(7000)
  })

  it('opens editor popover on click (no movement)', async () => {
    const onPausePointUpdate = vi.fn()
    const onPausePointClick = vi.fn()
    const pp = makePausePoint({ cue_index: 0 })
    const { container } = render(
      <TimelinePreview {...defaultProps({ pausePoints: [pp], onPausePointUpdate, onPausePointClick })} />
    )
    mockTimelineRect(container)

    const marker = container.querySelector('[title*="Pause point 1"]') as HTMLElement

    await act(async () => {
      ptrDown(marker, { button: 0, clientX: 500 })
      // No move — stayed at same position
      ptrUp(marker, { clientX: 500 })
    })

    expect(onPausePointUpdate).not.toHaveBeenCalled()
    expect(onPausePointClick).toHaveBeenCalledWith(pp)
  })

  it('does NOT call onPausePointUpdate if final position equals original', async () => {
    const onPausePointUpdate = vi.fn()
    const pp = makePausePoint({ cue_index: 0, original_ms: 5000 })
    const { container } = render(
      <TimelinePreview {...defaultProps({ pausePoints: [pp], onPausePointUpdate })} />
    )
    mockTimelineRect(container)

    const marker = container.querySelector('[title*="Pause point 1"]') as HTMLElement

    // Drag to 500 → 504 → 500 (same ms as start = 5000ms)
    await act(async () => {
      ptrDown(marker, { button: 0, clientX: 500 })
      ptrMove(marker, { clientX: 504 })
      ptrMove(marker, { clientX: 500 })
      ptrUp(marker, { clientX: 500 })
    })

    expect(onPausePointUpdate).not.toHaveBeenCalled()
  })

  it('clamps drag to min_bound_ms', async () => {
    const onPausePointUpdate = vi.fn()
    const pp = makePausePoint({ cue_index: 0, original_ms: 5000, min_bound_ms: 2000 })
    const { container } = render(
      <TimelinePreview {...defaultProps({ pausePoints: [pp], onPausePointUpdate })} />
    )
    mockTimelineRect(container)

    const marker = container.querySelector('[title*="Pause point 1"]') as HTMLElement

    // Drag to x=50 → 500ms, below min_bound_ms=2000ms
    await act(async () => {
      ptrDown(marker, { button: 0, clientX: 500 })
      ptrMove(marker, { clientX: 504 })
      ptrMove(marker, { clientX: 50 })
      ptrUp(marker, { clientX: 50 })
    })

    // Assert the call happened before reading it, so a missing update fails as an
    // expectation instead of a destructuring TypeError.
    expect(onPausePointUpdate).toHaveBeenCalledOnce()
    const [, adjustedMs] = onPausePointUpdate.mock.calls[0]
    expect(adjustedMs).toBe(2000) // clamped to min_bound_ms
  })

  it('clamps drag to max_bound_ms', async () => {
    const onPausePointUpdate = vi.fn()
    const pp = makePausePoint({ cue_index: 0, original_ms: 5000, max_bound_ms: 8000 })
    const { container } = render(
      <TimelinePreview {...defaultProps({ pausePoints: [pp], onPausePointUpdate })} />
    )
    mockTimelineRect(container)

    const marker = container.querySelector('[title*="Pause point 1"]') as HTMLElement

    // Drag to x=950 → 9500ms, above max_bound_ms=8000ms
    await act(async () => {
      ptrDown(marker, { button: 0, clientX: 500 })
      ptrMove(marker, { clientX: 504 })
      ptrMove(marker, { clientX: 950 })
      ptrUp(marker, { clientX: 950 })
    })

    // Assert the call happened before reading it, so a missing update fails as an
    // expectation instead of a destructuring TypeError.
    expect(onPausePointUpdate).toHaveBeenCalledOnce()
    const [, adjustedMs] = onPausePointUpdate.mock.calls[0]
    expect(adjustedMs).toBe(8000) // clamped to max_bound_ms
  })

  it('right-click does NOT start drag (context menu allowed)', async () => {
    const onPausePointUpdate = vi.fn()
    const pp = makePausePoint({ cue_index: 0 })
    const { container } = render(
      <TimelinePreview {...defaultProps({ pausePoints: [pp], onPausePointUpdate })} />
    )
    mockTimelineRect(container)

    const marker = container.querySelector('[title*="Pause point 1"]') as HTMLElement

    await act(async () => {
      ptrDown(marker, { button: 2, clientX: 500 })
      ptrMove(marker, { clientX: 504 })
      ptrMove(marker, { clientX: 700 })
      ptrUp(marker, { clientX: 700 })
    })

    expect(onPausePointUpdate).not.toHaveBeenCalled()
  })
})
// ── Freeze-block drag tests (B2) ─────────────────────────────────────────────
// Freeze-block drag behaviour (B2): dragging an AD freeze block moves its linked pause
// point, while plain video segments never start a drag.
describe('TimelinePreview — freeze-block drag', () => {
  it('calls onPausePointUpdate when freeze block is dragged', async () => {
    const onPausePointUpdate = vi.fn()
    const pp = makePausePoint({ cue_index: 0, original_ms: 5000 })
    const freeze = makeFreezeSegment(0, 5000)
    const { container } = render(
      <TimelinePreview
        {...defaultProps({
          pausePoints: [pp],
          segments: [freeze],
          onPausePointUpdate,
        })}
      />
    )
    mockTimelineRect(container)

    const block = container.querySelector('[title*="AD Cue 1"]') as HTMLElement
    expect(block).toBeTruthy()

    // Drag block from x=500 → x=600 (+1s = 1000ms delta)
    await act(async () => {
      ptrDown(block, { button: 0, clientX: 500 })
      ptrMove(block, { clientX: 504 })
      ptrMove(block, { clientX: 600 })
      ptrUp(block, { clientX: 600 })
    })

    expect(onPausePointUpdate).toHaveBeenCalledOnce()
    const [cueIndex, adjustedMs] = onPausePointUpdate.mock.calls[0]
    expect(cueIndex).toBe(0)
    expect(adjustedMs).toBe(6000) // 600/1000 * 10000 = 6000ms
  })

  it('non-freeze segments do not start drag', async () => {
    const onPausePointUpdate = vi.fn()
    const seg = makeSegment({ segment_index: 0, is_freeze_frame: false, start_ms: 0, duration_ms: 4000 })
    const { container } = render(
      <TimelinePreview
        {...defaultProps({ segments: [seg], onPausePointUpdate })}
      />
    )
    mockTimelineRect(container)

    const videoSeg = container.querySelector('[title*="Video segment"]') as HTMLElement
    // The segment must be rendered; otherwise this test would pass without exercising
    // any pointer handling at all.
    expect(videoSeg).toBeTruthy()

    await act(async () => {
      ptrDown(videoSeg, { button: 0, clientX: 200 })
      ptrMove(videoSeg, { clientX: 204 })
      ptrMove(videoSeg, { clientX: 400 })
      ptrUp(videoSeg, { clientX: 400 })
    })

    expect(onPausePointUpdate).not.toHaveBeenCalled()
  })
})
// ── Drag tooltip (B3) ─────────────────────────────────────────────────────────
// Drag tooltip behaviour (B3): the time tooltip is rendered while a marker is being
// dragged and removed once the pointer is released.
describe('TimelinePreview — drag tooltip', () => {
  // Renders a timeline with a single pause point (cue 0 at 5000ms), mocks the timeline
  // rect, and returns the container together with the pause-point marker element.
  const renderWithMarker = (extraProps: Record<string, unknown> = {}) => {
    const pausePoint = makePausePoint({ cue_index: 0, original_ms: 5000 })
    const { container } = render(
      <TimelinePreview {...defaultProps({ pausePoints: [pausePoint], ...extraProps })} />
    )
    mockTimelineRect(container)
    const marker = container.querySelector('[title*="Pause point 1"]') as HTMLElement
    return { container, marker }
  }

  // The tooltip is located through its background utility class.
  const findTooltip = (root: HTMLElement) => root.querySelector('.bg-gray-800')

  it('shows time tooltip during marker drag', async () => {
    const { container, marker } = renderWithMarker()

    // act() flushes React state updates before the DOM is inspected
    await act(async () => {
      ptrDown(marker, { button: 0, clientX: 500 })
      ptrMove(marker, { clientX: 504 })
      ptrMove(marker, { clientX: 700 })
    })

    const tooltip = findTooltip(container) as HTMLElement
    expect(tooltip).toBeTruthy()
    expect(tooltip.textContent).toMatch(/\d:\d{2}/)
  })

  it('hides tooltip after drag ends', async () => {
    const { container, marker } = renderWithMarker({ onPausePointUpdate: vi.fn() })

    await act(async () => {
      ptrDown(marker, { button: 0, clientX: 500 })
      ptrMove(marker, { clientX: 504 })
      ptrMove(marker, { clientX: 700 })
      ptrUp(marker, { clientX: 700 })
    })

    expect(findTooltip(container)).toBeNull()
  })
})

View file

@ -38,6 +38,7 @@ global.ResizeObserver = vi.fn(() => ({
global.URL.createObjectURL = vi.fn(() => 'mock-object-url')
global.URL.revokeObjectURL = vi.fn()
// Mock HTMLMediaElement for video components
Object.defineProperty(HTMLMediaElement.prototype, 'load', {
writable: true,

View file

@ -647,6 +647,7 @@ export interface PausePointData {
adjusted_ms: number | null;
min_bound_ms: number;
max_bound_ms: number;
natural_gap_ms?: number; // Duration (ms) of natural silence at the pause point; 0 = none
}
export interface VideoSegmentMetadata {

View file

@ -203,7 +203,7 @@ display_summary() {
echo -e "${GREEN}Frontend successfully deployed!${NC}"
echo ""
echo "Deployment location: $DEPLOY_DIR"
echo "Frontend URL: https://ai-sandbox.oliver.solutions/video-accessibility"
echo "Frontend URL: https://optical-dev.oliver.solutions/video-accessibility"
echo ""
echo "To verify the deployment, visit the URL above in your browser."
echo ""