diff --git a/apache-config-snippet.conf b/apache-config-snippet.conf index 4e583be..b650c03 100644 --- a/apache-config-snippet.conf +++ b/apache-config-snippet.conf @@ -1,6 +1,6 @@ # ============================================================================= # Apache config fragment — Accessible Video Platform -# Inject into: /etc/apache2/sites-available/ai-sandbox.oliver.solutions-ssl.conf +# Inject into: /etc/apache2/sites-available/optical-dev.oliver.solutions-ssl.conf # # Required modules: # sudo a2enmod proxy proxy_http proxy_wstunnel rewrite headers @@ -70,7 +70,7 @@ Alias /video-accessibility /var/www/html/video-accessibility # ============================================================================= # # -# ServerName ai-sandbox.oliver.solutions +# ServerName optical-dev.oliver.solutions # DocumentRoot /var/www/html # # SSLEngine on @@ -82,8 +82,8 @@ Alias /video-accessibility /var/www/html/video-accessibility # # # — paste the block above here — # -# ErrorLog ${APACHE_LOG_DIR}/ai-sandbox-error.log -# CustomLog ${APACHE_LOG_DIR}/ai-sandbox-access.log combined +# ErrorLog ${APACHE_LOG_DIR}/optical-dev-error.log +# CustomLog ${APACHE_LOG_DIR}/optical-dev-access.log combined # # ============================================================================= @@ -91,6 +91,6 @@ Alias /video-accessibility /var/www/html/video-accessibility # ============================================================================= # sudo apache2ctl configtest # sudo systemctl reload apache2 -# curl -I https://ai-sandbox.oliver.solutions/video-accessibility/ -# curl https://ai-sandbox.oliver.solutions/video-accessibility/api/v1/health -# wscat -c wss://ai-sandbox.oliver.solutions/video-accessibility/api/v1/ws/job-list +# curl -I https://optical-dev.oliver.solutions/video-accessibility/ +# curl https://optical-dev.oliver.solutions/video-accessibility/api/v1/health +# wscat -c wss://optical-dev.oliver.solutions/video-accessibility/api/v1/ws/job-list diff --git a/backend/app/api/v1/routes_share.py b/backend/app/api/v1/routes_share.py index 11973eb..3df3c83 100644 --- a/backend/app/api/v1/routes_share.py +++ b/backend/app/api/v1/routes_share.py @@ -21,8 +21,7 @@ _JOBS = "jobs" def _share_url(token: str) -> str: - base = getattr(settings, "app_url", "https://ai-sandbox.oliver.solutions/video-accessibility") - return f"{base}/share/{token}" + return f"{settings.app_url}/share/{token}" # ── Request schemas ─────────────────────────────────────────────────────────── diff --git a/backend/app/core/config.py b/backend/app/core/config.py index 0c744cb..6777e1a 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -6,6 +6,7 @@ class Settings(BaseSettings): # App app_env: str = "dev" api_base_url: str = "http://localhost:8000" + app_url: str = "https://optical-dev.oliver.solutions/video-accessibility" # Auth jwt_secret: str @@ -247,6 +248,14 @@ class Settings(BaseSettings): whisper_sentence_gap_threshold: float = 0.5 # Gap duration to classify as sentence boundary whisper_phrase_gap_threshold: float = 0.3 # Gap duration to classify as phrase boundary whisper_min_gap_threshold: float = 0.15 # Minimum gap duration to consider + # Forward-preferred snap windows (A2) + whisper_snap_forward_window: float = 4.0 # Prefer boundary up to N seconds ahead of Gemini point + whisper_snap_backward_window: float = 1.5 # Fall back to boundary up to N seconds behind + # Adaptive silence buffer (A1) + ad_silence_buffer_default: float = 0.5 # Base silence duration (s) before/after AD audio + ad_silence_buffer_min_after: float = 0.1 # Minimum silence after AD audio + # Minimum gap required at the chosen pause point (A3) + ad_min_acceptable_gap: float = 0.2 # Seconds; points with shorter gaps trigger forward search # Cloud Run Service URLs (empty = use local processing) # When set, CPU-intensive work is offloaded to Cloud Run with autoscaling diff --git a/backend/app/models/job.py b/backend/app/models/job.py index 56b0d13..0c3fd84 100644 --- a/backend/app/models/job.py +++ b/backend/app/models/job.py @@ -90,6 +90,7 @@ class PausePointData(BaseModel): adjusted_ms: float | None = None # User-adjusted timestamp (ms), None = use original min_bound_ms: float # Minimum allowed value (end of previous AD segment) max_bound_ms: float # Maximum allowed value (start of next AD segment) + natural_gap_ms: float = 0.0 # Natural silence already present at pause point (ms); used to size silence buffers class VideoSegmentMetadata(BaseModel): diff --git a/backend/app/services/language_qc.py b/backend/app/services/language_qc.py index 78f3a57..6d0e2dd 100644 --- a/backend/app/services/language_qc.py +++ b/backend/app/services/language_qc.py @@ -116,8 +116,7 @@ def _qc_recipients( def _deep_link(job_id: str, lang: str) -> str: from ..core.config import settings - base = getattr(settings, "app_url", "https://ai-sandbox.oliver.solutions/video-accessibility") - return f"{base}/admin/qc/{job_id}#lang-{lang}" + return f"{settings.app_url}/admin/qc/{job_id}#lang-{lang}" # ── Auto-assignment ─────────────────────────────────────────────────────────── diff --git a/backend/app/services/video_renderer.py b/backend/app/services/video_renderer.py index b031673..2a05c36 100644 --- a/backend/app/services/video_renderer.py +++ b/backend/app/services/video_renderer.py @@ -54,6 +54,9 @@ class VideoRendererService: # Audio ducking settings self.duck_level = getattr(settings, 'accessible_video_duck_level', 0.3) self.duck_fade_ms = getattr(settings, 'accessible_video_duck_fade_ms', 200) + # Adaptive silence buffer settings (A1) + self._silence_buffer_default = getattr(settings, 'ad_silence_buffer_default', 0.5) + self._silence_buffer_min_after = getattr(settings, 'ad_silence_buffer_min_after', 0.1) # Cloud Run support self._gcs_client: storage.Client | None = None # Source video caching for Cloud Run (uploaded once, reused across operations) @@ -599,18 +602,38 @@ class VideoRendererService: final_segment_needed = final_segment_start < source_duration # ============================================================ - # PARALLEL PHASE 1: Generate shared silence + extract all frames + all video segments + # PARALLEL PHASE 1: Generate per-cue silence files + extract all frames + video segments # ============================================================ logger.info(f"Phase 1: Parallel extraction of {len(valid_placements)} frames and video segments") - silence_duration = 0.5 # 500ms shared by all - silence_path = temp_dir_path / "silence_shared.m4a" + # Compute adaptive silence buffers per cue (A1): + # natural_gap_ms already present at the pause point reduces how much silence we add. + _buf_default = self._silence_buffer_default + _buf_min_after = self._silence_buffer_min_after + silence_pre_paths: dict[int, str] = {} + silence_post_paths: dict[int, str] = {} + for p in valid_placements: + i = p["index"] + natural_gap = (p.get("natural_gap_ms") or 0.0) / 1000.0 + silence_before = max(0.05, _buf_default - natural_gap * 0.5) + silence_after = max(_buf_min_after, _buf_default - natural_gap * 0.3) + p["silence_before"] = silence_before + p["silence_after"] = silence_after + silence_pre_paths[i] = str(temp_dir_path / f"silence_pre_{i}.m4a") + silence_post_paths[i] = str(temp_dir_path / f"silence_post_{i}.m4a") + logger.debug( + f"Cue {p['cue_index']}: natural_gap={natural_gap*1000:.0f}ms → " + f"silence_before={silence_before*1000:.0f}ms silence_after={silence_after*1000:.0f}ms" + ) # Build tasks for phase 1 phase1_tasks = [] - # Task: Generate silence (just once, shared by all) - phase1_tasks.append(self._generate_silence(silence_duration, str(silence_path), video_props)) + # Tasks: Generate per-cue silence files + for p in valid_placements: + i = p["index"] + phase1_tasks.append(self._generate_silence(p["silence_before"], silence_pre_paths[i], video_props)) + phase1_tasks.append(self._generate_silence(p["silence_after"], silence_post_paths[i], video_props)) # Tasks: Extract all video segments video_segment_paths = {} @@ -667,7 +690,7 @@ class VideoRendererService: combined_audio_path = temp_dir_path / f"combined_audio_{i}.m4a" combined_audio_paths[i] = str(combined_audio_path) phase2_tasks.append(self._concatenate_audio( - [str(silence_path), p["ad_mp3_path"], str(silence_path)], + [silence_pre_paths[i], p["ad_mp3_path"], silence_post_paths[i]], str(combined_audio_path), video_props )) @@ -686,11 +709,14 @@ class VideoRendererService: i = p["index"] cue_index = p["cue_index"] ad_duration = p["ad_duration"] - total_freeze_duration = ad_duration + (2 * silence_duration) + silence_before = p["silence_before"] + silence_after = p["silence_after"] + total_freeze_duration = ad_duration + silence_before + silence_after logger.info( - f"Cue {cue_index}: Freeze segment with silence buffers - " - f"500ms + AD={ad_duration:.2f}s + 500ms = {total_freeze_duration:.2f}s" + f"Cue {cue_index}: Freeze segment — " + f"pre={silence_before*1000:.0f}ms + AD={ad_duration:.2f}s + " + f"post={silence_after*1000:.0f}ms = {total_freeze_duration:.2f}s" ) freeze_segment_path = temp_dir_path / f"freeze_segment_{i}.mp4" @@ -720,7 +746,7 @@ class VideoRendererService: p["actual_freeze_duration"] = actual_duration # Log any discrepancy between expected and actual duration - expected = p["ad_duration"] + (2 * silence_duration) + expected = p["ad_duration"] + p["silence_before"] + p["silence_after"] discrepancy = actual_duration - expected if abs(discrepancy) > 0.01: # 10ms threshold logger.warning( @@ -1535,7 +1561,7 @@ class VideoRendererService: """ Generate a silent audio file of specified duration. - Used to create 500ms silence buffers before/after AD audio. + Used to create adaptive silence buffers before/after AD audio. """ if self._use_cloud_run: await self._generate_silence_cloud_run(duration, output_path, props) diff --git a/backend/app/services/whisper_service.py b/backend/app/services/whisper_service.py index 2675042..6876fc1 100644 --- a/backend/app/services/whisper_service.py +++ b/backend/app/services/whisper_service.py @@ -97,8 +97,12 @@ class WhisperService: self.phrase_gap_threshold = _get_setting('whisper_phrase_gap_threshold', 0.3) self.min_gap_threshold = _get_setting('whisper_min_gap_threshold', 0.15) - # Snapping configuration + # Snapping configuration (A2: forward-preferred snap) self.max_search_window = _get_setting('whisper_max_search_window', 30.0) + self.snap_forward_window = _get_setting('whisper_snap_forward_window', 4.0) + self.snap_backward_window = _get_setting('whisper_snap_backward_window', 1.5) + # Minimum gap duration to accept as a pause insertion point (A3: gap validation) + self.min_acceptable_gap = _get_setting('ad_min_acceptable_gap', 0.2) @property def model(self) -> WhisperModel: @@ -367,20 +371,18 @@ class WhisperService: gaps: list[SpeechGap], boundaries: list[SentenceBoundary], speaking_threshold: float = 2.0 - ) -> tuple[float, float, str | None]: + ) -> tuple[float, float, str | None, float]: """ - Snap a Gemini pause point to the nearest sentence boundary. + Snap a Gemini pause point to an appropriate sentence boundary. - Simplified algorithm: - 1. Check if "during speaking" (words within ±threshold) - - If NO → Use Gemini's exact pause point - 2. If during speaking, find nearest sentence gap and snap to MIDPOINT - 3. Edge cases: - - Case A: First sentence in video → pause at video start (0.0) - - Case B: Last sentence in video → pause at video end - - The video renderer adds 500ms silence buffers before/after AD audio, - so no overlap or catch-up logic is needed here. + Algorithm: + 1. If NOT during speaking → use Gemini's exact point (already in a natural gap). + 2. Forward-preferred boundary selection: prefer boundaries ahead of gemini_pause + within snap_forward_window, fall back to backward within snap_backward_window. + 3. Edge cases A/B/C handled identically to before; each now also returns + natural_gap_ms so the renderer can shrink silence buffers accordingly. + 4. Gap validation (A3): if the chosen pause_point has no acceptable gap nearby, + search forward for the next usable gap. Args: gemini_pause: Original pause point from Gemini (seconds) @@ -390,70 +392,133 @@ class WhisperService: speaking_threshold: Max distance to consider "during speaking" (default: 2.0s) Returns: - Tuple of (pause_point, resume_from, warning_message_or_none) - Note: resume_from always equals pause_point with the simplified algorithm + Tuple of (pause_point, resume_from, warning_or_none, natural_gap_ms) + natural_gap_ms: ms of natural silence already present at the chosen point """ - # Step 1: Check if "during speaking" (words within ±threshold) + # Step 1: Not during speaking → use Gemini's exact point if not self._is_during_speaking(gemini_pause, words, speaking_threshold): - # Not during speaking - use Gemini's exact pause point + natural_gap_ms = self._gap_duration_at(gemini_pause, gaps) * 1000.0 logger.info( - f"Pause point {gemini_pause:.2f}s is NOT during speaking " - f"(no words within ±{speaking_threshold}s), using Gemini's exact point" + f"Pause {gemini_pause:.2f}s not during speaking " + f"(no words within ±{speaking_threshold}s) → using exact point, " + f"natural_gap={natural_gap_ms:.0f}ms" ) - return gemini_pause, gemini_pause, None + return gemini_pause, gemini_pause, None, natural_gap_ms - # Step 2: During speaking - find nearest sentence boundary + # Step 2: During speaking — forward-preferred boundary selection (A2) if not boundaries: - # No boundaries found at all - use Gemini's point with warning logger.warning(f"No sentence boundaries found, using Gemini's exact point {gemini_pause:.2f}s") - return gemini_pause, gemini_pause, "No sentence boundaries found in transcript" + return gemini_pause, gemini_pause, "No sentence boundaries found in transcript", 0.0 - # Find the boundary closest to the Gemini pause point - closest_boundary = min(boundaries, key=lambda b: abs(b.time - gemini_pause)) + forward = [b for b in boundaries if 0 < b.time - gemini_pause <= self.snap_forward_window] + backward = [b for b in boundaries if 0 < gemini_pause - b.time <= self.snap_backward_window] - logger.debug( - f"Nearest boundary to {gemini_pause:.2f}s: {closest_boundary.boundary_type} " - f"at {closest_boundary.time:.2f}s (distance: {abs(closest_boundary.time - gemini_pause):.2f}s)" - ) + if forward: + closest_boundary = min(forward, key=lambda b: b.time - gemini_pause) + logger.debug( + f"Forward snap: Gemini={gemini_pause:.2f}s → boundary " + f"{closest_boundary.boundary_type}@{closest_boundary.time:.2f}s " + f"(+{closest_boundary.time - gemini_pause:.2f}s)" + ) + elif backward: + closest_boundary = min(backward, key=lambda b: gemini_pause - b.time) + logger.debug( + f"Backward snap (no forward boundary): Gemini={gemini_pause:.2f}s → " + f"{closest_boundary.boundary_type}@{closest_boundary.time:.2f}s " + f"(-{gemini_pause - closest_boundary.time:.2f}s)" + ) + else: + logger.warning( + f"No boundary within fwd={self.snap_forward_window}s / " + f"bwd={self.snap_backward_window}s of {gemini_pause:.2f}s → using exact point" + ) + return ( + gemini_pause, gemini_pause, + f"No boundary within snap windows of {gemini_pause:.2f}s", + 0.0 + ) - # Case A: First sentence in video (no previous sentence) → snap to video start + # Case A: First sentence in video → snap to video start if closest_boundary.boundary_type == "sentence_start" and not closest_boundary.has_previous_sentence: pause_point = 0.0 - logger.info( - f"Case A (first sentence): pause_point={pause_point:.2f}s " - f"(snapped to video start)" - ) - return pause_point, pause_point, None + natural_gap_ms = self._gap_duration_at(pause_point, gaps) * 1000.0 + logger.info("Case A (first sentence): pause_point=0.00s") + return pause_point, pause_point, None, natural_gap_ms - # Case B: Last sentence in video (no next sentence) → snap to boundary time + # Case B: Last sentence in video → snap to boundary time if closest_boundary.boundary_type == "sentence_end" and not closest_boundary.has_next_sentence: pause_point = closest_boundary.time - logger.info( - f"Case B (last sentence): pause_point={pause_point:.2f}s " - f"(snapped to video end at sentence boundary)" - ) - return pause_point, pause_point, None + natural_gap_ms = self._gap_duration_at(pause_point, gaps) * 1000.0 + logger.info(f"Case B (last sentence): pause_point={pause_point:.2f}s") + return pause_point, pause_point, None, natural_gap_ms - # Case C: Gap between two sentences → snap to MIDPOINT of the gap + # Case C: Gap between sentences → snap to midpoint if closest_boundary.gap: gap = closest_boundary.gap - # Calculate midpoint between end of previous sentence and start of next - midpoint = (gap.start + gap.end) / 2.0 - + pause_point = (gap.start + gap.end) / 2.0 + natural_gap_ms = gap.duration * 1000.0 logger.info( - f"Case C (between sentences): gap={gap.start:.2f}s-{gap.end:.2f}s, " - f"midpoint={midpoint:.2f}s (resume from same point)" + f"Case C: gap {gap.start:.2f}s–{gap.end:.2f}s → " + f"midpoint={pause_point:.2f}s, natural_gap={natural_gap_ms:.0f}ms" ) - return midpoint, midpoint, None + # A3: gap too short → look for next acceptable gap + if gap.duration < self.min_acceptable_gap: + adjusted, adjusted_gap_ms, warn = self._find_next_acceptable_gap(pause_point, gaps) + if warn is None: + logger.info( + f"A3: gap {gap.duration:.3f}s < threshold {self.min_acceptable_gap}s, " + f"snapped forward to {adjusted:.2f}s (gap={adjusted_gap_ms:.0f}ms)" + ) + return adjusted, adjusted, None, adjusted_gap_ms + return pause_point, pause_point, None, natural_gap_ms - # Fallback: No gap associated with boundary - use the boundary time directly - # This shouldn't normally happen but handles edge cases + # Fallback: no gap → boundary time, then A3 validation pause_point = closest_boundary.time - logger.info( - f"Fallback: Using boundary at {closest_boundary.time:.2f}s, " - f"pause_point={pause_point:.2f}s (no gap available)" - ) - return pause_point, pause_point, None + natural_gap_ms = self._gap_duration_at(pause_point, gaps) * 1000.0 + logger.info(f"Fallback: boundary at {pause_point:.2f}s, natural_gap={natural_gap_ms:.0f}ms") + + if natural_gap_ms < self.min_acceptable_gap * 1000.0: + adjusted, adjusted_gap_ms, warn = self._find_next_acceptable_gap(pause_point, gaps) + if warn is None: + logger.info( + f"A3: no acceptable gap at {pause_point:.2f}s, " + f"snapped forward to {adjusted:.2f}s" + ) + return ( + adjusted, adjusted, + f"Snapped forward to next acceptable gap (no gap at boundary {pause_point:.2f}s)", + adjusted_gap_ms + ) + logger.warning(f"A3: no acceptable gap found near {pause_point:.2f}s in forward window") + return pause_point, pause_point, "No acceptable gap found near pause_point", 0.0 + + return pause_point, pause_point, None, natural_gap_ms + + def _gap_duration_at(self, time: float, gaps: list[SpeechGap], epsilon: float = 0.1) -> float: + """Return duration in seconds of any gap whose window contains `time`, or 0.0.""" + for gap in gaps: + if gap.start - epsilon <= time <= gap.end + epsilon: + return gap.duration + return 0.0 + + def _find_next_acceptable_gap( + self, from_time: float, gaps: list[SpeechGap] + ) -> tuple[float, float, str | None]: + """Find the nearest forward gap with duration >= min_acceptable_gap. + + Returns (midpoint_s, duration_ms, warning_or_none). + """ + candidates = [ + g for g in gaps + if g.start > from_time + and g.start - from_time <= self.snap_forward_window + and g.duration >= self.min_acceptable_gap + ] + if not candidates: + return from_time, 0.0, "No acceptable gap in forward window" + next_gap = min(candidates, key=lambda g: g.start) + midpoint = (next_gap.start + next_gap.end) / 2.0 + return midpoint, next_gap.duration * 1000.0, None def refine_all_pause_points( self, @@ -496,13 +561,14 @@ class WhisperService: if placement.get("pause_point") is not None: original = placement["pause_point"] - pause_point, resume_from, warning = self.snap_pause_point( + pause_point, resume_from, warning, natural_gap_ms = self.snap_pause_point( original, words, gaps, boundaries ) refined["pause_point"] = pause_point refined["resume_from"] = resume_from refined["original_pause_point"] = original # Preserve for debugging + refined["natural_gap_ms"] = natural_gap_ms # For adaptive silence buffer if warning: warnings.append(f"Cue {placement['ad_cue_index']}: {warning}") diff --git a/backend/tests/unit/test_video_renderer_buffers.py b/backend/tests/unit/test_video_renderer_buffers.py new file mode 100644 index 0000000..cfc32ae --- /dev/null +++ b/backend/tests/unit/test_video_renderer_buffers.py @@ -0,0 +1,90 @@ +"""Tests for adaptive silence buffer formula in video_renderer.py (A1). + +The renderer lives behind heavy GCP + FFmpeg deps only available in Docker. +These tests cover the pure arithmetic used inside _render_pause_insert_method; +they do not import VideoRendererService to stay runnable locally via pytest. +""" + +import pytest + + +# ── Pure formula tests (no FFmpeg, no GCS) ─────────────────────────────────── +# +# Mirrors the exact formula in _render_pause_insert_method: +# natural_gap = natural_gap_ms / 1000.0 +# silence_before = max(0.05, default_buf - natural_gap * 0.5) +# silence_after = max(min_after, default_buf - natural_gap * 0.3) + +def _buffers( + natural_gap_ms: float, + default_buf: float = 0.5, + min_after: float = 0.1, +) -> tuple[float, float]: + natural_gap = natural_gap_ms / 1000.0 + silence_before = max(0.05, default_buf - natural_gap * 0.5) + silence_after = max(min_after, default_buf - natural_gap * 0.3) + return silence_before, silence_after + + +@pytest.mark.parametrize("natural_gap_ms,exp_before,exp_after", [ + # No natural gap → full default buffers + (0, 0.50, 0.50), + # 200 ms gap: before = 0.5 - 0.1 = 0.40; after = 0.5 - 0.06 = 0.44 + (200, 0.40, 0.44), + # 500 ms gap: before = 0.5 - 0.25 = 0.25; after = 0.5 - 0.15 = 0.35 + (500, 0.25, 0.35), + # 1000 ms gap: before = max(0.05, 0.5-0.5)=0.05; after = max(0.1, 0.5-0.3)=0.20 + (1000, 0.05, 0.20), + # 1500 ms gap: before=0.05 (floor); after = max(0.1, 0.5-0.45)=0.10 (floor) + (1500, 0.05, 0.10), + # 2000 ms gap: both at their floors + (2000, 0.05, 0.10), +]) +def test_buffer_formula(natural_gap_ms, exp_before, exp_after): + before, after = _buffers(natural_gap_ms) + assert before == pytest.approx(exp_before, abs=0.001) + assert after == pytest.approx(exp_after, abs=0.001) + + +def test_total_freeze_duration_uses_adaptive_buffers(): + """total_freeze_duration = ad_duration + silence_before + silence_after.""" + ad_duration = 5.0 + natural_gap_ms = 800.0 # 800ms natural gap + + before, after = _buffers(natural_gap_ms) + total = ad_duration + before + after + + # before = max(0.05, 0.5 - 0.4) = 0.10; after = max(0.1, 0.5 - 0.24) = 0.26 + assert total == pytest.approx(ad_duration + before + after, abs=0.001) + # Sanity: less than the old constant 1.0s overhead when there's a natural gap + assert (before + after) < 1.0 + + +def test_buffers_never_below_floor(): + """silence_before never < 0.05, silence_after never < 0.10, regardless of gap size.""" + for gap_ms in [0, 100, 500, 1000, 5000, 10000]: + before, after = _buffers(gap_ms) + assert before >= 0.05, f"silence_before={before} below floor for gap={gap_ms}ms" + assert after >= 0.10, f"silence_after={after} below floor for gap={gap_ms}ms" + + +def test_large_natural_gap_has_less_total_overhead_than_small_gap(): + """Larger natural gap → smaller combined silence overhead.""" + before_small, after_small = _buffers(100) + before_large, after_large = _buffers(900) + + assert (before_small + after_small) > (before_large + after_large) + + +def test_renderer_config_defaults_match_formula(): + """The config defaults used in the formula match the expected values.""" + # These must stay in sync with config.py defaults: + # ad_silence_buffer_default: float = 0.5 + # ad_silence_buffer_min_after: float = 0.1 + DEFAULT_BUF = 0.5 + MIN_AFTER = 0.1 + assert DEFAULT_BUF == pytest.approx(0.5) + assert MIN_AFTER == pytest.approx(0.1) + # Verify floors are derived from these values + _, after = _buffers(10_000, DEFAULT_BUF, MIN_AFTER) # saturated gap + assert after == pytest.approx(MIN_AFTER) diff --git a/backend/tests/unit/test_whisper_snap.py b/backend/tests/unit/test_whisper_snap.py new file mode 100644 index 0000000..1792f0d --- /dev/null +++ b/backend/tests/unit/test_whisper_snap.py @@ -0,0 +1,231 @@ +"""Tests for the improved snap_pause_point algorithm (A1/A2/A3).""" + +import sys +from unittest.mock import MagicMock + +# faster_whisper ships only in the Docker image; stub it so pytest can run locally. +if 'faster_whisper' not in sys.modules: + sys.modules['faster_whisper'] = MagicMock() + +import pytest +from app.services.whisper_service import ( + WhisperService, + WordTimestamp, + SpeechGap, + SentenceBoundary, +) + + +# ── fixtures ──────────────────────────────────────────────────────────────── + +@pytest.fixture +def svc(): + """WhisperService instance with default settings.""" + return WhisperService() + + +def _word(start: float, end: float, text: str = "word") -> WordTimestamp: + return WordTimestamp(word=text, start=start, end=end) + + +def _gap(start: float, end: float, gap_type: str = "sentence") -> SpeechGap: + return SpeechGap(start=start, end=end, duration=end - start, gap_type=gap_type) + + +def _boundary( + time: float, + btype: str = "sentence_end", + has_prev: bool = True, + has_next: bool = True, + gap: SpeechGap | None = None, +) -> SentenceBoundary: + return SentenceBoundary( + time=time, + boundary_type=btype, + word_index=0, + has_previous_sentence=has_prev, + has_next_sentence=has_next, + gap=gap, + ) + + +# ── A2: forward-preferred snap ─────────────────────────────────────────────── + +class TestForwardPreferredSnap: + def test_picks_forward_over_equidistant_backward(self, svc): + """Gemini=10.5s; forward boundary@11.2s and backward@9.8s — must pick forward.""" + gap = _gap(11.2, 11.8) + boundaries = [ + _boundary(9.8, gap=_gap(9.8, 10.0)), + _boundary(11.2, gap=gap), + ] + words = [_word(9.0, 9.5), _word(10.0, 10.5), _word(11.0, 11.2)] + gaps = [_gap(9.8, 10.0), gap] + + pause, _, warning, _ = svc.snap_pause_point(10.5, words, gaps, boundaries) + + assert pause == pytest.approx(11.5, abs=0.01) # midpoint of 11.2–11.8 + assert warning is None + + def test_forward_boundary_within_window_is_preferred(self, svc): + """Even a slightly farther forward boundary beats a closer backward one.""" + gap_fwd = _gap(12.0, 12.6) + gap_bwd = _gap(10.1, 10.4) + boundaries = [ + _boundary(10.1, gap=gap_bwd), + _boundary(12.0, gap=gap_fwd), + ] + words = [_word(9.0, 10.1), _word(10.5, 12.0)] + gaps = [gap_bwd, gap_fwd] + + pause, _, _, _ = svc.snap_pause_point(10.5, words, gaps, boundaries) + + assert pause == pytest.approx(12.3, abs=0.01) # midpoint of 12.0–12.6 + + def test_falls_back_to_backward_when_no_forward_within_window(self, svc): + """No forward boundary within snap_forward_window → use backward (within 1.5s).""" + # Boundary at 9.2s: distance = 10.5 - 9.2 = 1.3s ≤ snap_backward_window (1.5s) ✓ + gap = _gap(9.0, 9.4) + boundaries = [_boundary(9.0, gap=gap)] + words = [_word(7.0, 9.0), _word(9.4, 10.5)] + gaps = [gap] + + pause, _, warning, _ = svc.snap_pause_point(10.5, words, gaps, boundaries) + + assert pause == pytest.approx(9.2, abs=0.01) # midpoint of 9.0–9.4 + + def test_no_boundary_in_any_window_returns_gemini_with_warning(self, svc): + """Boundary exists but outside both windows → exact Gemini point + warning.""" + # Put boundaries 10s away in both directions (beyond any window) + boundaries = [ + _boundary(0.1, gap=_gap(0.0, 0.5)), + _boundary(50.0, gap=_gap(49.0, 50.0)), + ] + words = [_word(9.0, 12.0)] + gaps = [] + + pause, resume, warning, _ = svc.snap_pause_point(10.5, words, gaps, boundaries) + + assert pause == pytest.approx(10.5) + assert warning is not None + assert "snap windows" in warning.lower() + + def test_not_during_speaking_uses_exact_point(self, svc): + """Pause point far from all words → no snap, exact point returned.""" + boundaries = [_boundary(5.0, gap=_gap(4.8, 5.3))] + words = [_word(0.0, 3.0)] # speech ends at 3s; pause at 7s + # Gap covers 3.0–10.0; pause at 7.0 is inside it + gaps = [_gap(3.0, 10.0)] + + pause, _, warning, natural_gap_ms = svc.snap_pause_point(7.0, words, gaps, boundaries) + + assert pause == pytest.approx(7.0) + assert warning is None + # natural_gap covers the pause (7.0 is inside gap 3.0–10.0) + assert natural_gap_ms > 0 + + +# ── A1: natural_gap_ms returned correctly ─────────────────────────────────── + +class TestNaturalGapMs: + def test_case_c_returns_gap_duration(self, svc): + """Case C (gap midpoint) must return gap.duration * 1000 as natural_gap_ms.""" + gap = _gap(10.0, 11.2) + boundaries = [_boundary(10.0, gap=gap)] + words = [_word(9.0, 10.0), _word(11.2, 12.0)] + gaps = [gap] + + _, _, _, natural_gap_ms = svc.snap_pause_point(10.5, words, gaps, boundaries) + + assert natural_gap_ms == pytest.approx(1200.0, abs=1.0) # 1.2s gap + + def test_no_gap_returns_zero(self, svc): + """Fallback case with no gap → natural_gap_ms == 0.""" + b = _boundary(10.0, gap=None) # no gap attached + words = [_word(9.0, 10.1), _word(10.1, 11.0)] + gaps = [] + + _, _, _, natural_gap_ms = svc.snap_pause_point(10.5, words, gaps, [b]) + + assert natural_gap_ms == 0.0 + + def test_not_during_speaking_reads_gap_from_gaps_list(self, svc): + """Not-during-speaking path should read natural gap from the gaps list.""" + gap = _gap(5.0, 6.0) # covers pause at 5.5s + words = [_word(0.0, 3.0)] # all speech before 3s + gaps = [gap] + + _, _, _, natural_gap_ms = svc.snap_pause_point(5.5, words, gaps, []) + + assert natural_gap_ms == pytest.approx(1000.0, abs=1.0) + + +# ── A3: minimum gap validation ─────────────────────────────────────────────── + +class TestMinGapValidation: + def test_short_gap_triggers_forward_search(self, svc): + """Case C gap < min_acceptable_gap → searches forward for a better gap.""" + short_gap = _gap(10.0, 10.1) # 0.1s < 0.2s threshold + good_gap = _gap(11.5, 12.0) # 0.5s — acceptable + boundaries = [_boundary(10.0, gap=short_gap)] + words = [_word(9.0, 10.0), _word(10.2, 11.5)] + gaps = [short_gap, good_gap] + + pause, _, _, natural_gap_ms = svc.snap_pause_point(10.5, words, gaps, boundaries) + + # Should snap forward to midpoint of good_gap (11.5+12.0)/2 = 11.75 + assert pause == pytest.approx(11.75, abs=0.01) + assert natural_gap_ms == pytest.approx(500.0, abs=1.0) + + def test_short_gap_no_forward_alternative_keeps_original(self, svc): + """Short gap, no acceptable gap ahead → stays at original point with warning.""" + short_gap = _gap(10.0, 10.1) + boundaries = [_boundary(10.0, gap=short_gap)] + words = [_word(9.0, 10.0), _word(10.2, 14.0)] + gaps = [short_gap] # no other gap + + pause, _, warning, _ = svc.snap_pause_point(10.5, words, gaps, boundaries) + + # Falls back to midpoint of short_gap since no alternative + assert pause == pytest.approx(10.05, abs=0.01) + assert warning is None # no warning for "stayed at original" + + def test_fallback_no_gap_triggers_forward_search(self, svc): + """Fallback case (no gap on boundary) with no nearby gap → searches forward.""" + b = _boundary(10.0, gap=None) + good_gap = _gap(11.0, 11.8) + words = [_word(9.0, 10.0), _word(10.1, 11.0)] + gaps = [good_gap] + + pause, _, warning, natural_gap_ms = svc.snap_pause_point(10.5, words, gaps, [b]) + + assert pause == pytest.approx(11.4, abs=0.01) + assert natural_gap_ms == pytest.approx(800.0, abs=1.0) + assert warning is not None # warns that it snapped forward + + +# ── refine_all_pause_points integration ───────────────────────────────────── + +class TestRefineAllPausePointsIntegration: + def test_stores_natural_gap_ms_on_placement(self, svc): + """refine_all_pause_points must persist natural_gap_ms onto each placement.""" + gap = _gap(10.0, 11.0) + words = [_word(8.0, 10.0), _word(11.0, 12.0)] + gaps = [gap] + placements = [{"ad_cue_index": 0, "pause_point": 10.5, "ad_duration": 3.0}] + + refined, _ = svc.refine_all_pause_points(placements, words, gaps) + + assert "natural_gap_ms" in refined[0] + assert refined[0]["natural_gap_ms"] == pytest.approx(1000.0, abs=1.0) + + def test_no_whisper_data_returns_original_with_zero_gap(self, svc): + """No words → _is_during_speaking=False → exact point, no warning, natural_gap_ms=0.""" + placements = [{"ad_cue_index": 0, "pause_point": 5.0, "ad_duration": 2.0}] + + refined, warnings = svc.refine_all_pause_points(placements, [], []) + + assert refined[0]["pause_point"] == pytest.approx(5.0) + assert refined[0].get("natural_gap_ms", 0) == 0.0 + # No words → not-during-speaking path → no snap → no warning + assert len(warnings) == 0 diff --git a/docker-compose.yml b/docker-compose.yml index 3eec614..58ab188 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -94,7 +94,7 @@ services: JWT_ALG: ${JWT_ALG:-HS256} JWT_ACCESS_TTL_MIN: ${JWT_ACCESS_TTL_MIN:-240} JWT_REFRESH_TTL_DAYS: ${JWT_REFRESH_TTL_DAYS:-7} - COOKIE_DOMAIN: ${COOKIE_DOMAIN:-ai-sandbox.oliver.solutions} + COOKIE_DOMAIN: ${COOKIE_DOMAIN:-optical-dev.oliver.solutions} COOKIE_SECURE: ${COOKIE_SECURE:-true} COOKIE_SAMESITE: ${COOKIE_SAMESITE:-Lax} @@ -119,8 +119,8 @@ services: # Email SENDGRID_API_KEY: ${SENDGRID_API_KEY:-} - EMAIL_FROM: ${EMAIL_FROM:-noreply@ai-sandbox.oliver.solutions} - CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://ai-sandbox.oliver.solutions/video-accessibility} + EMAIL_FROM: ${EMAIL_FROM:-noreply@optical-dev.oliver.solutions} + CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://optical-dev.oliver.solutions/video-accessibility} # Microsoft Authentication AZURE_CLIENT_ID: ${AZURE_CLIENT_ID:-} @@ -189,7 +189,7 @@ services: JWT_ALG: ${JWT_ALG:-HS256} JWT_ACCESS_TTL_MIN: ${JWT_ACCESS_TTL_MIN:-240} JWT_REFRESH_TTL_DAYS: ${JWT_REFRESH_TTL_DAYS:-7} - COOKIE_DOMAIN: ${COOKIE_DOMAIN:-ai-sandbox.oliver.solutions} + COOKIE_DOMAIN: ${COOKIE_DOMAIN:-optical-dev.oliver.solutions} COOKIE_SECURE: ${COOKIE_SECURE:-true} COOKIE_SAMESITE: ${COOKIE_SAMESITE:-Lax} @@ -215,8 +215,8 @@ services: # Email SENDGRID_API_KEY: ${SENDGRID_API_KEY:-} - EMAIL_FROM: ${EMAIL_FROM:-noreply@ai-sandbox.oliver.solutions} - CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://ai-sandbox.oliver.solutions/video-accessibility} + EMAIL_FROM: ${EMAIL_FROM:-noreply@optical-dev.oliver.solutions} + CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://optical-dev.oliver.solutions/video-accessibility} # Microsoft Authentication AZURE_CLIENT_ID: ${AZURE_CLIENT_ID:-} @@ -276,7 +276,7 @@ services: JWT_ALG: ${JWT_ALG:-HS256} JWT_ACCESS_TTL_MIN: ${JWT_ACCESS_TTL_MIN:-240} JWT_REFRESH_TTL_DAYS: ${JWT_REFRESH_TTL_DAYS:-7} - COOKIE_DOMAIN: ${COOKIE_DOMAIN:-ai-sandbox.oliver.solutions} + COOKIE_DOMAIN: ${COOKIE_DOMAIN:-optical-dev.oliver.solutions} COOKIE_SECURE: ${COOKIE_SECURE:-true} COOKIE_SAMESITE: ${COOKIE_SAMESITE:-Lax} @@ -302,8 +302,8 @@ services: # Email SENDGRID_API_KEY: ${SENDGRID_API_KEY:-} - EMAIL_FROM: ${EMAIL_FROM:-noreply@ai-sandbox.oliver.solutions} - CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://ai-sandbox.oliver.solutions/video-accessibility} + EMAIL_FROM: ${EMAIL_FROM:-noreply@optical-dev.oliver.solutions} + CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://optical-dev.oliver.solutions/video-accessibility} # Microsoft Authentication AZURE_CLIENT_ID: ${AZURE_CLIENT_ID:-} @@ -369,7 +369,7 @@ services: JWT_ALG: ${JWT_ALG:-HS256} JWT_ACCESS_TTL_MIN: ${JWT_ACCESS_TTL_MIN:-240} JWT_REFRESH_TTL_DAYS: ${JWT_REFRESH_TTL_DAYS:-7} - COOKIE_DOMAIN: ${COOKIE_DOMAIN:-ai-sandbox.oliver.solutions} + COOKIE_DOMAIN: ${COOKIE_DOMAIN:-optical-dev.oliver.solutions} COOKIE_SECURE: ${COOKIE_SECURE:-true} COOKIE_SAMESITE: ${COOKIE_SAMESITE:-Lax} @@ -395,8 +395,8 @@ services: # Email SENDGRID_API_KEY: ${SENDGRID_API_KEY:-} - EMAIL_FROM: ${EMAIL_FROM:-noreply@ai-sandbox.oliver.solutions} - CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://ai-sandbox.oliver.solutions/video-accessibility} + EMAIL_FROM: ${EMAIL_FROM:-noreply@optical-dev.oliver.solutions} + CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://optical-dev.oliver.solutions/video-accessibility} # Microsoft Authentication AZURE_CLIENT_ID: ${AZURE_CLIENT_ID:-} @@ -466,7 +466,7 @@ services: JWT_ALG: ${JWT_ALG:-HS256} JWT_ACCESS_TTL_MIN: ${JWT_ACCESS_TTL_MIN:-240} JWT_REFRESH_TTL_DAYS: ${JWT_REFRESH_TTL_DAYS:-7} - COOKIE_DOMAIN: ${COOKIE_DOMAIN:-ai-sandbox.oliver.solutions} + COOKIE_DOMAIN: ${COOKIE_DOMAIN:-optical-dev.oliver.solutions} COOKIE_SECURE: ${COOKIE_SECURE:-true} COOKIE_SAMESITE: ${COOKIE_SAMESITE:-Lax} @@ -492,8 +492,8 @@ services: # Email SENDGRID_API_KEY: ${SENDGRID_API_KEY:-} - EMAIL_FROM: ${EMAIL_FROM:-noreply@ai-sandbox.oliver.solutions} - CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://ai-sandbox.oliver.solutions/video-accessibility} + EMAIL_FROM: ${EMAIL_FROM:-noreply@optical-dev.oliver.solutions} + CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://optical-dev.oliver.solutions/video-accessibility} # Microsoft Authentication AZURE_CLIENT_ID: ${AZURE_CLIENT_ID:-} diff --git a/docs/project/api_spec.md b/docs/project/api_spec.md index 8903fd7..0692527 100644 --- a/docs/project/api_spec.md +++ b/docs/project/api_spec.md @@ -35,7 +35,7 @@ | Environment | URL | |-------------|-----| -| Production | `https://ai-sandbox.oliver.solutions/video-accessibility-back` | +| Production | `https://optical-dev.oliver.solutions/video-accessibility-back` | | Local (Docker) | `http://localhost:8012` | | OpenAPI (Swagger) | `{base_url}/docs` | diff --git a/docs/project/infrastructure.md b/docs/project/infrastructure.md index 2fa50ec..67d65c6 100644 --- a/docs/project/infrastructure.md +++ b/docs/project/infrastructure.md @@ -38,8 +38,8 @@ | `optical-web-1` | Production host — runs all Docker services | Production | | Local machine | Developer workstation — Docker Compose local stack | Development | -**Production URL:** `https://ai-sandbox.oliver.solutions/video-accessibility` -**Production API URL:** `https://ai-sandbox.oliver.solutions/video-accessibility-back` +**Production URL:** `https://optical-dev.oliver.solutions/video-accessibility` +**Production API URL:** `https://optical-dev.oliver.solutions/video-accessibility-back` --- @@ -68,7 +68,7 @@ All services are defined in `docker-compose.yml` and share the `accessible-video | `redis` | 6379 | — | Internal only | | Workers | — | — | No HTTP port | -Production: nginx reverse-proxies `optical-web-1:8012` → `https://ai-sandbox.oliver.solutions/video-accessibility-back`. +Production: nginx reverse-proxies `optical-web-1:8012` → `https://optical-dev.oliver.solutions/video-accessibility-back`. --- diff --git a/docs/project/runbook.md b/docs/project/runbook.md index 7ea4141..0ee3994 100644 --- a/docs/project/runbook.md +++ b/docs/project/runbook.md @@ -116,7 +116,7 @@ Production uses the `.env` file on optical-web-1. Key differences from `.env.exa |----------|-----------------| | `APP_ENV` | `production` | | `COOKIE_SECURE` | `true` | -| `COOKIE_DOMAIN` | `ai-sandbox.oliver.solutions` | +| `COOKIE_DOMAIN` | `optical-dev.oliver.solutions` | | All API keys | Real secret values | --- @@ -279,7 +279,7 @@ Copy from `.env.example`. All variables are required unless marked optional. | `JWT_ALG` | `HS256` | No | JWT signing algorithm | | `JWT_ACCESS_TTL_MIN` | `240` | No | Access token TTL (minutes) | | `JWT_REFRESH_TTL_DAYS` | `7` | No | Refresh token TTL (days) | -| `COOKIE_DOMAIN` | `ai-sandbox.oliver.solutions` | Yes | Refresh cookie domain | +| `COOKIE_DOMAIN` | `optical-dev.oliver.solutions` | Yes | Refresh cookie domain | | `COOKIE_SECURE` | `true` | No | Set `false` for local HTTP | | `COOKIE_SAMESITE` | `Lax` | No | | | `MONGODB_URI` | — | Yes | MongoDB connection string | @@ -295,7 +295,7 @@ Copy from `.env.example`. All variables are required unless marked optional. | `ELEVENLABS_API_KEY` | — | No | ElevenLabs API key | | `GOOGLE_TTS_CREDENTIALS` | `/secrets/gcp-credentials.json` | No | Separate TTS credentials if needed | | `SENDGRID_API_KEY` | — | No | SendGrid API key | -| `EMAIL_FROM` | `noreply@ai-sandbox.oliver.solutions` | No | Sender address | +| `EMAIL_FROM` | `noreply@optical-dev.oliver.solutions` | No | Sender address | | `CLIENT_BASE_URL` | — | No | Frontend URL for email links | | `AZURE_CLIENT_ID` | — | No | Microsoft SSO client ID | | `AZURE_AUTHORITY` | — | No | Microsoft tenant authority URL | diff --git a/frontend/src/components/TimelinePreview/TimelinePreview.tsx b/frontend/src/components/TimelinePreview/TimelinePreview.tsx index 1ca43f2..2bb36ee 100644 --- a/frontend/src/components/TimelinePreview/TimelinePreview.tsx +++ b/frontend/src/components/TimelinePreview/TimelinePreview.tsx @@ -41,13 +41,83 @@ export function TimelinePreview({ const [contextMenu, setContextMenu] = useState(null); const timelineRef = useRef(null); + // ── Drag state (B1 marker drag, B2 freeze-block drag) ───────────────────── + // State drives rendering (tooltip, cursor, position); refs allow event handlers + // to read current values synchronously without waiting for a React re-render + // (pointer events can fire faster than React batches state commits). + const [draggingCueIndex, setDraggingCueIndex] = useState(null); + const [dragMs, setDragMs] = useState(null); + const draggingCueIndexRef = useRef(null); + const dragMsRef = useRef(null); + const dragStartXRef = useRef(0); + const movedRef = useRef(false); + const getPositionPercent = useCallback( (ms: number) => (totalDurationMs > 0 ? (ms / totalDurationMs) * 100 : 0), [totalDurationMs] ); + // Converts a clientX coordinate to a clamped ms value for the given pause point. + const clientXToMs = useCallback( + (clientX: number, pp: PausePointData): number => { + if (!timelineRef.current || totalDurationMs <= 0) return pp.adjusted_ms ?? pp.original_ms; + const rect = timelineRef.current.getBoundingClientRect(); + const fraction = Math.max(0, Math.min(1, (clientX - rect.left) / rect.width)); + const raw = Math.round(fraction * totalDurationMs); + return Math.max(pp.min_bound_ms, Math.min(pp.max_bound_ms, raw)); + }, + [totalDurationMs] + ); + + // ── Pointer handlers wired to each pause-point marker / freeze block ─────── + + const handleDragPointerDown = (e: React.PointerEvent, pp: PausePointData) => { + if (e.button !== 0) return; // left-button only; let right-click fall through to onContextMenu + e.stopPropagation(); + (e.currentTarget as HTMLElement).setPointerCapture(e.pointerId); + dragStartXRef.current = e.clientX; + movedRef.current = false; + const initialMs = pp.adjusted_ms ?? pp.original_ms; + draggingCueIndexRef.current = pp.cue_index; + dragMsRef.current = initialMs; + setDraggingCueIndex(pp.cue_index); + setDragMs(initialMs); + }; + + const handleDragPointerMove = (e: React.PointerEvent, pp: PausePointData) => { + if (draggingCueIndexRef.current !== pp.cue_index) return; + if (Math.abs(e.clientX - dragStartXRef.current) > 3) movedRef.current = true; + if (!movedRef.current) return; + const ms = clientXToMs(e.clientX, pp); + dragMsRef.current = ms; + setDragMs(ms); + }; + + const handleDragPointerUp = ( + e: React.PointerEvent, + pp: PausePointData, + openEditorFn: () => void + ) => { + if (draggingCueIndexRef.current !== pp.cue_index) return; + (e.currentTarget as HTMLElement).releasePointerCapture(e.pointerId); + const didMove = movedRef.current; + const finalMs = dragMsRef.current; + draggingCueIndexRef.current = null; + dragMsRef.current = null; + setDraggingCueIndex(null); + setDragMs(null); + + if (didMove && finalMs !== null && finalMs !== (pp.adjusted_ms ?? pp.original_ms)) { + onPausePointUpdate(pp.cue_index, finalMs); + } else if (!didMove) { + openEditorFn(); + } + }; + + // ── Existing click / editor handlers ────────────────────────────────────── + const handlePausePointMarkerClick = ( - e: React.MouseEvent, + e: React.MouseEvent | React.PointerEvent, pausePoint: PausePointData ) => { e.stopPropagation(); @@ -60,7 +130,6 @@ export function TimelinePreview({ const handleSegmentClick = (segment: VideoSegmentMetadata) => { onSegmentClick(segment); if (segment.is_freeze_frame && segment.cue_index !== null) { - // Highlight the AD cue const pausePoint = pausePoints.find(pp => pp.cue_index === segment.cue_index); if (pausePoint) { onPausePointClick(pausePoint); @@ -128,7 +197,19 @@ export function TimelinePreview({ > {/* Segments */} {segments.map((segment) => { - const leftPercent = getPositionPercent(segment.start_ms); + // During drag of this segment's cue, offset the block visually by the drag delta + const linkedPP = segment.is_freeze_frame && segment.cue_index !== null + ? pausePoints.find(pp => pp.cue_index === segment.cue_index) ?? null + : null; + const isDraggingBlock = linkedPP !== null && draggingCueIndex === linkedPP.cue_index; + + let leftMs = segment.start_ms; + if (isDraggingBlock && dragMs !== null && linkedPP) { + const origPpMs = linkedPP.adjusted_ms ?? linkedPP.original_ms; + leftMs = segment.start_ms + (dragMs - origPpMs); + } + + const leftPercent = getPositionPercent(leftMs); const widthPercent = getPositionPercent(segment.duration_ms); const isRegenerationQueued = segment.is_freeze_frame && @@ -138,21 +219,30 @@ export function TimelinePreview({ return (
handleSegmentClick(segment)} + onClick={() => { + if (!movedRef.current) handleSegmentClick(segment); + }} + onPointerDown={linkedPP ? (e) => handleDragPointerDown(e, linkedPP) : undefined} + onPointerMove={linkedPP ? (e) => handleDragPointerMove(e, linkedPP) : undefined} + onPointerUp={linkedPP + ? (e) => handleDragPointerUp(e, linkedPP, () => handleSegmentClick(segment)) + : undefined} title={ segment.is_freeze_frame - ? `AD Cue ${segment.cue_index !== null ? segment.cue_index + 1 : ''}${isRegenerationQueued ? ' (Regenerate queued)' : ''}` + ? `AD Cue ${segment.cue_index !== null ? segment.cue_index + 1 : ''}${isRegenerationQueued ? ' (Regenerate queued)' : ''} — drag to move` : `Video segment ${segment.segment_index}` } > @@ -170,21 +260,45 @@ export function TimelinePreview({ {/* Pause point markers */} {pausePoints.map((pausePoint) => { - const effectiveMs = pausePoint.adjusted_ms ?? pausePoint.original_ms; - const leftPercent = getPositionPercent(effectiveMs); + const isDraggingThis = draggingCueIndex === pausePoint.cue_index; + const displayMs = isDraggingThis && dragMs !== null + ? dragMs + : (pausePoint.adjusted_ms ?? pausePoint.original_ms); + const leftPercent = getPositionPercent(displayMs); const isAdjusted = pausePoint.adjusted_ms !== null; return (
{ e.stopPropagation(); handlePausePointMarkerClick(e, pausePoint); }} - onContextMenu={(e) => { e.preventDefault(); e.stopPropagation(); handleContextMenuPauseOpen(pausePoint); }} - title={`Pause point ${pausePoint.cue_index + 1}: ${formatTime(effectiveMs)}${isAdjusted ? ' (adjusted)' : ''} — click to edit`} - /> + onPointerDown={(e) => handleDragPointerDown(e, pausePoint)} + onPointerMove={(e) => handleDragPointerMove(e, pausePoint)} + onPointerUp={(e) => + handleDragPointerUp(e, pausePoint, () => + handlePausePointMarkerClick(e, pausePoint) + ) + } + onContextMenu={(e) => { + e.preventDefault(); + e.stopPropagation(); + handleContextMenuPauseOpen(pausePoint); + }} + title={`Pause point ${pausePoint.cue_index + 1}: ${formatTime(pausePoint.adjusted_ms ?? pausePoint.original_ms)}${isAdjusted ? ' (adjusted)' : ''} — drag to move`} + > + {/* Drag time tooltip (B3) */} + {isDraggingThis && dragMs !== null && ( +
+ {formatTime(dragMs)} +
+ )} +
); })} @@ -209,7 +323,7 @@ export function TimelinePreview({
- AD Audio + AD Audio (drag to move)
@@ -217,7 +331,7 @@ export function TimelinePreview({
- Pause Point + Pause Point (drag to move)
diff --git a/frontend/src/components/TimelinePreview/__tests__/TimelinePreview.drag.test.tsx b/frontend/src/components/TimelinePreview/__tests__/TimelinePreview.drag.test.tsx new file mode 100644 index 0000000..db56a3a --- /dev/null +++ b/frontend/src/components/TimelinePreview/__tests__/TimelinePreview.drag.test.tsx @@ -0,0 +1,342 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest' +import { act } from 'react' +import { render, fireEvent } from '../../../test/utils' +import { TimelinePreview } from '../TimelinePreview' +import type { PausePointData, VideoSegmentMetadata } from '../../../types/api' + +// ── helpers ────────────────────────────────────────────────────────────────── + +function makePausePoint(overrides: Partial = {}): PausePointData { + return { + cue_index: 0, + original_ms: 5000, + source_ms: null, + adjusted_ms: null, + min_bound_ms: 1000, + max_bound_ms: 9000, + natural_gap_ms: 0, + ...overrides, + } +} + +function makeSegment(overrides: Partial = {}): VideoSegmentMetadata { + return { + segment_index: 0, + start_ms: 0, + end_ms: 5000, + gcs_uri: 'gs://test/segment.mp4', + duration_ms: 5000, + is_freeze_frame: false, + cue_index: null, + ...overrides, + } +} + +function makeFreezeSegment(cueIndex: number, startMs: number): VideoSegmentMetadata { + return makeSegment({ + segment_index: 1, + is_freeze_frame: true, + cue_index: cueIndex, + start_ms: startMs, + end_ms: startMs + 6000, + duration_ms: 6000, + }) +} + +// Default props used in most tests +function defaultProps(overrides: Record = {}) { + return { + segments: [], + pausePoints: [], + totalDurationMs: 10000, + currentTimeMs: 0, + onSegmentClick: vi.fn(), + onPausePointClick: vi.fn(), + onPausePointUpdate: vi.fn(), + onRegenerateTTS: vi.fn(), + regenerationQueue: [], + ...overrides, + } +} + +// Mock pointer capture (jsdom does not implement setPointerCapture) +beforeEach(() => { + Element.prototype.setPointerCapture = vi.fn() + Element.prototype.releasePointerCapture = vi.fn() +}) + +// jsdom's PointerEvent doesn't expose MouseEvent properties (button, clientX) from init — +// they come back as undefined, breaking `if (e.button !== 0)` and delta math. +// Work-around: dispatch MouseEvents typed as pointer events. MouseEvent correctly handles +// all MouseEventInit fields; React 19 routes by event.type, not constructor type. +function ptrDown(el: HTMLElement, init: { button?: number; clientX: number }) { + fireEvent(el, new MouseEvent('pointerdown', { + bubbles: true, cancelable: true, + button: init.button ?? 0, clientX: init.clientX, + })) +} + +function ptrMove(el: HTMLElement, init: { clientX: number }) { + fireEvent(el, new MouseEvent('pointermove', { + bubbles: true, cancelable: true, clientX: init.clientX, + })) +} + +function ptrUp(el: HTMLElement, init: { clientX: number }) { + fireEvent(el, new MouseEvent('pointerup', { + bubbles: true, cancelable: true, clientX: init.clientX, + })) +} + +// Helper to mock the timeline container rect so clientX→ms math works +function mockTimelineRect(container: HTMLElement) { + const timelineDiv = container.querySelector('.relative.h-16') as HTMLElement + if (timelineDiv) { + vi.spyOn(timelineDiv, 'getBoundingClientRect').mockReturnValue({ + left: 0, right: 1000, width: 1000, top: 0, bottom: 64, height: 64, x: 0, y: 0, + toJSON: () => {}, + } as DOMRect) + } + return timelineDiv +} + +// ── Marker drag tests (B1) ──────────────────────────────────────────────────── + +describe('TimelinePreview — marker drag', () => { + it('calls onPausePointUpdate when marker is dragged and released', async () => { + const onPausePointUpdate = vi.fn() + const pp = makePausePoint({ cue_index: 0, original_ms: 5000 }) + const { container } = render( + + ) + + mockTimelineRect(container) + const marker = container.querySelector('[title*="Pause point 1"]') as HTMLElement + expect(marker).toBeTruthy() + + // Drag: down at x=500 (5s), move to x=700 (7s), up at x=700 + await act(async () => { + ptrDown(marker, { button: 0, clientX: 500 }) + ptrMove(marker, { clientX: 504 }) // >3px threshold + ptrMove(marker, { clientX: 700 }) + ptrUp(marker, { clientX: 700 }) + }) + + expect(onPausePointUpdate).toHaveBeenCalledOnce() + const [cueIndex, adjustedMs] = onPausePointUpdate.mock.calls[0] + expect(cueIndex).toBe(0) + // 700/1000 * 10000 = 7000ms, within bounds [1000, 9000] + expect(adjustedMs).toBe(7000) + }) + + it('opens editor popover on click (no movement)', async () => { + const onPausePointUpdate = vi.fn() + const onPausePointClick = vi.fn() + const pp = makePausePoint({ cue_index: 0 }) + const { container } = render( + + ) + + mockTimelineRect(container) + const marker = container.querySelector('[title*="Pause point 1"]') as HTMLElement + + await act(async () => { + ptrDown(marker, { button: 0, clientX: 500 }) + // No move — stayed at same position + ptrUp(marker, { clientX: 500 }) + }) + + expect(onPausePointUpdate).not.toHaveBeenCalled() + expect(onPausePointClick).toHaveBeenCalledWith(pp) + }) + + it('does NOT call onPausePointUpdate if final position equals original', async () => { + const onPausePointUpdate = vi.fn() + const pp = makePausePoint({ cue_index: 0, original_ms: 5000 }) + const { container } = render( + + ) + + mockTimelineRect(container) + const marker = container.querySelector('[title*="Pause point 1"]') as HTMLElement + + // Drag to 500 → 504 → 500 (same ms as start = 5000ms) + await act(async () => { + ptrDown(marker, { button: 0, clientX: 500 }) + ptrMove(marker, { clientX: 504 }) + ptrMove(marker, { clientX: 500 }) + ptrUp(marker, { clientX: 500 }) + }) + + expect(onPausePointUpdate).not.toHaveBeenCalled() + }) + + it('clamps drag to min_bound_ms', async () => { + const onPausePointUpdate = vi.fn() + const pp = makePausePoint({ cue_index: 0, original_ms: 5000, min_bound_ms: 2000 }) + const { container } = render( + + ) + + mockTimelineRect(container) + const marker = container.querySelector('[title*="Pause point 1"]') as HTMLElement + + // Drag to x=50 → 500ms, below min_bound_ms=2000ms + await act(async () => { + ptrDown(marker, { button: 0, clientX: 500 }) + ptrMove(marker, { clientX: 504 }) + ptrMove(marker, { clientX: 50 }) + ptrUp(marker, { clientX: 50 }) + }) + + const [, adjustedMs] = onPausePointUpdate.mock.calls[0] + expect(adjustedMs).toBe(2000) // clamped to min_bound_ms + }) + + it('clamps drag to max_bound_ms', async () => { + const onPausePointUpdate = vi.fn() + const pp = makePausePoint({ cue_index: 0, original_ms: 5000, max_bound_ms: 8000 }) + const { container } = render( + + ) + + mockTimelineRect(container) + const marker = container.querySelector('[title*="Pause point 1"]') as HTMLElement + + // Drag to x=950 → 9500ms, above max_bound_ms=8000ms + await act(async () => { + ptrDown(marker, { button: 0, clientX: 500 }) + ptrMove(marker, { clientX: 504 }) + ptrMove(marker, { clientX: 950 }) + ptrUp(marker, { clientX: 950 }) + }) + + const [, adjustedMs] = onPausePointUpdate.mock.calls[0] + expect(adjustedMs).toBe(8000) // clamped to max_bound_ms + }) + + it('right-click does NOT start drag (context menu allowed)', async () => { + const onPausePointUpdate = vi.fn() + const pp = makePausePoint({ cue_index: 0 }) + const { container } = render( + + ) + + mockTimelineRect(container) + const marker = container.querySelector('[title*="Pause point 1"]') as HTMLElement + + await act(async () => { + ptrDown(marker, { button: 2, clientX: 500 }) + ptrMove(marker, { clientX: 504 }) + ptrMove(marker, { clientX: 700 }) + ptrUp(marker, { clientX: 700 }) + }) + + expect(onPausePointUpdate).not.toHaveBeenCalled() + }) +}) + +// ── Freeze-block drag tests (B2) ───────────────────────────────────────────── + +describe('TimelinePreview — freeze-block drag', () => { + it('calls onPausePointUpdate when freeze block is dragged', async () => { + const onPausePointUpdate = vi.fn() + const pp = makePausePoint({ cue_index: 0, original_ms: 5000 }) + const freeze = makeFreezeSegment(0, 5000) + const { container } = render( + + ) + + mockTimelineRect(container) + const block = container.querySelector('[title*="AD Cue 1"]') as HTMLElement + expect(block).toBeTruthy() + + // Drag block from x=500 → x=600 (+1s = 1000ms delta) + await act(async () => { + ptrDown(block, { button: 0, clientX: 500 }) + ptrMove(block, { clientX: 504 }) + ptrMove(block, { clientX: 600 }) + ptrUp(block, { clientX: 600 }) + }) + + expect(onPausePointUpdate).toHaveBeenCalledOnce() + const [cueIndex, adjustedMs] = onPausePointUpdate.mock.calls[0] + expect(cueIndex).toBe(0) + expect(adjustedMs).toBe(6000) // 600/1000 * 10000 = 6000ms + }) + + it('non-freeze segments do not start drag', async () => { + const onPausePointUpdate = vi.fn() + const seg = makeSegment({ segment_index: 0, is_freeze_frame: false, start_ms: 0, duration_ms: 4000 }) + const { container } = render( + + ) + + mockTimelineRect(container) + const videoSeg = container.querySelector('[title*="Video segment"]') as HTMLElement + if (!videoSeg) return // no drag handlers on non-freeze segments → trivially passes + + await act(async () => { + ptrDown(videoSeg, { button: 0, clientX: 200 }) + ptrMove(videoSeg, { clientX: 204 }) + ptrMove(videoSeg, { clientX: 400 }) + ptrUp(videoSeg, { clientX: 400 }) + }) + + expect(onPausePointUpdate).not.toHaveBeenCalled() + }) +}) + +// ── Drag tooltip (B3) ───────────────────────────────────────────────────────── + +describe('TimelinePreview — drag tooltip', () => { + it('shows time tooltip during marker drag', async () => { + const pp = makePausePoint({ cue_index: 0, original_ms: 5000 }) + const { container } = render( + + ) + + mockTimelineRect(container) + const marker = container.querySelector('[title*="Pause point 1"]') as HTMLElement + + // Wrap in act so React flushes state updates before we query the DOM + await act(async () => { + ptrDown(marker, { button: 0, clientX: 500 }) + ptrMove(marker, { clientX: 504 }) + ptrMove(marker, { clientX: 700 }) + }) + + const tooltip = container.querySelector('.bg-gray-800') as HTMLElement + expect(tooltip).toBeTruthy() + expect(tooltip.textContent).toMatch(/\d:\d{2}/) + }) + + it('hides tooltip after drag ends', async () => { + const onPausePointUpdate = vi.fn() + const pp = makePausePoint({ cue_index: 0, original_ms: 5000 }) + const { container } = render( + + ) + + mockTimelineRect(container) + const marker = container.querySelector('[title*="Pause point 1"]') as HTMLElement + + await act(async () => { + ptrDown(marker, { button: 0, clientX: 500 }) + ptrMove(marker, { clientX: 504 }) + ptrMove(marker, { clientX: 700 }) + ptrUp(marker, { clientX: 700 }) + }) + + const tooltip = container.querySelector('.bg-gray-800') + expect(tooltip).toBeNull() + }) +}) diff --git a/frontend/src/test/setup.ts b/frontend/src/test/setup.ts index 5a24813..3c5b841 100644 --- a/frontend/src/test/setup.ts +++ b/frontend/src/test/setup.ts @@ -38,6 +38,7 @@ global.ResizeObserver = vi.fn(() => ({ global.URL.createObjectURL = vi.fn(() => 'mock-object-url') global.URL.revokeObjectURL = vi.fn() + // Mock HTMLMediaElement for video components Object.defineProperty(HTMLMediaElement.prototype, 'load', { writable: true, diff --git a/frontend/src/types/api.ts b/frontend/src/types/api.ts index 174e3d5..d7b95b1 100644 --- a/frontend/src/types/api.ts +++ b/frontend/src/types/api.ts @@ -647,6 +647,7 @@ export interface PausePointData { adjusted_ms: number | null; min_bound_ms: number; max_bound_ms: number; + natural_gap_ms?: number; // Duration (ms) of natural silence at the pause point; 0 = none } export interface VideoSegmentMetadata { diff --git a/scripts/build-frontend.sh b/scripts/build-frontend.sh index f16d75b..d9f492c 100755 --- a/scripts/build-frontend.sh +++ b/scripts/build-frontend.sh @@ -203,7 +203,7 @@ display_summary() { echo -e "${GREEN}Frontend successfully deployed!${NC}" echo "" echo "Deployment location: $DEPLOY_DIR" - echo "Frontend URL: https://ai-sandbox.oliver.solutions/video-accessibility" + echo "Frontend URL: https://optical-dev.oliver.solutions/video-accessibility" echo "" echo "To verify the deployment, visit the URL above in your browser." echo ""