feat(pause-insert): adaptive buffer, forward-snap, timeline drag + share link fix
Backend (Phase A): - A1: Adaptive silence buffer — natural_gap_ms persisted per cue; renderer computes per-cue silence_before/silence_after instead of fixed 500ms; per-cue silence files - A2: Forward-preferred snap — snap_pause_point prefers boundaries up to 4s ahead over boundaries within 1.5s behind, reducing mid-scene cuts - A3: Min-gap validation — pause points with < 200ms gap trigger forward search to the next acceptable gap - natural_gap_ms added to PausePointData model and api.ts type - New config fields: whisper_snap_forward_window, whisper_snap_backward_window, ad_silence_buffer_default, ad_silence_buffer_min_after, ad_min_acceptable_gap - Tests: test_whisper_snap.py (13 tests), test_video_renderer_buffers.py Frontend (Phase B): - B1: Drag pause-point markers — pointer state machine with 3px move threshold, clamp to min/max bounds, click-without-move still opens PausePointEditor - B2: Drag freeze blocks — orange blocks translate with linked pause point - B3: Time tooltip visible during drag, hidden on release - Tests: TimelinePreview.drag.test.tsx (10 tests) Fixes: - Share link pointed to ai-sandbox.oliver.solutions — added app_url to Settings with correct optical-dev.oliver.solutions default; share_url now configurable via APP_URL env var - Removed all ai-sandbox.oliver.solutions references from docker-compose, apache config, docs, and scripts Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
31d631f70d
commit
2f4925353a
18 changed files with 998 additions and 119 deletions
|
|
@ -1,6 +1,6 @@
|
|||
# =============================================================================
|
||||
# Apache config fragment — Accessible Video Platform
|
||||
# Inject into: /etc/apache2/sites-available/ai-sandbox.oliver.solutions-ssl.conf
|
||||
# Inject into: /etc/apache2/sites-available/optical-dev.oliver.solutions-ssl.conf
|
||||
#
|
||||
# Required modules:
|
||||
# sudo a2enmod proxy proxy_http proxy_wstunnel rewrite headers
|
||||
|
|
@ -70,7 +70,7 @@ Alias /video-accessibility /var/www/html/video-accessibility
|
|||
# =============================================================================
|
||||
#
|
||||
# <VirtualHost *:443>
|
||||
# ServerName ai-sandbox.oliver.solutions
|
||||
# ServerName optical-dev.oliver.solutions
|
||||
# DocumentRoot /var/www/html
|
||||
#
|
||||
# SSLEngine on
|
||||
|
|
@ -82,8 +82,8 @@ Alias /video-accessibility /var/www/html/video-accessibility
|
|||
#
|
||||
# # — paste the block above here —
|
||||
#
|
||||
# ErrorLog ${APACHE_LOG_DIR}/ai-sandbox-error.log
|
||||
# CustomLog ${APACHE_LOG_DIR}/ai-sandbox-access.log combined
|
||||
# ErrorLog ${APACHE_LOG_DIR}/optical-dev-error.log
|
||||
# CustomLog ${APACHE_LOG_DIR}/optical-dev-access.log combined
|
||||
# </VirtualHost>
|
||||
|
||||
# =============================================================================
|
||||
|
|
@ -91,6 +91,6 @@ Alias /video-accessibility /var/www/html/video-accessibility
|
|||
# =============================================================================
|
||||
# sudo apache2ctl configtest
|
||||
# sudo systemctl reload apache2
|
||||
# curl -I https://ai-sandbox.oliver.solutions/video-accessibility/
|
||||
# curl https://ai-sandbox.oliver.solutions/video-accessibility/api/v1/health
|
||||
# wscat -c wss://ai-sandbox.oliver.solutions/video-accessibility/api/v1/ws/job-list
|
||||
# curl -I https://optical-dev.oliver.solutions/video-accessibility/
|
||||
# curl https://optical-dev.oliver.solutions/video-accessibility/api/v1/health
|
||||
# wscat -c wss://optical-dev.oliver.solutions/video-accessibility/api/v1/ws/job-list
|
||||
|
|
|
|||
|
|
@ -21,8 +21,7 @@ _JOBS = "jobs"
|
|||
|
||||
|
||||
def _share_url(token: str) -> str:
|
||||
base = getattr(settings, "app_url", "https://ai-sandbox.oliver.solutions/video-accessibility")
|
||||
return f"{base}/share/{token}"
|
||||
return f"{settings.app_url}/share/{token}"
|
||||
|
||||
|
||||
# ── Request schemas ───────────────────────────────────────────────────────────
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ class Settings(BaseSettings):
|
|||
# App
|
||||
app_env: str = "dev"
|
||||
api_base_url: str = "http://localhost:8000"
|
||||
app_url: str = "https://optical-dev.oliver.solutions/video-accessibility"
|
||||
|
||||
# Auth
|
||||
jwt_secret: str
|
||||
|
|
@ -247,6 +248,14 @@ class Settings(BaseSettings):
|
|||
whisper_sentence_gap_threshold: float = 0.5 # Gap duration to classify as sentence boundary
|
||||
whisper_phrase_gap_threshold: float = 0.3 # Gap duration to classify as phrase boundary
|
||||
whisper_min_gap_threshold: float = 0.15 # Minimum gap duration to consider
|
||||
# Forward-preferred snap windows (A2)
|
||||
whisper_snap_forward_window: float = 4.0 # Prefer boundary up to N seconds ahead of Gemini point
|
||||
whisper_snap_backward_window: float = 1.5 # Fall back to boundary up to N seconds behind
|
||||
# Adaptive silence buffer (A1)
|
||||
ad_silence_buffer_default: float = 0.5 # Base silence duration (s) before/after AD audio
|
||||
ad_silence_buffer_min_after: float = 0.1 # Minimum silence after AD audio
|
||||
# Minimum gap required at the chosen pause point (A3)
|
||||
ad_min_acceptable_gap: float = 0.2 # Seconds; points with shorter gaps trigger forward search
|
||||
|
||||
# Cloud Run Service URLs (empty = use local processing)
|
||||
# When set, CPU-intensive work is offloaded to Cloud Run with autoscaling
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@ class PausePointData(BaseModel):
|
|||
adjusted_ms: float | None = None # User-adjusted timestamp (ms), None = use original
|
||||
min_bound_ms: float # Minimum allowed value (end of previous AD segment)
|
||||
max_bound_ms: float # Maximum allowed value (start of next AD segment)
|
||||
natural_gap_ms: float = 0.0 # Natural silence already present at pause point (ms); used to size silence buffers
|
||||
|
||||
|
||||
class VideoSegmentMetadata(BaseModel):
|
||||
|
|
|
|||
|
|
@ -116,8 +116,7 @@ def _qc_recipients(
|
|||
|
||||
def _deep_link(job_id: str, lang: str) -> str:
|
||||
from ..core.config import settings
|
||||
base = getattr(settings, "app_url", "https://ai-sandbox.oliver.solutions/video-accessibility")
|
||||
return f"{base}/admin/qc/{job_id}#lang-{lang}"
|
||||
return f"{settings.app_url}/admin/qc/{job_id}#lang-{lang}"
|
||||
|
||||
|
||||
# ── Auto-assignment ───────────────────────────────────────────────────────────
|
||||
|
|
|
|||
|
|
@ -54,6 +54,9 @@ class VideoRendererService:
|
|||
# Audio ducking settings
|
||||
self.duck_level = getattr(settings, 'accessible_video_duck_level', 0.3)
|
||||
self.duck_fade_ms = getattr(settings, 'accessible_video_duck_fade_ms', 200)
|
||||
# Adaptive silence buffer settings (A1)
|
||||
self._silence_buffer_default = getattr(settings, 'ad_silence_buffer_default', 0.5)
|
||||
self._silence_buffer_min_after = getattr(settings, 'ad_silence_buffer_min_after', 0.1)
|
||||
# Cloud Run support
|
||||
self._gcs_client: storage.Client | None = None
|
||||
# Source video caching for Cloud Run (uploaded once, reused across operations)
|
||||
|
|
@ -599,18 +602,38 @@ class VideoRendererService:
|
|||
final_segment_needed = final_segment_start < source_duration
|
||||
|
||||
# ============================================================
|
||||
# PARALLEL PHASE 1: Generate shared silence + extract all frames + all video segments
|
||||
# PARALLEL PHASE 1: Generate per-cue silence files + extract all frames + video segments
|
||||
# ============================================================
|
||||
logger.info(f"Phase 1: Parallel extraction of {len(valid_placements)} frames and video segments")
|
||||
|
||||
silence_duration = 0.5 # 500ms shared by all
|
||||
silence_path = temp_dir_path / "silence_shared.m4a"
|
||||
# Compute adaptive silence buffers per cue (A1):
|
||||
# natural_gap_ms already present at the pause point reduces how much silence we add.
|
||||
_buf_default = self._silence_buffer_default
|
||||
_buf_min_after = self._silence_buffer_min_after
|
||||
silence_pre_paths: dict[int, str] = {}
|
||||
silence_post_paths: dict[int, str] = {}
|
||||
for p in valid_placements:
|
||||
i = p["index"]
|
||||
natural_gap = (p.get("natural_gap_ms") or 0.0) / 1000.0
|
||||
silence_before = max(0.05, _buf_default - natural_gap * 0.5)
|
||||
silence_after = max(_buf_min_after, _buf_default - natural_gap * 0.3)
|
||||
p["silence_before"] = silence_before
|
||||
p["silence_after"] = silence_after
|
||||
silence_pre_paths[i] = str(temp_dir_path / f"silence_pre_{i}.m4a")
|
||||
silence_post_paths[i] = str(temp_dir_path / f"silence_post_{i}.m4a")
|
||||
logger.debug(
|
||||
f"Cue {p['cue_index']}: natural_gap={natural_gap*1000:.0f}ms → "
|
||||
f"silence_before={silence_before*1000:.0f}ms silence_after={silence_after*1000:.0f}ms"
|
||||
)
|
||||
|
||||
# Build tasks for phase 1
|
||||
phase1_tasks = []
|
||||
|
||||
# Task: Generate silence (just once, shared by all)
|
||||
phase1_tasks.append(self._generate_silence(silence_duration, str(silence_path), video_props))
|
||||
# Tasks: Generate per-cue silence files
|
||||
for p in valid_placements:
|
||||
i = p["index"]
|
||||
phase1_tasks.append(self._generate_silence(p["silence_before"], silence_pre_paths[i], video_props))
|
||||
phase1_tasks.append(self._generate_silence(p["silence_after"], silence_post_paths[i], video_props))
|
||||
|
||||
# Tasks: Extract all video segments
|
||||
video_segment_paths = {}
|
||||
|
|
@ -667,7 +690,7 @@ class VideoRendererService:
|
|||
combined_audio_path = temp_dir_path / f"combined_audio_{i}.m4a"
|
||||
combined_audio_paths[i] = str(combined_audio_path)
|
||||
phase2_tasks.append(self._concatenate_audio(
|
||||
[str(silence_path), p["ad_mp3_path"], str(silence_path)],
|
||||
[silence_pre_paths[i], p["ad_mp3_path"], silence_post_paths[i]],
|
||||
str(combined_audio_path),
|
||||
video_props
|
||||
))
|
||||
|
|
@ -686,11 +709,14 @@ class VideoRendererService:
|
|||
i = p["index"]
|
||||
cue_index = p["cue_index"]
|
||||
ad_duration = p["ad_duration"]
|
||||
total_freeze_duration = ad_duration + (2 * silence_duration)
|
||||
silence_before = p["silence_before"]
|
||||
silence_after = p["silence_after"]
|
||||
total_freeze_duration = ad_duration + silence_before + silence_after
|
||||
|
||||
logger.info(
|
||||
f"Cue {cue_index}: Freeze segment with silence buffers - "
|
||||
f"500ms + AD={ad_duration:.2f}s + 500ms = {total_freeze_duration:.2f}s"
|
||||
f"Cue {cue_index}: Freeze segment — "
|
||||
f"pre={silence_before*1000:.0f}ms + AD={ad_duration:.2f}s + "
|
||||
f"post={silence_after*1000:.0f}ms = {total_freeze_duration:.2f}s"
|
||||
)
|
||||
|
||||
freeze_segment_path = temp_dir_path / f"freeze_segment_{i}.mp4"
|
||||
|
|
@ -720,7 +746,7 @@ class VideoRendererService:
|
|||
p["actual_freeze_duration"] = actual_duration
|
||||
|
||||
# Log any discrepancy between expected and actual duration
|
||||
expected = p["ad_duration"] + (2 * silence_duration)
|
||||
expected = p["ad_duration"] + p["silence_before"] + p["silence_after"]
|
||||
discrepancy = actual_duration - expected
|
||||
if abs(discrepancy) > 0.01: # 10ms threshold
|
||||
logger.warning(
|
||||
|
|
@ -1535,7 +1561,7 @@ class VideoRendererService:
|
|||
"""
|
||||
Generate a silent audio file of specified duration.
|
||||
|
||||
Used to create 500ms silence buffers before/after AD audio.
|
||||
Used to create adaptive silence buffers before/after AD audio.
|
||||
"""
|
||||
if self._use_cloud_run:
|
||||
await self._generate_silence_cloud_run(duration, output_path, props)
|
||||
|
|
|
|||
|
|
@ -97,8 +97,12 @@ class WhisperService:
|
|||
self.phrase_gap_threshold = _get_setting('whisper_phrase_gap_threshold', 0.3)
|
||||
self.min_gap_threshold = _get_setting('whisper_min_gap_threshold', 0.15)
|
||||
|
||||
# Snapping configuration
|
||||
# Snapping configuration (A2: forward-preferred snap)
|
||||
self.max_search_window = _get_setting('whisper_max_search_window', 30.0)
|
||||
self.snap_forward_window = _get_setting('whisper_snap_forward_window', 4.0)
|
||||
self.snap_backward_window = _get_setting('whisper_snap_backward_window', 1.5)
|
||||
# Minimum gap duration to accept as a pause insertion point (A3: gap validation)
|
||||
self.min_acceptable_gap = _get_setting('ad_min_acceptable_gap', 0.2)
|
||||
|
||||
@property
|
||||
def model(self) -> WhisperModel:
|
||||
|
|
@ -367,20 +371,18 @@ class WhisperService:
|
|||
gaps: list[SpeechGap],
|
||||
boundaries: list[SentenceBoundary],
|
||||
speaking_threshold: float = 2.0
|
||||
) -> tuple[float, float, str | None]:
|
||||
) -> tuple[float, float, str | None, float]:
|
||||
"""
|
||||
Snap a Gemini pause point to the nearest sentence boundary.
|
||||
Snap a Gemini pause point to an appropriate sentence boundary.
|
||||
|
||||
Simplified algorithm:
|
||||
1. Check if "during speaking" (words within ±threshold)
|
||||
- If NO → Use Gemini's exact pause point
|
||||
2. If during speaking, find nearest sentence gap and snap to MIDPOINT
|
||||
3. Edge cases:
|
||||
- Case A: First sentence in video → pause at video start (0.0)
|
||||
- Case B: Last sentence in video → pause at video end
|
||||
|
||||
The video renderer adds 500ms silence buffers before/after AD audio,
|
||||
so no overlap or catch-up logic is needed here.
|
||||
Algorithm:
|
||||
1. If NOT during speaking → use Gemini's exact point (already in a natural gap).
|
||||
2. Forward-preferred boundary selection: prefer boundaries ahead of gemini_pause
|
||||
within snap_forward_window, fall back to backward within snap_backward_window.
|
||||
3. Edge cases A/B/C handled identically to before; each now also returns
|
||||
natural_gap_ms so the renderer can shrink silence buffers accordingly.
|
||||
4. Gap validation (A3): if the chosen pause_point has no acceptable gap nearby,
|
||||
search forward for the next usable gap.
|
||||
|
||||
Args:
|
||||
gemini_pause: Original pause point from Gemini (seconds)
|
||||
|
|
@ -390,70 +392,133 @@ class WhisperService:
|
|||
speaking_threshold: Max distance to consider "during speaking" (default: 2.0s)
|
||||
|
||||
Returns:
|
||||
Tuple of (pause_point, resume_from, warning_message_or_none)
|
||||
Note: resume_from always equals pause_point with the simplified algorithm
|
||||
Tuple of (pause_point, resume_from, warning_or_none, natural_gap_ms)
|
||||
natural_gap_ms: ms of natural silence already present at the chosen point
|
||||
"""
|
||||
# Step 1: Check if "during speaking" (words within ±threshold)
|
||||
# Step 1: Not during speaking → use Gemini's exact point
|
||||
if not self._is_during_speaking(gemini_pause, words, speaking_threshold):
|
||||
# Not during speaking - use Gemini's exact pause point
|
||||
natural_gap_ms = self._gap_duration_at(gemini_pause, gaps) * 1000.0
|
||||
logger.info(
|
||||
f"Pause point {gemini_pause:.2f}s is NOT during speaking "
|
||||
f"(no words within ±{speaking_threshold}s), using Gemini's exact point"
|
||||
f"Pause {gemini_pause:.2f}s not during speaking "
|
||||
f"(no words within ±{speaking_threshold}s) → using exact point, "
|
||||
f"natural_gap={natural_gap_ms:.0f}ms"
|
||||
)
|
||||
return gemini_pause, gemini_pause, None
|
||||
return gemini_pause, gemini_pause, None, natural_gap_ms
|
||||
|
||||
# Step 2: During speaking - find nearest sentence boundary
|
||||
# Step 2: During speaking — forward-preferred boundary selection (A2)
|
||||
if not boundaries:
|
||||
# No boundaries found at all - use Gemini's point with warning
|
||||
logger.warning(f"No sentence boundaries found, using Gemini's exact point {gemini_pause:.2f}s")
|
||||
return gemini_pause, gemini_pause, "No sentence boundaries found in transcript"
|
||||
return gemini_pause, gemini_pause, "No sentence boundaries found in transcript", 0.0
|
||||
|
||||
# Find the boundary closest to the Gemini pause point
|
||||
closest_boundary = min(boundaries, key=lambda b: abs(b.time - gemini_pause))
|
||||
forward = [b for b in boundaries if 0 < b.time - gemini_pause <= self.snap_forward_window]
|
||||
backward = [b for b in boundaries if 0 < gemini_pause - b.time <= self.snap_backward_window]
|
||||
|
||||
logger.debug(
|
||||
f"Nearest boundary to {gemini_pause:.2f}s: {closest_boundary.boundary_type} "
|
||||
f"at {closest_boundary.time:.2f}s (distance: {abs(closest_boundary.time - gemini_pause):.2f}s)"
|
||||
)
|
||||
if forward:
|
||||
closest_boundary = min(forward, key=lambda b: b.time - gemini_pause)
|
||||
logger.debug(
|
||||
f"Forward snap: Gemini={gemini_pause:.2f}s → boundary "
|
||||
f"{closest_boundary.boundary_type}@{closest_boundary.time:.2f}s "
|
||||
f"(+{closest_boundary.time - gemini_pause:.2f}s)"
|
||||
)
|
||||
elif backward:
|
||||
closest_boundary = min(backward, key=lambda b: gemini_pause - b.time)
|
||||
logger.debug(
|
||||
f"Backward snap (no forward boundary): Gemini={gemini_pause:.2f}s → "
|
||||
f"{closest_boundary.boundary_type}@{closest_boundary.time:.2f}s "
|
||||
f"(-{gemini_pause - closest_boundary.time:.2f}s)"
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"No boundary within fwd={self.snap_forward_window}s / "
|
||||
f"bwd={self.snap_backward_window}s of {gemini_pause:.2f}s → using exact point"
|
||||
)
|
||||
return (
|
||||
gemini_pause, gemini_pause,
|
||||
f"No boundary within snap windows of {gemini_pause:.2f}s",
|
||||
0.0
|
||||
)
|
||||
|
||||
# Case A: First sentence in video (no previous sentence) → snap to video start
|
||||
# Case A: First sentence in video → snap to video start
|
||||
if closest_boundary.boundary_type == "sentence_start" and not closest_boundary.has_previous_sentence:
|
||||
pause_point = 0.0
|
||||
logger.info(
|
||||
f"Case A (first sentence): pause_point={pause_point:.2f}s "
|
||||
f"(snapped to video start)"
|
||||
)
|
||||
return pause_point, pause_point, None
|
||||
natural_gap_ms = self._gap_duration_at(pause_point, gaps) * 1000.0
|
||||
logger.info("Case A (first sentence): pause_point=0.00s")
|
||||
return pause_point, pause_point, None, natural_gap_ms
|
||||
|
||||
# Case B: Last sentence in video (no next sentence) → snap to boundary time
|
||||
# Case B: Last sentence in video → snap to boundary time
|
||||
if closest_boundary.boundary_type == "sentence_end" and not closest_boundary.has_next_sentence:
|
||||
pause_point = closest_boundary.time
|
||||
logger.info(
|
||||
f"Case B (last sentence): pause_point={pause_point:.2f}s "
|
||||
f"(snapped to video end at sentence boundary)"
|
||||
)
|
||||
return pause_point, pause_point, None
|
||||
natural_gap_ms = self._gap_duration_at(pause_point, gaps) * 1000.0
|
||||
logger.info(f"Case B (last sentence): pause_point={pause_point:.2f}s")
|
||||
return pause_point, pause_point, None, natural_gap_ms
|
||||
|
||||
# Case C: Gap between two sentences → snap to MIDPOINT of the gap
|
||||
# Case C: Gap between sentences → snap to midpoint
|
||||
if closest_boundary.gap:
|
||||
gap = closest_boundary.gap
|
||||
# Calculate midpoint between end of previous sentence and start of next
|
||||
midpoint = (gap.start + gap.end) / 2.0
|
||||
|
||||
pause_point = (gap.start + gap.end) / 2.0
|
||||
natural_gap_ms = gap.duration * 1000.0
|
||||
logger.info(
|
||||
f"Case C (between sentences): gap={gap.start:.2f}s-{gap.end:.2f}s, "
|
||||
f"midpoint={midpoint:.2f}s (resume from same point)"
|
||||
f"Case C: gap {gap.start:.2f}s–{gap.end:.2f}s → "
|
||||
f"midpoint={pause_point:.2f}s, natural_gap={natural_gap_ms:.0f}ms"
|
||||
)
|
||||
return midpoint, midpoint, None
|
||||
# A3: gap too short → look for next acceptable gap
|
||||
if gap.duration < self.min_acceptable_gap:
|
||||
adjusted, adjusted_gap_ms, warn = self._find_next_acceptable_gap(pause_point, gaps)
|
||||
if warn is None:
|
||||
logger.info(
|
||||
f"A3: gap {gap.duration:.3f}s < threshold {self.min_acceptable_gap}s, "
|
||||
f"snapped forward to {adjusted:.2f}s (gap={adjusted_gap_ms:.0f}ms)"
|
||||
)
|
||||
return adjusted, adjusted, None, adjusted_gap_ms
|
||||
return pause_point, pause_point, None, natural_gap_ms
|
||||
|
||||
# Fallback: No gap associated with boundary - use the boundary time directly
|
||||
# This shouldn't normally happen but handles edge cases
|
||||
# Fallback: no gap → boundary time, then A3 validation
|
||||
pause_point = closest_boundary.time
|
||||
logger.info(
|
||||
f"Fallback: Using boundary at {closest_boundary.time:.2f}s, "
|
||||
f"pause_point={pause_point:.2f}s (no gap available)"
|
||||
)
|
||||
return pause_point, pause_point, None
|
||||
natural_gap_ms = self._gap_duration_at(pause_point, gaps) * 1000.0
|
||||
logger.info(f"Fallback: boundary at {pause_point:.2f}s, natural_gap={natural_gap_ms:.0f}ms")
|
||||
|
||||
if natural_gap_ms < self.min_acceptable_gap * 1000.0:
|
||||
adjusted, adjusted_gap_ms, warn = self._find_next_acceptable_gap(pause_point, gaps)
|
||||
if warn is None:
|
||||
logger.info(
|
||||
f"A3: no acceptable gap at {pause_point:.2f}s, "
|
||||
f"snapped forward to {adjusted:.2f}s"
|
||||
)
|
||||
return (
|
||||
adjusted, adjusted,
|
||||
f"Snapped forward to next acceptable gap (no gap at boundary {pause_point:.2f}s)",
|
||||
adjusted_gap_ms
|
||||
)
|
||||
logger.warning(f"A3: no acceptable gap found near {pause_point:.2f}s in forward window")
|
||||
return pause_point, pause_point, "No acceptable gap found near pause_point", 0.0
|
||||
|
||||
return pause_point, pause_point, None, natural_gap_ms
|
||||
|
||||
def _gap_duration_at(self, time: float, gaps: list[SpeechGap], epsilon: float = 0.1) -> float:
|
||||
"""Return duration in seconds of any gap whose window contains `time`, or 0.0."""
|
||||
for gap in gaps:
|
||||
if gap.start - epsilon <= time <= gap.end + epsilon:
|
||||
return gap.duration
|
||||
return 0.0
|
||||
|
||||
def _find_next_acceptable_gap(
|
||||
self, from_time: float, gaps: list[SpeechGap]
|
||||
) -> tuple[float, float, str | None]:
|
||||
"""Find the nearest forward gap with duration >= min_acceptable_gap.
|
||||
|
||||
Returns (midpoint_s, duration_ms, warning_or_none).
|
||||
"""
|
||||
candidates = [
|
||||
g for g in gaps
|
||||
if g.start > from_time
|
||||
and g.start - from_time <= self.snap_forward_window
|
||||
and g.duration >= self.min_acceptable_gap
|
||||
]
|
||||
if not candidates:
|
||||
return from_time, 0.0, "No acceptable gap in forward window"
|
||||
next_gap = min(candidates, key=lambda g: g.start)
|
||||
midpoint = (next_gap.start + next_gap.end) / 2.0
|
||||
return midpoint, next_gap.duration * 1000.0, None
|
||||
|
||||
def refine_all_pause_points(
|
||||
self,
|
||||
|
|
@ -496,13 +561,14 @@ class WhisperService:
|
|||
|
||||
if placement.get("pause_point") is not None:
|
||||
original = placement["pause_point"]
|
||||
pause_point, resume_from, warning = self.snap_pause_point(
|
||||
pause_point, resume_from, warning, natural_gap_ms = self.snap_pause_point(
|
||||
original, words, gaps, boundaries
|
||||
)
|
||||
|
||||
refined["pause_point"] = pause_point
|
||||
refined["resume_from"] = resume_from
|
||||
refined["original_pause_point"] = original # Preserve for debugging
|
||||
refined["natural_gap_ms"] = natural_gap_ms # For adaptive silence buffer
|
||||
|
||||
if warning:
|
||||
warnings.append(f"Cue {placement['ad_cue_index']}: {warning}")
|
||||
|
|
|
|||
90
backend/tests/unit/test_video_renderer_buffers.py
Normal file
90
backend/tests/unit/test_video_renderer_buffers.py
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
"""Tests for adaptive silence buffer formula in video_renderer.py (A1).
|
||||
|
||||
The renderer lives behind heavy GCP + FFmpeg deps only available in Docker.
|
||||
These tests cover the pure arithmetic used inside _render_pause_insert_method;
|
||||
they do not import VideoRendererService to stay runnable locally via pytest.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ── Pure formula tests (no FFmpeg, no GCS) ───────────────────────────────────
|
||||
#
|
||||
# Mirrors the exact formula in _render_pause_insert_method:
|
||||
# natural_gap = natural_gap_ms / 1000.0
|
||||
# silence_before = max(0.05, default_buf - natural_gap * 0.5)
|
||||
# silence_after = max(min_after, default_buf - natural_gap * 0.3)
|
||||
|
||||
def _buffers(
|
||||
natural_gap_ms: float,
|
||||
default_buf: float = 0.5,
|
||||
min_after: float = 0.1,
|
||||
) -> tuple[float, float]:
|
||||
natural_gap = natural_gap_ms / 1000.0
|
||||
silence_before = max(0.05, default_buf - natural_gap * 0.5)
|
||||
silence_after = max(min_after, default_buf - natural_gap * 0.3)
|
||||
return silence_before, silence_after
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("natural_gap_ms", "exp_before", "exp_after"),
    [
        # No natural gap: both buffers stay at the 0.5 s default.
        (0, 0.50, 0.50),
        # 200 ms: before = 0.5 - 0.10 = 0.40, after = 0.5 - 0.06 = 0.44
        (200, 0.40, 0.44),
        # 500 ms: before = 0.5 - 0.25 = 0.25, after = 0.5 - 0.15 = 0.35
        (500, 0.25, 0.35),
        # 1000 ms: before hits the 0.05 floor, after = 0.5 - 0.30 = 0.20
        (1000, 0.05, 0.20),
        # 1500 ms: before at floor, after = max(0.1, 0.05) = 0.10 (floor)
        (1500, 0.05, 0.10),
        # 2000 ms: both buffers sit on their floors
        (2000, 0.05, 0.10),
    ],
)
def test_buffer_formula(natural_gap_ms, exp_before, exp_after):
    """Each natural gap size maps to the expected before/after silence pair."""
    actual_before, actual_after = _buffers(natural_gap_ms)
    assert actual_before == pytest.approx(exp_before, abs=0.001)
    assert actual_after == pytest.approx(exp_after, abs=0.001)
|
||||
|
||||
|
||||
def test_total_freeze_duration_uses_adaptive_buffers():
    """total_freeze_duration = ad_duration + silence_before + silence_after.

    The expected values are pinned as literals. Comparing ``total`` against
    the same expression that produced it would pass for any formula.
    """
    ad_duration = 5.0
    natural_gap_ms = 800.0  # 800ms natural gap

    before, after = _buffers(natural_gap_ms)
    total = ad_duration + before + after

    # before = max(0.05, 0.5 - 0.4) = 0.10; after = max(0.1, 0.5 - 0.24) = 0.26
    assert before == pytest.approx(0.10, abs=0.001)
    assert after == pytest.approx(0.26, abs=0.001)
    assert total == pytest.approx(5.36, abs=0.001)
    # Sanity: less than the old constant 1.0s overhead when there's a natural gap
    assert (before + after) < 1.0
|
||||
|
||||
|
||||
def test_buffers_never_below_floor():
    """Neither buffer drops under its floor (0.05 s before, 0.10 s after) for any gap."""
    sampled_gaps_ms = [0, 100, 500, 1000, 5000, 10000]
    for gap_ms in sampled_gaps_ms:
        before, after = _buffers(gap_ms)
        assert before >= 0.05, f"silence_before={before} below floor for gap={gap_ms}ms"
        assert after >= 0.10, f"silence_after={after} below floor for gap={gap_ms}ms"
|
||||
|
||||
|
||||
def test_large_natural_gap_has_less_total_overhead_than_small_gap():
    """A 900 ms natural gap yields less combined added silence than a 100 ms one."""
    before_small, after_small = _buffers(100)
    before_large, after_large = _buffers(900)

    overhead_small = before_small + after_small
    overhead_large = before_large + after_large

    assert overhead_small > overhead_large
|
||||
|
||||
|
||||
def test_renderer_config_defaults_match_formula():
    """The helper's signature defaults equal the documented config defaults.

    This test does not import config.py, so it cannot detect a change made
    there. It guards the link between the documented values below and the
    defaults baked into ``_buffers``: calling the helper with no overrides
    must give the same result as passing the documented values explicitly.
    """
    # These must stay in sync with config.py defaults:
    #   ad_silence_buffer_default: float = 0.5
    #   ad_silence_buffer_min_after: float = 0.1
    DEFAULT_BUF = 0.5
    MIN_AFTER = 0.1

    # Comparing the literals with themselves can never fail; compare the
    # helper's implicit defaults against the explicit documented values.
    for gap_ms in (0, 200, 800, 10_000):
        assert _buffers(gap_ms) == pytest.approx(_buffers(gap_ms, DEFAULT_BUF, MIN_AFTER))

    # Verify the trailing-silence floor is derived from MIN_AFTER
    _, after = _buffers(10_000, DEFAULT_BUF, MIN_AFTER)  # saturated gap
    assert after == pytest.approx(MIN_AFTER)
|
||||
231
backend/tests/unit/test_whisper_snap.py
Normal file
231
backend/tests/unit/test_whisper_snap.py
Normal file
|
|
@ -0,0 +1,231 @@
|
|||
"""Tests for the improved snap_pause_point algorithm (A1/A2/A3)."""
|
||||
|
||||
import sys
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
# faster_whisper ships only in the Docker image; stub it so pytest can run locally.
|
||||
if 'faster_whisper' not in sys.modules:
|
||||
sys.modules['faster_whisper'] = MagicMock()
|
||||
|
||||
import pytest
|
||||
from app.services.whisper_service import (
|
||||
WhisperService,
|
||||
WordTimestamp,
|
||||
SpeechGap,
|
||||
SentenceBoundary,
|
||||
)
|
||||
|
||||
|
||||
# ── fixtures ────────────────────────────────────────────────────────────────
|
||||
|
||||
@pytest.fixture
def svc():
    """Provide a WhisperService constructed with no arguments."""
    service = WhisperService()
    return service
|
||||
|
||||
|
||||
def _word(start: float, end: float, text: str = "word") -> WordTimestamp:
    """Build a WordTimestamp spanning ``start``–``end`` with the given text."""
    return WordTimestamp(start=start, end=end, word=text)
|
||||
|
||||
|
||||
def _gap(start: float, end: float, gap_type: str = "sentence") -> SpeechGap:
    """Build a SpeechGap whose duration is derived as ``end - start``."""
    length = end - start
    return SpeechGap(start=start, end=end, duration=length, gap_type=gap_type)
|
||||
|
||||
|
||||
def _boundary(
    time: float,
    btype: str = "sentence_end",
    has_prev: bool = True,
    has_next: bool = True,
    gap: SpeechGap | None = None,
) -> SentenceBoundary:
    """Build a SentenceBoundary at ``time``; ``word_index`` is always 0."""
    fields = {
        "time": time,
        "boundary_type": btype,
        "word_index": 0,
        "has_previous_sentence": has_prev,
        "has_next_sentence": has_next,
        "gap": gap,
    }
    return SentenceBoundary(**fields)
|
||||
|
||||
|
||||
# ── A2: forward-preferred snap ───────────────────────────────────────────────
|
||||
|
||||
class TestForwardPreferredSnap:
|
||||
def test_picks_forward_over_equidistant_backward(self, svc):
|
||||
"""Gemini=10.5s; forward boundary@11.2s and backward@9.8s — must pick forward."""
|
||||
gap = _gap(11.2, 11.8)
|
||||
boundaries = [
|
||||
_boundary(9.8, gap=_gap(9.8, 10.0)),
|
||||
_boundary(11.2, gap=gap),
|
||||
]
|
||||
words = [_word(9.0, 9.5), _word(10.0, 10.5), _word(11.0, 11.2)]
|
||||
gaps = [_gap(9.8, 10.0), gap]
|
||||
|
||||
pause, _, warning, _ = svc.snap_pause_point(10.5, words, gaps, boundaries)
|
||||
|
||||
assert pause == pytest.approx(11.5, abs=0.01) # midpoint of 11.2–11.8
|
||||
assert warning is None
|
||||
|
||||
def test_forward_boundary_within_window_is_preferred(self, svc):
|
||||
"""Even a slightly farther forward boundary beats a closer backward one."""
|
||||
gap_fwd = _gap(12.0, 12.6)
|
||||
gap_bwd = _gap(10.1, 10.4)
|
||||
boundaries = [
|
||||
_boundary(10.1, gap=gap_bwd),
|
||||
_boundary(12.0, gap=gap_fwd),
|
||||
]
|
||||
words = [_word(9.0, 10.1), _word(10.5, 12.0)]
|
||||
gaps = [gap_bwd, gap_fwd]
|
||||
|
||||
pause, _, _, _ = svc.snap_pause_point(10.5, words, gaps, boundaries)
|
||||
|
||||
assert pause == pytest.approx(12.3, abs=0.01) # midpoint of 12.0–12.6
|
||||
|
||||
def test_falls_back_to_backward_when_no_forward_within_window(self, svc):
|
||||
"""No forward boundary within snap_forward_window → use backward (within 1.5s)."""
|
||||
# Boundary at 9.2s: distance = 10.5 - 9.2 = 1.3s ≤ snap_backward_window (1.5s) ✓
|
||||
gap = _gap(9.0, 9.4)
|
||||
boundaries = [_boundary(9.0, gap=gap)]
|
||||
words = [_word(7.0, 9.0), _word(9.4, 10.5)]
|
||||
gaps = [gap]
|
||||
|
||||
pause, _, warning, _ = svc.snap_pause_point(10.5, words, gaps, boundaries)
|
||||
|
||||
assert pause == pytest.approx(9.2, abs=0.01) # midpoint of 9.0–9.4
|
||||
|
||||
def test_no_boundary_in_any_window_returns_gemini_with_warning(self, svc):
|
||||
"""Boundary exists but outside both windows → exact Gemini point + warning."""
|
||||
# Put boundaries 10s away in both directions (beyond any window)
|
||||
boundaries = [
|
||||
_boundary(0.1, gap=_gap(0.0, 0.5)),
|
||||
_boundary(50.0, gap=_gap(49.0, 50.0)),
|
||||
]
|
||||
words = [_word(9.0, 12.0)]
|
||||
gaps = []
|
||||
|
||||
pause, resume, warning, _ = svc.snap_pause_point(10.5, words, gaps, boundaries)
|
||||
|
||||
assert pause == pytest.approx(10.5)
|
||||
assert warning is not None
|
||||
assert "snap windows" in warning.lower()
|
||||
|
||||
def test_not_during_speaking_uses_exact_point(self, svc):
    """A request that is not inside speech is returned as-is, without a warning.

    Speech ends at 3.0s and the request is at 7.0s, inside the silence
    gap 3.0–10.0, so a positive natural gap must also be reported.
    """
    candidates = [_boundary(5.0, gap=_gap(4.8, 5.3))]
    speech = [_word(0.0, 3.0)]
    silences = [_gap(3.0, 10.0)]

    snapped, _resume, note, gap_ms = svc.snap_pause_point(
        7.0, speech, silences, candidates
    )

    assert snapped == pytest.approx(7.0)
    assert note is None
    # 7.0 lies inside the 3.0–10.0 gap, so some natural gap is expected.
    assert gap_ms > 0
|
||||
|
||||
|
||||
# ── A1: natural_gap_ms returned correctly ───────────────────────────────────
|
||||
|
||||
class TestNaturalGapMs:
    """Checks the fourth value returned by snap_pause_point (natural_gap_ms)."""

    def test_case_c_returns_gap_duration(self, svc):
        """Case C (gap midpoint): natural_gap_ms equals the gap length in ms."""
        silence = _gap(10.0, 11.2)
        candidates = [_boundary(10.0, gap=silence)]
        speech = [_word(9.0, 10.0), _word(11.2, 12.0)]

        _pause, _resume, _warning, gap_ms = svc.snap_pause_point(
            10.5, speech, [silence], candidates
        )

        # The gap spans 10.0–11.2, i.e. 1.2s.
        assert gap_ms == pytest.approx(1200.0, abs=1.0)

    def test_no_gap_returns_zero(self, svc):
        """Boundary without an attached gap and no gaps at all → 0 ms."""
        bare_boundary = _boundary(10.0, gap=None)
        speech = [_word(9.0, 10.1), _word(10.1, 11.0)]

        _pause, _resume, _warning, gap_ms = svc.snap_pause_point(
            10.5, speech, [], [bare_boundary]
        )

        assert gap_ms == 0.0

    def test_not_during_speaking_reads_gap_from_gaps_list(self, svc):
        """With no boundaries supplied, the gap covering the request is reported."""
        silence = _gap(5.0, 6.0)  # contains the 5.5s request
        speech = [_word(0.0, 3.0)]  # all speech ends by 3s

        _pause, _resume, _warning, gap_ms = svc.snap_pause_point(
            5.5, speech, [silence], []
        )

        assert gap_ms == pytest.approx(1000.0, abs=1.0)
|
||||
|
||||
|
||||
# ── A3: minimum gap validation ───────────────────────────────────────────────
|
||||
|
||||
class TestMinGapValidation:
    """A3: gaps shorter than the minimum acceptable gap trigger a forward search."""

    def test_short_gap_triggers_forward_search(self, svc):
        """Case C gap < min_acceptable_gap → searches forward for a better gap."""
        short_gap = _gap(10.0, 10.1)   # 0.1s < 0.2s threshold
        good_gap = _gap(11.5, 12.0)    # 0.5s — acceptable
        boundaries = [_boundary(10.0, gap=short_gap)]
        words = [_word(9.0, 10.0), _word(10.2, 11.5)]
        gaps = [short_gap, good_gap]

        pause, _, _, natural_gap_ms = svc.snap_pause_point(10.5, words, gaps, boundaries)

        # Should snap forward to midpoint of good_gap (11.5+12.0)/2 = 11.75
        assert pause == pytest.approx(11.75, abs=0.01)
        assert natural_gap_ms == pytest.approx(500.0, abs=1.0)

    def test_short_gap_no_forward_alternative_keeps_original(self, svc):
        """Short gap, no acceptable gap ahead → keeps the short gap's midpoint, no warning."""
        short_gap = _gap(10.0, 10.1)
        boundaries = [_boundary(10.0, gap=short_gap)]
        words = [_word(9.0, 10.0), _word(10.2, 14.0)]
        gaps = [short_gap]  # no other gap

        pause, _, warning, _ = svc.snap_pause_point(10.5, words, gaps, boundaries)

        # Falls back to midpoint of short_gap since no alternative
        assert pause == pytest.approx(10.05, abs=0.01)
        assert warning is None  # no warning for "stayed at original"

    def test_fallback_no_gap_triggers_forward_search(self, svc):
        """Fallback case (no gap on boundary) with no nearby gap → searches forward."""
        b = _boundary(10.0, gap=None)
        good_gap = _gap(11.0, 11.8)
        words = [_word(9.0, 10.0), _word(10.1, 11.0)]
        gaps = [good_gap]

        pause, _, warning, natural_gap_ms = svc.snap_pause_point(10.5, words, gaps, [b])

        # Midpoint of good_gap: (11.0 + 11.8) / 2 = 11.4; its length is 0.8s.
        assert pause == pytest.approx(11.4, abs=0.01)
        assert natural_gap_ms == pytest.approx(800.0, abs=1.0)
        assert warning is not None  # warns that it snapped forward
|
||||
|
||||
|
||||
# ── refine_all_pause_points integration ─────────────────────────────────────
|
||||
|
||||
class TestRefineAllPausePointsIntegration:
    """End-to-end checks on refine_all_pause_points and the placements it returns."""

    def test_stores_natural_gap_ms_on_placement(self, svc):
        """Each refined placement carries a natural_gap_ms entry."""
        silence = _gap(10.0, 11.0)
        speech = [_word(8.0, 10.0), _word(11.0, 12.0)]
        requested = [{"ad_cue_index": 0, "pause_point": 10.5, "ad_duration": 3.0}]

        refined, _warnings = svc.refine_all_pause_points(requested, speech, [silence])

        assert "natural_gap_ms" in refined[0]
        # The gap 10.0–11.0 is 1s long.
        assert refined[0]["natural_gap_ms"] == pytest.approx(1000.0, abs=1.0)

    def test_no_whisper_data_returns_original_with_zero_gap(self, svc):
        """Without words or gaps the placement is unchanged and nothing is warned."""
        requested = [{"ad_cue_index": 0, "pause_point": 5.0, "ad_duration": 2.0}]

        refined, warnings = svc.refine_all_pause_points(requested, [], [])

        assert refined[0]["pause_point"] == pytest.approx(5.0)
        # A missing key is treated the same as an explicit zero.
        assert refined[0].get("natural_gap_ms", 0) == 0.0
        # No words means no snapping took place, so no warning is expected.
        assert len(warnings) == 0
|
||||
|
|
@ -94,7 +94,7 @@ services:
|
|||
JWT_ALG: ${JWT_ALG:-HS256}
|
||||
JWT_ACCESS_TTL_MIN: ${JWT_ACCESS_TTL_MIN:-240}
|
||||
JWT_REFRESH_TTL_DAYS: ${JWT_REFRESH_TTL_DAYS:-7}
|
||||
COOKIE_DOMAIN: ${COOKIE_DOMAIN:-ai-sandbox.oliver.solutions}
|
||||
COOKIE_DOMAIN: ${COOKIE_DOMAIN:-optical-dev.oliver.solutions}
|
||||
COOKIE_SECURE: ${COOKIE_SECURE:-true}
|
||||
COOKIE_SAMESITE: ${COOKIE_SAMESITE:-Lax}
|
||||
|
||||
|
|
@ -119,8 +119,8 @@ services:
|
|||
|
||||
# Email
|
||||
SENDGRID_API_KEY: ${SENDGRID_API_KEY:-}
|
||||
EMAIL_FROM: ${EMAIL_FROM:-noreply@ai-sandbox.oliver.solutions}
|
||||
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://ai-sandbox.oliver.solutions/video-accessibility}
|
||||
EMAIL_FROM: ${EMAIL_FROM:-noreply@optical-dev.oliver.solutions}
|
||||
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://optical-dev.oliver.solutions/video-accessibility}
|
||||
|
||||
# Microsoft Authentication
|
||||
AZURE_CLIENT_ID: ${AZURE_CLIENT_ID:-}
|
||||
|
|
@ -189,7 +189,7 @@ services:
|
|||
JWT_ALG: ${JWT_ALG:-HS256}
|
||||
JWT_ACCESS_TTL_MIN: ${JWT_ACCESS_TTL_MIN:-240}
|
||||
JWT_REFRESH_TTL_DAYS: ${JWT_REFRESH_TTL_DAYS:-7}
|
||||
COOKIE_DOMAIN: ${COOKIE_DOMAIN:-ai-sandbox.oliver.solutions}
|
||||
COOKIE_DOMAIN: ${COOKIE_DOMAIN:-optical-dev.oliver.solutions}
|
||||
COOKIE_SECURE: ${COOKIE_SECURE:-true}
|
||||
COOKIE_SAMESITE: ${COOKIE_SAMESITE:-Lax}
|
||||
|
||||
|
|
@ -215,8 +215,8 @@ services:
|
|||
|
||||
# Email
|
||||
SENDGRID_API_KEY: ${SENDGRID_API_KEY:-}
|
||||
EMAIL_FROM: ${EMAIL_FROM:-noreply@ai-sandbox.oliver.solutions}
|
||||
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://ai-sandbox.oliver.solutions/video-accessibility}
|
||||
EMAIL_FROM: ${EMAIL_FROM:-noreply@optical-dev.oliver.solutions}
|
||||
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://optical-dev.oliver.solutions/video-accessibility}
|
||||
|
||||
# Microsoft Authentication
|
||||
AZURE_CLIENT_ID: ${AZURE_CLIENT_ID:-}
|
||||
|
|
@ -276,7 +276,7 @@ services:
|
|||
JWT_ALG: ${JWT_ALG:-HS256}
|
||||
JWT_ACCESS_TTL_MIN: ${JWT_ACCESS_TTL_MIN:-240}
|
||||
JWT_REFRESH_TTL_DAYS: ${JWT_REFRESH_TTL_DAYS:-7}
|
||||
COOKIE_DOMAIN: ${COOKIE_DOMAIN:-ai-sandbox.oliver.solutions}
|
||||
COOKIE_DOMAIN: ${COOKIE_DOMAIN:-optical-dev.oliver.solutions}
|
||||
COOKIE_SECURE: ${COOKIE_SECURE:-true}
|
||||
COOKIE_SAMESITE: ${COOKIE_SAMESITE:-Lax}
|
||||
|
||||
|
|
@ -302,8 +302,8 @@ services:
|
|||
|
||||
# Email
|
||||
SENDGRID_API_KEY: ${SENDGRID_API_KEY:-}
|
||||
EMAIL_FROM: ${EMAIL_FROM:-noreply@ai-sandbox.oliver.solutions}
|
||||
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://ai-sandbox.oliver.solutions/video-accessibility}
|
||||
EMAIL_FROM: ${EMAIL_FROM:-noreply@optical-dev.oliver.solutions}
|
||||
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://optical-dev.oliver.solutions/video-accessibility}
|
||||
|
||||
# Microsoft Authentication
|
||||
AZURE_CLIENT_ID: ${AZURE_CLIENT_ID:-}
|
||||
|
|
@ -369,7 +369,7 @@ services:
|
|||
JWT_ALG: ${JWT_ALG:-HS256}
|
||||
JWT_ACCESS_TTL_MIN: ${JWT_ACCESS_TTL_MIN:-240}
|
||||
JWT_REFRESH_TTL_DAYS: ${JWT_REFRESH_TTL_DAYS:-7}
|
||||
COOKIE_DOMAIN: ${COOKIE_DOMAIN:-ai-sandbox.oliver.solutions}
|
||||
COOKIE_DOMAIN: ${COOKIE_DOMAIN:-optical-dev.oliver.solutions}
|
||||
COOKIE_SECURE: ${COOKIE_SECURE:-true}
|
||||
COOKIE_SAMESITE: ${COOKIE_SAMESITE:-Lax}
|
||||
|
||||
|
|
@ -395,8 +395,8 @@ services:
|
|||
|
||||
# Email
|
||||
SENDGRID_API_KEY: ${SENDGRID_API_KEY:-}
|
||||
EMAIL_FROM: ${EMAIL_FROM:-noreply@ai-sandbox.oliver.solutions}
|
||||
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://ai-sandbox.oliver.solutions/video-accessibility}
|
||||
EMAIL_FROM: ${EMAIL_FROM:-noreply@optical-dev.oliver.solutions}
|
||||
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://optical-dev.oliver.solutions/video-accessibility}
|
||||
|
||||
# Microsoft Authentication
|
||||
AZURE_CLIENT_ID: ${AZURE_CLIENT_ID:-}
|
||||
|
|
@ -466,7 +466,7 @@ services:
|
|||
JWT_ALG: ${JWT_ALG:-HS256}
|
||||
JWT_ACCESS_TTL_MIN: ${JWT_ACCESS_TTL_MIN:-240}
|
||||
JWT_REFRESH_TTL_DAYS: ${JWT_REFRESH_TTL_DAYS:-7}
|
||||
COOKIE_DOMAIN: ${COOKIE_DOMAIN:-ai-sandbox.oliver.solutions}
|
||||
COOKIE_DOMAIN: ${COOKIE_DOMAIN:-optical-dev.oliver.solutions}
|
||||
COOKIE_SECURE: ${COOKIE_SECURE:-true}
|
||||
COOKIE_SAMESITE: ${COOKIE_SAMESITE:-Lax}
|
||||
|
||||
|
|
@ -492,8 +492,8 @@ services:
|
|||
|
||||
# Email
|
||||
SENDGRID_API_KEY: ${SENDGRID_API_KEY:-}
|
||||
EMAIL_FROM: ${EMAIL_FROM:-noreply@ai-sandbox.oliver.solutions}
|
||||
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://ai-sandbox.oliver.solutions/video-accessibility}
|
||||
EMAIL_FROM: ${EMAIL_FROM:-noreply@optical-dev.oliver.solutions}
|
||||
CLIENT_BASE_URL: ${CLIENT_BASE_URL:-https://optical-dev.oliver.solutions/video-accessibility}
|
||||
|
||||
# Microsoft Authentication
|
||||
AZURE_CLIENT_ID: ${AZURE_CLIENT_ID:-}
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@
|
|||
|
||||
| Environment | URL |
|
||||
|-------------|-----|
|
||||
| Production | `https://ai-sandbox.oliver.solutions/video-accessibility-back` |
|
||||
| Production | `https://optical-dev.oliver.solutions/video-accessibility-back` |
|
||||
| Local (Docker) | `http://localhost:8012` |
|
||||
| OpenAPI (Swagger) | `{base_url}/docs` |
|
||||
|
||||
|
|
|
|||
|
|
@ -38,8 +38,8 @@
|
|||
| `optical-web-1` | Production host — runs all Docker services | Production |
|
||||
| Local machine | Developer workstation — Docker Compose local stack | Development |
|
||||
|
||||
**Production URL:** `https://ai-sandbox.oliver.solutions/video-accessibility`
|
||||
**Production API URL:** `https://ai-sandbox.oliver.solutions/video-accessibility-back`
|
||||
**Production URL:** `https://optical-dev.oliver.solutions/video-accessibility`
|
||||
**Production API URL:** `https://optical-dev.oliver.solutions/video-accessibility-back`
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -68,7 +68,7 @@ All services are defined in `docker-compose.yml` and share the `accessible-video
|
|||
| `redis` | 6379 | — | Internal only |
|
||||
| Workers | — | — | No HTTP port |
|
||||
|
||||
Production: nginx reverse-proxies `optical-web-1:8012` → `https://ai-sandbox.oliver.solutions/video-accessibility-back`.
|
||||
Production: nginx reverse-proxies `optical-web-1:8012` → `https://optical-dev.oliver.solutions/video-accessibility-back`.
|
||||
|
||||
---
|
||||
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ Production uses the `.env` file on optical-web-1. Key differences from `.env.exa
|
|||
|----------|-----------------|
|
||||
| `APP_ENV` | `production` |
|
||||
| `COOKIE_SECURE` | `true` |
|
||||
| `COOKIE_DOMAIN` | `ai-sandbox.oliver.solutions` |
|
||||
| `COOKIE_DOMAIN` | `optical-dev.oliver.solutions` |
|
||||
| All API keys | Real secret values |
|
||||
|
||||
---
|
||||
|
|
@ -279,7 +279,7 @@ Copy from `.env.example`. All variables are required unless marked optional.
|
|||
| `JWT_ALG` | `HS256` | No | JWT signing algorithm |
|
||||
| `JWT_ACCESS_TTL_MIN` | `240` | No | Access token TTL (minutes) |
|
||||
| `JWT_REFRESH_TTL_DAYS` | `7` | No | Refresh token TTL (days) |
|
||||
| `COOKIE_DOMAIN` | `ai-sandbox.oliver.solutions` | Yes | Refresh cookie domain |
|
||||
| `COOKIE_DOMAIN` | `optical-dev.oliver.solutions` | Yes | Refresh cookie domain |
|
||||
| `COOKIE_SECURE` | `true` | No | Set `false` for local HTTP |
|
||||
| `COOKIE_SAMESITE` | `Lax` | No | Refresh cookie `SameSite` attribute |
|
||||
| `MONGODB_URI` | — | Yes | MongoDB connection string |
|
||||
|
|
@ -295,7 +295,7 @@ Copy from `.env.example`. All variables are required unless marked optional.
|
|||
| `ELEVENLABS_API_KEY` | — | No | ElevenLabs API key |
|
||||
| `GOOGLE_TTS_CREDENTIALS` | `/secrets/gcp-credentials.json` | No | Separate TTS credentials if needed |
|
||||
| `SENDGRID_API_KEY` | — | No | SendGrid API key |
|
||||
| `EMAIL_FROM` | `noreply@ai-sandbox.oliver.solutions` | No | Sender address |
|
||||
| `EMAIL_FROM` | `noreply@optical-dev.oliver.solutions` | No | Sender address |
|
||||
| `CLIENT_BASE_URL` | — | No | Frontend URL for email links |
|
||||
| `AZURE_CLIENT_ID` | — | No | Microsoft SSO client ID |
|
||||
| `AZURE_AUTHORITY` | — | No | Microsoft tenant authority URL |
|
||||
|
|
|
|||
|
|
@ -41,13 +41,83 @@ export function TimelinePreview({
|
|||
const [contextMenu, setContextMenu] = useState<ContextMenuState | null>(null);
|
||||
const timelineRef = useRef<HTMLDivElement>(null);
|
||||
|
||||
// ── Drag state (B1 marker drag, B2 freeze-block drag) ─────────────────────
|
||||
// State drives rendering (tooltip, cursor, position); refs allow event handlers
|
||||
// to read current values synchronously without waiting for a React re-render
|
||||
// (pointer events can fire faster than React batches state commits).
|
||||
const [draggingCueIndex, setDraggingCueIndex] = useState<number | null>(null);
|
||||
const [dragMs, setDragMs] = useState<number | null>(null);
|
||||
const draggingCueIndexRef = useRef<number | null>(null);
|
||||
const dragMsRef = useRef<number | null>(null);
|
||||
const dragStartXRef = useRef<number>(0);
|
||||
const movedRef = useRef<boolean>(false);
|
||||
|
||||
// Maps a timestamp in ms to a horizontal offset expressed as a percentage of
// the total duration; yields 0 whenever the total duration is not positive.
const getPositionPercent = useCallback(
  (ms: number) => {
    if (!(totalDurationMs > 0)) return 0;
    const fraction = ms / totalDurationMs;
    return fraction * 100;
  },
  [totalDurationMs]
);
|
||||
|
||||
// Converts a pointer clientX coordinate into a timeline position (ms) for the
// given pause point, clamped to that point's [min_bound_ms, max_bound_ms].
// When the geometry cannot be resolved (no timeline element, non-positive
// total duration, or a zero-width timeline) the pause point's current
// position is returned so a drag never produces a bogus value.
const clientXToMs = useCallback(
  (clientX: number, pp: PausePointData): number => {
    const currentMs = pp.adjusted_ms ?? pp.original_ms;
    const timeline = timelineRef.current;
    if (!timeline || totalDurationMs <= 0) return currentMs;

    const rect = timeline.getBoundingClientRect();
    // A collapsed or not-yet-laid-out timeline reports width 0. Dividing by
    // it gives NaN or ±Infinity, and NaN passes straight through the
    // Math.min/Math.max clamps below.
    if (!(rect.width > 0)) return currentMs;

    const fraction = Math.max(0, Math.min(1, (clientX - rect.left) / rect.width));
    const raw = Math.round(fraction * totalDurationMs);
    return Math.max(pp.min_bound_ms, Math.min(pp.max_bound_ms, raw));
  },
  [totalDurationMs]
);
|
||||
|
||||
// ── Pointer handlers wired to each pause-point marker / freeze block ───────
|
||||
|
||||
// Begins a potential drag on a pause-point marker or freeze block: captures
// the pointer, records the starting x, and seeds both the refs and the state
// with the pause point's current position.
const handleDragPointerDown = (e: React.PointerEvent, pp: PausePointData) => {
  // Only the primary button starts a drag; other buttons (e.g. right-click)
  // are left for onContextMenu.
  const isPrimaryButton = e.button === 0;
  if (!isPrimaryButton) return;

  e.stopPropagation();
  const target = e.currentTarget as HTMLElement;
  target.setPointerCapture(e.pointerId);

  const cueIndex = pp.cue_index;
  const startMs = pp.adjusted_ms ?? pp.original_ms;

  // Refs first: the move/up handlers read them synchronously.
  dragStartXRef.current = e.clientX;
  movedRef.current = false;
  draggingCueIndexRef.current = cueIndex;
  dragMsRef.current = startMs;

  // State second: drives rendering.
  setDraggingCueIndex(cueIndex);
  setDragMs(startMs);
};
|
||||
|
||||
// Tracks pointer movement for the pause point currently being dragged. The
// position is only updated once the pointer has travelled more than 3px from
// where it went down; after that every move updates it.
const handleDragPointerMove = (e: React.PointerEvent, pp: PausePointData) => {
  const isActiveDrag = draggingCueIndexRef.current === pp.cue_index;
  if (!isActiveDrag) return;

  const travelledPx = Math.abs(e.clientX - dragStartXRef.current);
  if (travelledPx > 3) {
    movedRef.current = true;
  }

  if (movedRef.current) {
    const nextMs = clientXToMs(e.clientX, pp);
    dragMsRef.current = nextMs;
    setDragMs(nextMs);
  }
};
|
||||
|
||||
// Ends a drag for the given pause point. A release without movement is
// treated as a click and invokes openEditorFn; a release after movement
// reports the new position through onPausePointUpdate, but only when it
// differs from the pause point's current position.
const handleDragPointerUp = (
  e: React.PointerEvent,
  pp: PausePointData,
  openEditorFn: () => void
) => {
  const isActiveDrag = draggingCueIndexRef.current === pp.cue_index;
  if (!isActiveDrag) return;

  const target = e.currentTarget as HTMLElement;
  target.releasePointerCapture(e.pointerId);

  // Snapshot the drag outcome before the refs are cleared.
  const didMove = movedRef.current;
  const finalMs = dragMsRef.current;

  draggingCueIndexRef.current = null;
  dragMsRef.current = null;
  setDraggingCueIndex(null);
  setDragMs(null);

  if (!didMove) {
    openEditorFn();
    return;
  }

  const previousMs = pp.adjusted_ms ?? pp.original_ms;
  if (finalMs !== null && finalMs !== previousMs) {
    onPausePointUpdate(pp.cue_index, finalMs);
  }
};
|
||||
|
||||
// ── Existing click / editor handlers ──────────────────────────────────────
|
||||
|
||||
const handlePausePointMarkerClick = (
|
||||
e: React.MouseEvent,
|
||||
e: React.MouseEvent | React.PointerEvent,
|
||||
pausePoint: PausePointData
|
||||
) => {
|
||||
e.stopPropagation();
|
||||
|
|
@ -60,7 +130,6 @@ export function TimelinePreview({
|
|||
const handleSegmentClick = (segment: VideoSegmentMetadata) => {
|
||||
onSegmentClick(segment);
|
||||
if (segment.is_freeze_frame && segment.cue_index !== null) {
|
||||
// Highlight the AD cue
|
||||
const pausePoint = pausePoints.find(pp => pp.cue_index === segment.cue_index);
|
||||
if (pausePoint) {
|
||||
onPausePointClick(pausePoint);
|
||||
|
|
@ -128,7 +197,19 @@ export function TimelinePreview({
|
|||
>
|
||||
{/* Segments */}
|
||||
{segments.map((segment) => {
|
||||
const leftPercent = getPositionPercent(segment.start_ms);
|
||||
// During drag of this segment's cue, offset the block visually by the drag delta
|
||||
const linkedPP = segment.is_freeze_frame && segment.cue_index !== null
|
||||
? pausePoints.find(pp => pp.cue_index === segment.cue_index) ?? null
|
||||
: null;
|
||||
const isDraggingBlock = linkedPP !== null && draggingCueIndex === linkedPP.cue_index;
|
||||
|
||||
let leftMs = segment.start_ms;
|
||||
if (isDraggingBlock && dragMs !== null && linkedPP) {
|
||||
const origPpMs = linkedPP.adjusted_ms ?? linkedPP.original_ms;
|
||||
leftMs = segment.start_ms + (dragMs - origPpMs);
|
||||
}
|
||||
|
||||
const leftPercent = getPositionPercent(leftMs);
|
||||
const widthPercent = getPositionPercent(segment.duration_ms);
|
||||
const isRegenerationQueued =
|
||||
segment.is_freeze_frame &&
|
||||
|
|
@ -138,21 +219,30 @@ export function TimelinePreview({
|
|||
return (
|
||||
<div
|
||||
key={segment.segment_index}
|
||||
className={`absolute top-0 h-full cursor-pointer transition-all hover:opacity-90 ${
|
||||
className={`absolute top-0 h-full transition-all hover:opacity-90 ${
|
||||
segment.is_freeze_frame
|
||||
? isRegenerationQueued
|
||||
? 'bg-amber-400'
|
||||
: 'bg-orange-400'
|
||||
: 'bg-blue-400'
|
||||
: isDraggingBlock
|
||||
? 'bg-orange-500 cursor-grabbing'
|
||||
: 'bg-orange-400 cursor-grab'
|
||||
: 'bg-blue-400 cursor-pointer'
|
||||
}`}
|
||||
style={{
|
||||
left: `${leftPercent}%`,
|
||||
width: `${Math.max(widthPercent, 0.5)}%`,
|
||||
}}
|
||||
onClick={() => handleSegmentClick(segment)}
|
||||
onClick={() => {
|
||||
if (!movedRef.current) handleSegmentClick(segment);
|
||||
}}
|
||||
onPointerDown={linkedPP ? (e) => handleDragPointerDown(e, linkedPP) : undefined}
|
||||
onPointerMove={linkedPP ? (e) => handleDragPointerMove(e, linkedPP) : undefined}
|
||||
onPointerUp={linkedPP
|
||||
? (e) => handleDragPointerUp(e, linkedPP, () => handleSegmentClick(segment))
|
||||
: undefined}
|
||||
title={
|
||||
segment.is_freeze_frame
|
||||
? `AD Cue ${segment.cue_index !== null ? segment.cue_index + 1 : ''}${isRegenerationQueued ? ' (Regenerate queued)' : ''}`
|
||||
? `AD Cue ${segment.cue_index !== null ? segment.cue_index + 1 : ''}${isRegenerationQueued ? ' (Regenerate queued)' : ''} — drag to move`
|
||||
: `Video segment ${segment.segment_index}`
|
||||
}
|
||||
>
|
||||
|
|
@ -170,21 +260,45 @@ export function TimelinePreview({
|
|||
|
||||
{/* Pause point markers */}
|
||||
{pausePoints.map((pausePoint) => {
|
||||
const effectiveMs = pausePoint.adjusted_ms ?? pausePoint.original_ms;
|
||||
const leftPercent = getPositionPercent(effectiveMs);
|
||||
const isDraggingThis = draggingCueIndex === pausePoint.cue_index;
|
||||
const displayMs = isDraggingThis && dragMs !== null
|
||||
? dragMs
|
||||
: (pausePoint.adjusted_ms ?? pausePoint.original_ms);
|
||||
const leftPercent = getPositionPercent(displayMs);
|
||||
const isAdjusted = pausePoint.adjusted_ms !== null;
|
||||
|
||||
return (
|
||||
<div
|
||||
key={`pause-${pausePoint.cue_index}`}
|
||||
className={`absolute top-0 w-2 h-full cursor-pointer z-10 hover:w-3 transition-all ${
|
||||
isAdjusted ? 'bg-purple-600' : 'bg-red-600'
|
||||
className={`absolute top-0 w-2 h-full z-10 transition-[width] select-none ${
|
||||
isDraggingThis
|
||||
? 'cursor-grabbing w-3 ' + (isAdjusted ? 'bg-purple-700' : 'bg-red-700')
|
||||
: 'hover:w-3 ' + (isAdjusted ? 'bg-purple-600 cursor-grab' : 'bg-red-600 cursor-grab')
|
||||
}`}
|
||||
style={{ left: `${leftPercent}%` }}
|
||||
onClick={(e) => { e.stopPropagation(); handlePausePointMarkerClick(e, pausePoint); }}
|
||||
onContextMenu={(e) => { e.preventDefault(); e.stopPropagation(); handleContextMenuPauseOpen(pausePoint); }}
|
||||
title={`Pause point ${pausePoint.cue_index + 1}: ${formatTime(effectiveMs)}${isAdjusted ? ' (adjusted)' : ''} — click to edit`}
|
||||
/>
|
||||
onPointerDown={(e) => handleDragPointerDown(e, pausePoint)}
|
||||
onPointerMove={(e) => handleDragPointerMove(e, pausePoint)}
|
||||
onPointerUp={(e) =>
|
||||
handleDragPointerUp(e, pausePoint, () =>
|
||||
handlePausePointMarkerClick(e, pausePoint)
|
||||
)
|
||||
}
|
||||
onContextMenu={(e) => {
|
||||
e.preventDefault();
|
||||
e.stopPropagation();
|
||||
handleContextMenuPauseOpen(pausePoint);
|
||||
}}
|
||||
title={`Pause point ${pausePoint.cue_index + 1}: ${formatTime(pausePoint.adjusted_ms ?? pausePoint.original_ms)}${isAdjusted ? ' (adjusted)' : ''} — drag to move`}
|
||||
>
|
||||
{/* Drag time tooltip (B3) */}
|
||||
{isDraggingThis && dragMs !== null && (
|
||||
<div
|
||||
className="absolute -top-7 left-1/2 -translate-x-1/2 bg-gray-800 text-white text-xs rounded px-1.5 py-0.5 whitespace-nowrap pointer-events-none shadow"
|
||||
>
|
||||
{formatTime(dragMs)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
|
||||
|
|
@ -209,7 +323,7 @@ export function TimelinePreview({
|
|||
</div>
|
||||
<div className="flex items-center gap-1">
|
||||
<div className="w-3 h-3 bg-orange-400 rounded" />
|
||||
<span>AD Audio</span>
|
||||
<span>AD Audio (drag to move)</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-1">
|
||||
<div className="w-3 h-3 bg-amber-400 rounded" />
|
||||
|
|
@ -217,7 +331,7 @@ export function TimelinePreview({
|
|||
</div>
|
||||
<div className="flex items-center gap-1">
|
||||
<div className="w-1 h-3 bg-red-600" />
|
||||
<span>Pause Point</span>
|
||||
<span>Pause Point (drag to move)</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-1">
|
||||
<div className="w-1 h-3 bg-purple-600" />
|
||||
|
|
|
|||
|
|
@ -0,0 +1,342 @@
|
|||
import { describe, it, expect, vi, beforeEach } from 'vitest'
|
||||
import { act } from 'react'
|
||||
import { render, fireEvent } from '../../../test/utils'
|
||||
import { TimelinePreview } from '../TimelinePreview'
|
||||
import type { PausePointData, VideoSegmentMetadata } from '../../../types/api'
|
||||
|
||||
// ── helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
// Builds a PausePointData fixture; any field can be replaced via `overrides`.
function makePausePoint(overrides: Partial<PausePointData> = {}): PausePointData {
  const base: PausePointData = {
    cue_index: 0,
    original_ms: 5000,
    source_ms: null,
    adjusted_ms: null,
    min_bound_ms: 1000,
    max_bound_ms: 9000,
    natural_gap_ms: 0,
  }
  return { ...base, ...overrides }
}
|
||||
|
||||
// Builds a non-freeze VideoSegmentMetadata fixture covering 0–5000ms; any
// field can be replaced via `overrides`.
function makeSegment(overrides: Partial<VideoSegmentMetadata> = {}): VideoSegmentMetadata {
  const base: VideoSegmentMetadata = {
    segment_index: 0,
    start_ms: 0,
    end_ms: 5000,
    gcs_uri: 'gs://test/segment.mp4',
    duration_ms: 5000,
    is_freeze_frame: false,
    cue_index: null,
  }
  return { ...base, ...overrides }
}
|
||||
|
||||
// Builds a 6000ms freeze-frame segment (segment_index 1) linked to the given
// cue and starting at `startMs`.
function makeFreezeSegment(cueIndex: number, startMs: number): VideoSegmentMetadata {
  const durationMs = 6000
  const freezeFields: Partial<VideoSegmentMetadata> = {
    segment_index: 1,
    is_freeze_frame: true,
    cue_index: cueIndex,
    start_ms: startMs,
    end_ms: startMs + durationMs,
    duration_ms: durationMs,
  }
  return makeSegment(freezeFields)
}
|
||||
|
||||
// Default props used in most tests
|
||||
// Props shared by most tests: an empty 10s timeline with fresh mock callbacks.
// Entries in `overrides` replace the defaults.
function defaultProps(overrides: Record<string, unknown> = {}) {
  const base = {
    segments: [],
    pausePoints: [],
    totalDurationMs: 10000,
    currentTimeMs: 0,
    onSegmentClick: vi.fn(),
    onPausePointClick: vi.fn(),
    onPausePointUpdate: vi.fn(),
    onRegenerateTTS: vi.fn(),
    regenerationQueue: [],
  }
  return { ...base, ...overrides }
}
|
||||
|
||||
// Mock pointer capture (jsdom does not implement setPointerCapture)
|
||||
beforeEach(() => {
  // Install fresh stubs before every test so the component's pointer-capture
  // calls have something to call.
  const proto = Element.prototype
  proto.setPointerCapture = vi.fn()
  proto.releasePointerCapture = vi.fn()
})
|
||||
|
||||
// jsdom's PointerEvent doesn't expose MouseEvent properties (button, clientX) from init —
|
||||
// they come back as undefined, breaking `if (e.button !== 0)` and delta math.
|
||||
// Work-around: dispatch MouseEvents typed as pointer events. MouseEvent correctly handles
|
||||
// all MouseEventInit fields; React 19 routes by event.type, not constructor type.
|
||||
// Dispatches a bubbling, cancelable 'pointerdown' on `el`, built as a
// MouseEvent. `button` defaults to 0 (primary button).
function ptrDown(el: HTMLElement, init: { button?: number; clientX: number }) {
  const button = init.button ?? 0
  const event = new MouseEvent('pointerdown', {
    bubbles: true,
    cancelable: true,
    button,
    clientX: init.clientX,
  })
  fireEvent(el, event)
}
|
||||
|
||||
// Dispatches a bubbling, cancelable 'pointermove' on `el` at the given
// clientX, built as a MouseEvent.
function ptrMove(el: HTMLElement, init: { clientX: number }) {
  const event = new MouseEvent('pointermove', {
    bubbles: true,
    cancelable: true,
    clientX: init.clientX,
  })
  fireEvent(el, event)
}
|
||||
|
||||
// Dispatches a bubbling, cancelable 'pointerup' on `el` at the given clientX,
// built as a MouseEvent.
function ptrUp(el: HTMLElement, init: { clientX: number }) {
  const event = new MouseEvent('pointerup', {
    bubbles: true,
    cancelable: true,
    clientX: init.clientX,
  })
  fireEvent(el, event)
}
|
||||
|
||||
// Helper to mock the timeline container rect so clientX→ms math works
|
||||
// Stubs getBoundingClientRect on the timeline track so it reports a
// 1000px-wide box starting at x=0. With totalDurationMs = 10000 this makes
// 1px correspond to 10ms in the clientX → ms conversion.
// Throws when the track cannot be found: otherwise the stub would silently
// not be installed and "callback not called" tests could pass vacuously.
function mockTimelineRect(container: HTMLElement) {
  const timelineDiv = container.querySelector('.relative.h-16') as HTMLElement | null
  if (!timelineDiv) {
    throw new Error("mockTimelineRect: timeline element '.relative.h-16' not found")
  }
  vi.spyOn(timelineDiv, 'getBoundingClientRect').mockReturnValue({
    left: 0, right: 1000, width: 1000, top: 0, bottom: 64, height: 64, x: 0, y: 0,
    toJSON: () => {},
  } as DOMRect)
  return timelineDiv
}
|
||||
|
||||
// ── Marker drag tests (B1) ────────────────────────────────────────────────────
|
||||
|
||||
// B1: dragging a pause-point marker. The timeline rect is mocked to 1000px
// for a 10000ms duration, so clientX * 10 is the unclamped position in ms.
describe('TimelinePreview — marker drag', () => {
  it('calls onPausePointUpdate when marker is dragged and released', async () => {
    const onPausePointUpdate = vi.fn()
    const pp = makePausePoint({ cue_index: 0, original_ms: 5000 })
    const { container } = render(
      <TimelinePreview {...defaultProps({ pausePoints: [pp], onPausePointUpdate })} />
    )

    mockTimelineRect(container)
    const marker = container.querySelector('[title*="Pause point 1"]') as HTMLElement
    expect(marker).toBeTruthy()

    // Drag: down at x=500 (5s), move to x=700 (7s), up at x=700
    await act(async () => {
      ptrDown(marker, { button: 0, clientX: 500 })
      ptrMove(marker, { clientX: 504 }) // >3px threshold
      ptrMove(marker, { clientX: 700 })
      ptrUp(marker, { clientX: 700 })
    })

    expect(onPausePointUpdate).toHaveBeenCalledOnce()
    const [cueIndex, adjustedMs] = onPausePointUpdate.mock.calls[0]
    expect(cueIndex).toBe(0)
    // 700/1000 * 10000 = 7000ms, within bounds [1000, 9000]
    expect(adjustedMs).toBe(7000)
  })

  it('opens editor popover on click (no movement)', async () => {
    const onPausePointUpdate = vi.fn()
    const onPausePointClick = vi.fn()
    const pp = makePausePoint({ cue_index: 0 })
    const { container } = render(
      <TimelinePreview {...defaultProps({ pausePoints: [pp], onPausePointUpdate, onPausePointClick })} />
    )

    mockTimelineRect(container)
    const marker = container.querySelector('[title*="Pause point 1"]') as HTMLElement

    await act(async () => {
      ptrDown(marker, { button: 0, clientX: 500 })
      // No move — stayed at same position
      ptrUp(marker, { clientX: 500 })
    })

    expect(onPausePointUpdate).not.toHaveBeenCalled()
    expect(onPausePointClick).toHaveBeenCalledWith(pp)
  })

  it('does NOT call onPausePointUpdate if final position equals original', async () => {
    const onPausePointUpdate = vi.fn()
    const pp = makePausePoint({ cue_index: 0, original_ms: 5000 })
    const { container } = render(
      <TimelinePreview {...defaultProps({ pausePoints: [pp], onPausePointUpdate })} />
    )

    mockTimelineRect(container)
    const marker = container.querySelector('[title*="Pause point 1"]') as HTMLElement

    // Drag to 500 → 504 → 500 (same ms as start = 5000ms)
    await act(async () => {
      ptrDown(marker, { button: 0, clientX: 500 })
      ptrMove(marker, { clientX: 504 })
      ptrMove(marker, { clientX: 500 })
      ptrUp(marker, { clientX: 500 })
    })

    expect(onPausePointUpdate).not.toHaveBeenCalled()
  })

  it('clamps drag to min_bound_ms', async () => {
    const onPausePointUpdate = vi.fn()
    const pp = makePausePoint({ cue_index: 0, original_ms: 5000, min_bound_ms: 2000 })
    const { container } = render(
      <TimelinePreview {...defaultProps({ pausePoints: [pp], onPausePointUpdate })} />
    )

    mockTimelineRect(container)
    const marker = container.querySelector('[title*="Pause point 1"]') as HTMLElement

    // Drag to x=50 → 500ms, below min_bound_ms=2000ms
    await act(async () => {
      ptrDown(marker, { button: 0, clientX: 500 })
      ptrMove(marker, { clientX: 504 })
      ptrMove(marker, { clientX: 50 })
      ptrUp(marker, { clientX: 50 })
    })

    // Assert the call happened before destructuring it, so a regression
    // fails with a readable message instead of a TypeError on undefined.
    expect(onPausePointUpdate).toHaveBeenCalledOnce()
    const [, adjustedMs] = onPausePointUpdate.mock.calls[0]
    expect(adjustedMs).toBe(2000) // clamped to min_bound_ms
  })

  it('clamps drag to max_bound_ms', async () => {
    const onPausePointUpdate = vi.fn()
    const pp = makePausePoint({ cue_index: 0, original_ms: 5000, max_bound_ms: 8000 })
    const { container } = render(
      <TimelinePreview {...defaultProps({ pausePoints: [pp], onPausePointUpdate })} />
    )

    mockTimelineRect(container)
    const marker = container.querySelector('[title*="Pause point 1"]') as HTMLElement

    // Drag to x=950 → 9500ms, above max_bound_ms=8000ms
    await act(async () => {
      ptrDown(marker, { button: 0, clientX: 500 })
      ptrMove(marker, { clientX: 504 })
      ptrMove(marker, { clientX: 950 })
      ptrUp(marker, { clientX: 950 })
    })

    // Same guard as the min-bound test: check the call before reading it.
    expect(onPausePointUpdate).toHaveBeenCalledOnce()
    const [, adjustedMs] = onPausePointUpdate.mock.calls[0]
    expect(adjustedMs).toBe(8000) // clamped to max_bound_ms
  })

  it('right-click does NOT start drag (context menu allowed)', async () => {
    const onPausePointUpdate = vi.fn()
    const pp = makePausePoint({ cue_index: 0 })
    const { container } = render(
      <TimelinePreview {...defaultProps({ pausePoints: [pp], onPausePointUpdate })} />
    )

    mockTimelineRect(container)
    const marker = container.querySelector('[title*="Pause point 1"]') as HTMLElement

    // button: 2 is the secondary (right) button; the full gesture is replayed
    // to show that nothing downstream reacts to it.
    await act(async () => {
      ptrDown(marker, { button: 2, clientX: 500 })
      ptrMove(marker, { clientX: 504 })
      ptrMove(marker, { clientX: 700 })
      ptrUp(marker, { clientX: 700 })
    })

    expect(onPausePointUpdate).not.toHaveBeenCalled()
  })
})
|
||||
|
||||
// ── Freeze-block drag tests (B2) ─────────────────────────────────────────────
|
||||
|
||||
describe('TimelinePreview — freeze-block drag', () => {
  // Dragging a freeze-frame block should move its linked pause point and
  // report the new time through onPausePointUpdate.
  it('calls onPausePointUpdate when freeze block is dragged', async () => {
    const onPausePointUpdate = vi.fn()
    const pp = makePausePoint({ cue_index: 0, original_ms: 5000 })
    const freeze = makeFreezeSegment(0, 5000)
    const { container } = render(
      <TimelinePreview
        {...defaultProps({
          pausePoints: [pp],
          segments: [freeze],
          onPausePointUpdate,
        })}
      />
    )

    mockTimelineRect(container)
    const block = container.querySelector('[title*="AD Cue 1"]') as HTMLElement
    expect(block).toBeTruthy()

    // Drag block from x=500 → x=600 (+1s = 1000ms delta)
    await act(async () => {
      ptrDown(block, { button: 0, clientX: 500 })
      ptrMove(block, { clientX: 504 })
      ptrMove(block, { clientX: 600 })
      ptrUp(block, { clientX: 600 })
    })

    expect(onPausePointUpdate).toHaveBeenCalledOnce()
    const [cueIndex, adjustedMs] = onPausePointUpdate.mock.calls[0]
    expect(cueIndex).toBe(0)
    expect(adjustedMs).toBe(6000) // 600/1000 * 10000 = 6000ms
  })

  // A regular (non-freeze) video segment must ignore the same drag gesture.
  it('non-freeze segments do not start drag', async () => {
    const onPausePointUpdate = vi.fn()
    const seg = makeSegment({ segment_index: 0, is_freeze_frame: false, start_ms: 0, duration_ms: 4000 })
    const { container } = render(
      <TimelinePreview
        {...defaultProps({ segments: [seg], onPausePointUpdate })}
      />
    )

    mockTimelineRect(container)
    const videoSeg = container.querySelector('[title*="Video segment"]') as HTMLElement
    // Require the segment element to exist. The previous silent early return
    // let this test pass without exercising anything whenever the selector
    // stopped matching, so a broken selector could never be noticed.
    // NOTE(review): the selector must match the title the component gives
    // non-freeze segments — confirm against TimelinePreview.
    expect(videoSeg).toBeTruthy()

    await act(async () => {
      ptrDown(videoSeg, { button: 0, clientX: 200 })
      ptrMove(videoSeg, { clientX: 204 })
      ptrMove(videoSeg, { clientX: 400 })
      ptrUp(videoSeg, { clientX: 400 })
    })

    expect(onPausePointUpdate).not.toHaveBeenCalled()
  })
})
|
||||
|
||||
// ── Drag tooltip (B3) ─────────────────────────────────────────────────────────
|
||||
|
||||
describe('TimelinePreview — drag tooltip', () => {
  // The tooltip is located by its `.bg-gray-800` class in both tests.
  const TOOLTIP_SELECTOR = '.bg-gray-800'

  // While a marker drag is in progress (pointer not yet released), a tooltip
  // containing an m:ss style time must be present in the DOM.
  it('shows time tooltip during marker drag', async () => {
    const pausePoint = makePausePoint({ cue_index: 0, original_ms: 5000 })
    const props = defaultProps({ pausePoints: [pausePoint] })
    const { container } = render(<TimelinePreview {...props} />)

    mockTimelineRect(container)
    const handle = container.querySelector('[title*="Pause point 1"]') as HTMLElement

    // Wrap in act so React flushes state updates before we query the DOM
    await act(async () => {
      ptrDown(handle, { button: 0, clientX: 500 })
      for (const clientX of [504, 700]) {
        ptrMove(handle, { clientX })
      }
    })

    const tip = container.querySelector<HTMLElement>(TOOLTIP_SELECTOR)
    expect(tip).toBeTruthy()
    expect(tip?.textContent).toMatch(/\d:\d{2}/)
  })

  // Once the pointer is released the tooltip must be removed again.
  it('hides tooltip after drag ends', async () => {
    const onPausePointUpdate = vi.fn()
    const pausePoint = makePausePoint({ cue_index: 0, original_ms: 5000 })
    const props = defaultProps({ pausePoints: [pausePoint], onPausePointUpdate })
    const { container } = render(<TimelinePreview {...props} />)

    mockTimelineRect(container)
    const handle = container.querySelector('[title*="Pause point 1"]') as HTMLElement

    await act(async () => {
      ptrDown(handle, { button: 0, clientX: 500 })
      for (const clientX of [504, 700]) {
        ptrMove(handle, { clientX })
      }
      ptrUp(handle, { clientX: 700 })
    })

    expect(container.querySelector(TOOLTIP_SELECTOR)).toBeNull()
  })
})
|
||||
|
|
@ -38,6 +38,7 @@ global.ResizeObserver = vi.fn(() => ({
|
|||
global.URL.createObjectURL = vi.fn(() => 'mock-object-url')
|
||||
global.URL.revokeObjectURL = vi.fn()
|
||||
|
||||
|
||||
// Mock HTMLMediaElement for video components
|
||||
Object.defineProperty(HTMLMediaElement.prototype, 'load', {
|
||||
writable: true,
|
||||
|
|
|
|||
|
|
@ -647,6 +647,7 @@ export interface PausePointData {
|
|||
adjusted_ms: number | null;
|
||||
min_bound_ms: number;
|
||||
max_bound_ms: number;
|
||||
natural_gap_ms?: number; // Duration (ms) of natural silence at the pause point; 0 = none
|
||||
}
|
||||
|
||||
export interface VideoSegmentMetadata {
|
||||
|
|
|
|||
|
|
@ -203,7 +203,7 @@ display_summary() {
|
|||
echo -e "${GREEN}Frontend successfully deployed!${NC}"
|
||||
echo ""
|
||||
echo "Deployment location: $DEPLOY_DIR"
|
||||
echo "Frontend URL: https://ai-sandbox.oliver.solutions/video-accessibility"
|
||||
echo "Frontend URL: https://optical-dev.oliver.solutions/video-accessibility"
|
||||
echo ""
|
||||
echo "To verify the deployment, visit the URL above in your browser."
|
||||
echo ""
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue