diff --git a/backend/app/services/video_renderer.py b/backend/app/services/video_renderer.py index 557badd..d24226b 100644 --- a/backend/app/services/video_renderer.py +++ b/backend/app/services/video_renderer.py @@ -511,10 +511,29 @@ class VideoRendererService: logger.info(f"Starting pause-insert render for {source_video_path}") placements = analysis.get("placements", []) - # Sort placements by pause_point time, with ad_cue_index as secondary key + # Defensive: enforce pause_point monotonicity in cue_index order before temporal sort. + # Whisper refinement or user adjustments can cause a later cue's pause_point to + # precede an earlier cue's, which would reorder cues in the rendered timeline. + cue_ordered = sorted( + [p for p in placements if p.get("pause_point") is not None], + key=lambda p: p.get("ad_cue_index", 0) + ) + for i in range(1, len(cue_ordered)): + prev_pp = cue_ordered[i - 1]["pause_point"] + curr_pp = cue_ordered[i]["pause_point"] + if curr_pp < prev_pp: + logger.warning( + f"Renderer monotonicity fix: cue {cue_ordered[i].get('ad_cue_index')} " + f"pause_point {curr_pp:.2f}s < cue {cue_ordered[i-1].get('ad_cue_index')} " + f"pause_point {prev_pp:.2f}s, clamping to {prev_pp:.2f}s" + ) + cue_ordered[i]["pause_point"] = prev_pp + cue_ordered[i]["resume_from"] = prev_pp + + # Sort by pause_point time, with ad_cue_index as secondary key # to ensure consolidated cues (sharing same pause_point) maintain VTT order sorted_placements = sorted( - [p for p in placements if p.get("pause_point") is not None], + cue_ordered, key=lambda p: (p["pause_point"], p.get("ad_cue_index", 0)) ) diff --git a/backend/app/services/whisper_service.py b/backend/app/services/whisper_service.py index 1dc10b4..2675042 100644 --- a/backend/app/services/whisper_service.py +++ b/backend/app/services/whisper_service.py @@ -515,6 +515,26 @@ class WhisperService: refined_placements, consolidation_threshold, warnings ) + # Phase 3: Enforce monotonicity - pause_points must be non-decreasing in cue_index order + # Whisper's snap_pause_point() finds the nearest boundary independently per cue, + # which can move a later cue's pause_point before an earlier cue's. + for i in range(1, len(refined_placements)): + prev_pp = refined_placements[i - 1].get("pause_point") + curr_pp = refined_placements[i].get("pause_point") + if curr_pp is not None and prev_pp is not None and curr_pp < prev_pp: + refined_placements[i]["pause_point"] = prev_pp + refined_placements[i]["resume_from"] = prev_pp + refined_placements[i]["monotonicity_clamped"] = True + cue_idx = refined_placements[i].get("ad_cue_index", i) + prev_cue_idx = refined_placements[i - 1].get("ad_cue_index", i - 1) + warning_msg = ( + f"Cue {cue_idx}: Monotonicity violation - pause_point {curr_pp:.2f}s " + f"was before cue {prev_cue_idx}'s pause_point {prev_pp:.2f}s, " + f"clamped to {prev_pp:.2f}s" + ) + warnings.append(warning_msg) + logger.warning(warning_msg) + return refined_placements, warnings def _consolidate_close_cues( diff --git a/backend/app/tasks/rerender_accessible_video.py b/backend/app/tasks/rerender_accessible_video.py index bc03770..458bd2d 100644 --- a/backend/app/tasks/rerender_accessible_video.py +++ b/backend/app/tasks/rerender_accessible_video.py @@ -492,6 +492,20 @@ def _build_placements_with_adjustments( "pause_point_rationale": "User-adjusted during QC" if i in adjusted_pause_by_cue else "Original from VTT" }) + # Enforce pause_point monotonicity - pause_points must be non-decreasing in cue order. + # User-adjusted pause points can cross over each other; clamp to maintain cue order. + for i in range(1, len(placements)): + prev_pp = placements[i - 1].get("pause_point") + curr_pp = placements[i].get("pause_point") + if curr_pp is not None and prev_pp is not None and curr_pp < prev_pp: + logger.warning( + f"Rerender monotonicity fix: cue {placements[i].get('ad_cue_index')} " + f"pause_point {curr_pp:.2f}s < cue {placements[i-1].get('ad_cue_index')} " + f"pause_point {prev_pp:.2f}s, clamping to {prev_pp:.2f}s" + ) + placements[i]["pause_point"] = prev_pp + placements[i]["resume_from"] = prev_pp + return placements