fix: enforce AD cue pause_point monotonicity to preserve cue order

Whisper's snap_pause_point() finds the nearest sentence boundary
independently per cue, which can move a later cue's pause_point before
an earlier cue's. The renderer then sorts by pause_point, producing
non-sequential cue indices in the timeline.

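Add a forward monotonicity pass at three layers for defense-in-depth.
When a cue's pause_point is earlier than the previous cue's, the pass
clamps it to the previous pause_point (so each pause_point >= previous)
and sets that cue's resume_from to the same clamped value: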
- whisper_service: Phase 3 after consolidation
- video_renderer: before temporal sort in _render_pause_insert_method
- rerender_accessible_video: in _build_placements_with_adjustments

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
michael 2026-02-26 08:15:06 -06:00
parent 0c3102b77f
commit 030f1b67ee
3 changed files with 55 additions and 2 deletions

View file

@ -511,10 +511,29 @@ class VideoRendererService:
logger.info(f"Starting pause-insert render for {source_video_path}")
placements = analysis.get("placements", [])
# Sort placements by pause_point time, with ad_cue_index as secondary key
# Defensive: enforce pause_point monotonicity in cue_index order before temporal sort.
# Whisper refinement or user adjustments can cause a later cue's pause_point to
# precede an earlier cue's, which would reorder cues in the rendered timeline.
cue_ordered = sorted(
[p for p in placements if p.get("pause_point") is not None],
key=lambda p: p.get("ad_cue_index", 0)
)
for i in range(1, len(cue_ordered)):
prev_pp = cue_ordered[i - 1]["pause_point"]
curr_pp = cue_ordered[i]["pause_point"]
if curr_pp < prev_pp:
logger.warning(
f"Renderer monotonicity fix: cue {cue_ordered[i].get('ad_cue_index')} "
f"pause_point {curr_pp:.2f}s < cue {cue_ordered[i-1].get('ad_cue_index')} "
f"pause_point {prev_pp:.2f}s, clamping to {prev_pp:.2f}s"
)
cue_ordered[i]["pause_point"] = prev_pp
cue_ordered[i]["resume_from"] = prev_pp
# Sort by pause_point time, with ad_cue_index as secondary key
# to ensure consolidated cues (sharing same pause_point) maintain VTT order
sorted_placements = sorted(
[p for p in placements if p.get("pause_point") is not None],
cue_ordered,
key=lambda p: (p["pause_point"], p.get("ad_cue_index", 0))
)

View file

@ -515,6 +515,26 @@ class WhisperService:
refined_placements, consolidation_threshold, warnings
)
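# Phase 3: Enforce monotonicity - pause_points must be non-decreasing in cue_index order
# Whisper's snap_pause_point() finds the nearest boundary independently per cue,
# which can move a later cue's pause_point before an earlier cue's.
# A clamped cue gets both pause_point and resume_from set to the previous cue's
# pause_point and is flagged with monotonicity_clamped=True.
# NOTE(review): the loop compares adjacent list entries only, so this presumably
# relies on refined_placements already being in cue_index order - confirm.
# An entry whose pause_point is None is skipped and is not carried forward, so the
# cue after it is not compared against the last non-None pause_point.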
for i in range(1, len(refined_placements)):
prev_pp = refined_placements[i - 1].get("pause_point")
curr_pp = refined_placements[i].get("pause_point")
if curr_pp is not None and prev_pp is not None and curr_pp < prev_pp:
refined_placements[i]["pause_point"] = prev_pp
refined_placements[i]["resume_from"] = prev_pp
refined_placements[i]["monotonicity_clamped"] = True
cue_idx = refined_placements[i].get("ad_cue_index", i)
prev_cue_idx = refined_placements[i - 1].get("ad_cue_index", i - 1)
warning_msg = (
f"Cue {cue_idx}: Monotonicity violation - pause_point {curr_pp:.2f}s "
f"was before cue {prev_cue_idx}'s pause_point {prev_pp:.2f}s, "
f"clamped to {prev_pp:.2f}s"
)
warnings.append(warning_msg)
logger.warning(warning_msg)
return refined_placements, warnings
def _consolidate_close_cues(

View file

@ -492,6 +492,20 @@ def _build_placements_with_adjustments(
"pause_point_rationale": "User-adjusted during QC" if i in adjusted_pause_by_cue else "Original from VTT"
})
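# Enforce pause_point monotonicity - pause_points must be non-decreasing in cue order.
# User-adjusted pause points can cross over each other; clamp to maintain cue order.
# A clamped cue gets both pause_point and resume_from set to the previous cue's pause_point.
# NOTE(review): only adjacent list entries are compared, so this presumably relies on
# placements being built in cue order - confirm. A None pause_point is skipped and
# not carried forward, so the next cue is not checked against the last non-None value.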
for i in range(1, len(placements)):
prev_pp = placements[i - 1].get("pause_point")
curr_pp = placements[i].get("pause_point")
if curr_pp is not None and prev_pp is not None and curr_pp < prev_pp:
logger.warning(
f"Rerender monotonicity fix: cue {placements[i].get('ad_cue_index')} "
f"pause_point {curr_pp:.2f}s < cue {placements[i-1].get('ad_cue_index')} "
f"pause_point {prev_pp:.2f}s, clamping to {prev_pp:.2f}s"
)
placements[i]["pause_point"] = prev_pp
placements[i]["resume_from"] = prev_pp
return placements