fix: enforce AD cue pause_point monotonicity to preserve cue order
Whisper's snap_pause_point() finds the nearest sentence boundary independently for each cue, which can move a later cue's pause_point before an earlier cue's. The renderer then sorts by pause_point, producing non-sequential cue indices in the timeline.

Add a forward monotonicity pass (clamp each pause_point to be >= the previous cue's pause_point) at three layers for defense-in-depth:
- whisper_service: Phase 3, after consolidation
- video_renderer: before the temporal sort in _render_pause_insert_method
- rerender_accessible_video: in _build_placements_with_adjustments

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
0c3102b77f
commit
030f1b67ee
3 changed files with 55 additions and 2 deletions
|
|
@ -511,10 +511,29 @@ class VideoRendererService:
|
|||
logger.info(f"Starting pause-insert render for {source_video_path}")
|
||||
placements = analysis.get("placements", [])
|
||||
|
||||
# Sort placements by pause_point time, with ad_cue_index as secondary key
|
||||
# Defensive: enforce pause_point monotonicity in cue_index order before temporal sort.
|
||||
# Whisper refinement or user adjustments can cause a later cue's pause_point to
|
||||
# precede an earlier cue's, which would reorder cues in the rendered timeline.
|
||||
cue_ordered = sorted(
|
||||
[p for p in placements if p.get("pause_point") is not None],
|
||||
key=lambda p: p.get("ad_cue_index", 0)
|
||||
)
|
||||
for i in range(1, len(cue_ordered)):
|
||||
prev_pp = cue_ordered[i - 1]["pause_point"]
|
||||
curr_pp = cue_ordered[i]["pause_point"]
|
||||
if curr_pp < prev_pp:
|
||||
logger.warning(
|
||||
f"Renderer monotonicity fix: cue {cue_ordered[i].get('ad_cue_index')} "
|
||||
f"pause_point {curr_pp:.2f}s < cue {cue_ordered[i-1].get('ad_cue_index')} "
|
||||
f"pause_point {prev_pp:.2f}s, clamping to {prev_pp:.2f}s"
|
||||
)
|
||||
cue_ordered[i]["pause_point"] = prev_pp
|
||||
cue_ordered[i]["resume_from"] = prev_pp
|
||||
|
||||
# Sort by pause_point time, with ad_cue_index as secondary key
|
||||
# to ensure consolidated cues (sharing same pause_point) maintain VTT order
|
||||
sorted_placements = sorted(
|
||||
[p for p in placements if p.get("pause_point") is not None],
|
||||
cue_ordered,
|
||||
key=lambda p: (p["pause_point"], p.get("ad_cue_index", 0))
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -515,6 +515,26 @@ class WhisperService:
|
|||
refined_placements, consolidation_threshold, warnings
|
||||
)
|
||||
|
||||
# Phase 3: Enforce monotonicity - pause_points must be non-decreasing in cue_index order
|
||||
# Whisper's snap_pause_point() finds the nearest boundary independently per cue,
|
||||
# which can move a later cue's pause_point before an earlier cue's.
|
||||
for i in range(1, len(refined_placements)):
|
||||
prev_pp = refined_placements[i - 1].get("pause_point")
|
||||
curr_pp = refined_placements[i].get("pause_point")
|
||||
if curr_pp is not None and prev_pp is not None and curr_pp < prev_pp:
|
||||
refined_placements[i]["pause_point"] = prev_pp
|
||||
refined_placements[i]["resume_from"] = prev_pp
|
||||
refined_placements[i]["monotonicity_clamped"] = True
|
||||
cue_idx = refined_placements[i].get("ad_cue_index", i)
|
||||
prev_cue_idx = refined_placements[i - 1].get("ad_cue_index", i - 1)
|
||||
warning_msg = (
|
||||
f"Cue {cue_idx}: Monotonicity violation - pause_point {curr_pp:.2f}s "
|
||||
f"was before cue {prev_cue_idx}'s pause_point {prev_pp:.2f}s, "
|
||||
f"clamped to {prev_pp:.2f}s"
|
||||
)
|
||||
warnings.append(warning_msg)
|
||||
logger.warning(warning_msg)
|
||||
|
||||
return refined_placements, warnings
|
||||
|
||||
def _consolidate_close_cues(
|
||||
|
|
|
|||
|
|
@ -492,6 +492,20 @@ def _build_placements_with_adjustments(
|
|||
"pause_point_rationale": "User-adjusted during QC" if i in adjusted_pause_by_cue else "Original from VTT"
|
||||
})
|
||||
|
||||
# Enforce pause_point monotonicity - pause_points must be non-decreasing in cue order.
|
||||
# User-adjusted pause points can cross over each other; clamp to maintain cue order.
|
||||
for i in range(1, len(placements)):
|
||||
prev_pp = placements[i - 1].get("pause_point")
|
||||
curr_pp = placements[i].get("pause_point")
|
||||
if curr_pp is not None and prev_pp is not None and curr_pp < prev_pp:
|
||||
logger.warning(
|
||||
f"Rerender monotonicity fix: cue {placements[i].get('ad_cue_index')} "
|
||||
f"pause_point {curr_pp:.2f}s < cue {placements[i-1].get('ad_cue_index')} "
|
||||
f"pause_point {prev_pp:.2f}s, clamping to {prev_pp:.2f}s"
|
||||
)
|
||||
placements[i]["pause_point"] = prev_pp
|
||||
placements[i]["resume_from"] = prev_pp
|
||||
|
||||
return placements
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue