fix: treat consolidated AD cues as single segment for buffering

Previously, all consolidated cues shared the same pause_point AND
resume_from, which caused the overlap video segment to play between
each AD cue in a consolidated group.

Now consolidated cues are treated as a single AD segment:
- All cues in a group share the same pause_point (front buffer once)
- Only the LAST cue keeps resume_from (back buffer once)
- Other cues have resume_from = pause_point (no video between ADs)

This ensures consolidated ADs play seamlessly back-to-back:
- Video plays up to pause_point (front buffer)
- AD_1 plays
- AD_2 plays immediately (no video)
- AD_n plays immediately (no video)
- Video resumes from resume_from (back buffer)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
michael 2025-12-28 23:33:15 -06:00
parent ee6a30e7a7
commit d092800676

View file

@ -344,9 +344,13 @@ class WhisperService:
"""
Consolidate AD cues whose pause points are within threshold seconds of each other.
If a cue's pause_point is within `threshold` seconds of the previous cue's
pause_point, set them to the same pause_point and resume_from so they play back-to-back.
This avoids inserting AD cues in the middle of sentences.
Consolidated cues are treated as a single AD segment:
- All cues in a group share the same pause_point (front buffer applied once)
- Only the LAST cue in the group keeps resume_from (back buffer applied once)
- Middle cues have resume_from = pause_point (no video between ADs)
This ensures consolidated ADs play seamlessly back-to-back without
repeating the overlap video segment between each one.
Args:
placements: List of refined placement dicts
@ -359,7 +363,9 @@ class WhisperService:
if len(placements) < 2:
return placements
consolidated = [placements[0]]
# First pass: identify consolidated groups and assign same pause_point
consolidated = [placements[0].copy()]
group_resume_from = placements[0].get("resume_from") # Track the back buffer for the group
for i in range(1, len(placements)):
current = placements[i].copy()
@ -372,30 +378,88 @@ class WhisperService:
gap = current_pause - previous_pause
if 0 < gap <= threshold:
# Consolidate: set current cue to use same pause point and resume_from as previous
# Consolidate: set current cue to use same pause point as previous
original_pause = current_pause
original_resume = current.get("resume_from")
current["pause_point"] = previous_pause
current["resume_from"] = previous.get("resume_from") # Use same resume_from as previous
current["consolidated_with_previous"] = True
current["original_pause_point_before_consolidation"] = original_pause
current["original_resume_from_before_consolidation"] = original_resume
# Keep tracking the group's resume_from (we'll assign it to the last cue later)
logger.info(
f"Consolidated cue {current['ad_cue_index']} with previous cue: "
f"pause_point {original_pause:.2f}s -> {previous_pause:.2f}s, "
f"resume_from {original_resume:.2f}s -> {previous.get('resume_from'):.2f}s "
f"pause_point {original_pause:.2f}s -> {previous_pause:.2f}s "
f"(gap was {gap:.2f}s, threshold {threshold:.2f}s)"
)
warnings.append(
f"Cue {current['ad_cue_index']}: Consolidated with previous cue "
f"(pause points were {gap:.2f}s apart, playing back-to-back)"
)
else:
# New group starts - update group_resume_from for the new group
group_resume_from = current.get("resume_from")
consolidated.append(current)
# Second pass: fix resume_from values for consolidated groups
# Only the LAST cue in each group should have the back buffer (resume_from < pause_point)
# All other cues should have resume_from = pause_point (no video between ADs)
for i in range(len(consolidated)):
current = consolidated[i]
current_pause = current.get("pause_point")
if current_pause is None:
continue
# Check if next cue has the same pause_point (meaning current is NOT last in group)
if i < len(consolidated) - 1:
next_pause = consolidated[i + 1].get("pause_point")
if next_pause == current_pause:
# Current is NOT the last in the group - remove back buffer
# Set resume_from = pause_point so no video plays between this AD and the next
original_resume = current.get("resume_from")
if original_resume != current_pause:
current["resume_from"] = current_pause
current["resume_from_removed_for_consolidation"] = original_resume
logger.debug(
f"Cue {current.get('ad_cue_index')}: Removed back buffer for seamless "
f"consolidated playback (resume_from {original_resume:.2f}s -> {current_pause:.2f}s)"
)
# Log the final consolidated groups
self._log_consolidated_groups(consolidated)
return consolidated
def _log_consolidated_groups(self, placements: list[dict]) -> None:
"""Log information about consolidated AD cue groups."""
groups = []
current_group = []
for p in placements:
pause_point = p.get("pause_point")
if not current_group or current_group[-1].get("pause_point") == pause_point:
current_group.append(p)
else:
if len(current_group) > 1:
groups.append(current_group)
current_group = [p]
if len(current_group) > 1:
groups.append(current_group)
for group in groups:
cue_indices = [p.get("ad_cue_index") for p in group]
pause_point = group[0].get("pause_point")
first_resume = group[0].get("resume_from")
last_resume = group[-1].get("resume_from")
logger.info(
f"Consolidated group: cues {cue_indices} at pause_point={pause_point:.2f}s, "
f"first_resume_from={first_resume:.2f}s (should equal pause_point), "
f"last_resume_from={last_resume:.2f}s (back buffer)"
)
def _find_gap_type(self, timestamp: float, gaps: list[SpeechGap]) -> str:
"""Find the gap type for a given timestamp."""
for gap in gaps: