From d0928006769c69246001ca09ec3a2ab9b271f11c Mon Sep 17 00:00:00 2001 From: michael Date: Sun, 28 Dec 2025 23:33:15 -0600 Subject: [PATCH] fix: treat consolidated AD cues as single segment for buffering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, all consolidated cues shared the same pause_point AND resume_from, which caused the overlap video segment to play between each AD cue in a consolidated group. Now consolidated cues are treated as a single AD segment: - All cues in a group share the same pause_point (front buffer once) - Only the LAST cue keeps resume_from (back buffer once) - Other cues have resume_from = pause_point (no video between ADs) This ensures consolidated ADs play seamlessly back-to-back: - Video plays up to pause_point (front buffer) - AD_1 plays - AD_2 plays immediately (no video) - AD_n plays immediately (no video) - Video resumes from resume_from (back buffer) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- backend/app/services/whisper_service.py | 80 ++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 8 deletions(-) diff --git a/backend/app/services/whisper_service.py b/backend/app/services/whisper_service.py index 5605487..39e3129 100644 --- a/backend/app/services/whisper_service.py +++ b/backend/app/services/whisper_service.py @@ -344,9 +344,13 @@ class WhisperService: """ Consolidate AD cues whose pause points are within threshold seconds of each other. - If a cue's pause_point is within `threshold` seconds of the previous cue's - pause_point, set them to the same pause_point and resume_from so they play back-to-back. - This avoids inserting AD cues in the middle of sentences. 
+ Consolidated cues are treated as a single AD segment: + - All cues in a group share the same pause_point (front buffer applied once) + - Only the LAST cue in the group keeps resume_from (back buffer applied once) + - All other cues have resume_from = pause_point (no video between ADs) + + This ensures consolidated ADs play seamlessly back-to-back without + repeating the overlap video segment between each one. Args: placements: List of refined placement dicts @@ -359,7 +363,9 @@ class WhisperService: if len(placements) < 2: return placements - consolidated = [placements[0]] + # First pass: identify consolidated groups and assign same pause_point + consolidated = [placements[0].copy()] + group_resume_from = placements[0].get("resume_from") # Track the back buffer for the group for i in range(1, len(placements)): current = placements[i].copy() @@ -372,30 +378,88 @@ class WhisperService: gap = current_pause - previous_pause if 0 < gap <= threshold: - # Consolidate: set current cue to use same pause point and resume_from as previous + # Consolidate: set current cue to use same pause point as previous original_pause = current_pause original_resume = current.get("resume_from") current["pause_point"] = previous_pause - current["resume_from"] = previous.get("resume_from") # Use same resume_from as previous current["consolidated_with_previous"] = True current["original_pause_point_before_consolidation"] = original_pause current["original_resume_from_before_consolidation"] = original_resume + # Keep tracking the group's resume_from (we'll assign it to the last cue later) logger.info( f"Consolidated cue {current['ad_cue_index']} with previous cue: " - f"pause_point {original_pause:.2f}s -> {previous_pause:.2f}s, " - f"resume_from {original_resume:.2f}s -> {previous.get('resume_from'):.2f}s " + f"pause_point {original_pause:.2f}s -> {previous_pause:.2f}s " f"(gap was {gap:.2f}s, threshold {threshold:.2f}s)" ) warnings.append( f"Cue {current['ad_cue_index']}: Consolidated with 
previous cue " f"(pause points were {gap:.2f}s apart, playing back-to-back)" ) + else: + # New group starts - update group_resume_from for the new group + group_resume_from = current.get("resume_from") consolidated.append(current) + # Second pass: fix resume_from values for consolidated groups + # Only the LAST cue in each group should have the back buffer (resume_from < pause_point) + # All other cues should have resume_from = pause_point (no video between ADs) + for i in range(len(consolidated)): + current = consolidated[i] + current_pause = current.get("pause_point") + + if current_pause is None: + continue + + # Check if next cue has the same pause_point (meaning current is NOT last in group) + if i < len(consolidated) - 1: + next_pause = consolidated[i + 1].get("pause_point") + if next_pause == current_pause: + # Current is NOT the last in the group - remove back buffer + # Set resume_from = pause_point so no video plays between this AD and the next + original_resume = current.get("resume_from") + if original_resume != current_pause: + current["resume_from"] = current_pause + current["resume_from_removed_for_consolidation"] = original_resume + logger.debug( + f"Cue {current.get('ad_cue_index')}: Removed back buffer for seamless " + f"consolidated playback (resume_from {original_resume:.2f}s -> {current_pause:.2f}s)" + ) + + # Log the final consolidated groups + self._log_consolidated_groups(consolidated) + return consolidated + def _log_consolidated_groups(self, placements: list[dict]) -> None: + """Log information about consolidated AD cue groups.""" + groups = [] + current_group = [] + + for p in placements: + pause_point = p.get("pause_point") + if not current_group or current_group[-1].get("pause_point") == pause_point: + current_group.append(p) + else: + if len(current_group) > 1: + groups.append(current_group) + current_group = [p] + + if len(current_group) > 1: + groups.append(current_group) + + for group in groups: + cue_indices = 
[p.get("ad_cue_index") for p in group] + pause_point = group[0].get("pause_point") + first_resume = group[0].get("resume_from") + last_resume = group[-1].get("resume_from") + logger.info( + f"Consolidated group: cues {cue_indices} at pause_point={pause_point:.2f}s, " + f"first_resume_from={first_resume:.2f}s (should equal pause_point), " + f"last_resume_from={last_resume:.2f}s (back buffer)" + ) + def _find_gap_type(self, timestamp: float, gaps: list[SpeechGap]) -> str: """Find the gap type for a given timestamp.""" for gap in gaps: