fix: treat consolidated AD cues as single segment for buffering

Previously, all consolidated cues shared the same pause_point AND resume_from, which caused the overlap video segment to play between each AD cue in a consolidated group.

Now consolidated cues are treated as a single AD segment:
- All cues in a group share the same pause_point (front buffer once)
- Only the LAST cue keeps resume_from (back buffer once)
- Other cues have resume_from = pause_point (no video between ADs)

This ensures consolidated ADs play seamlessly back-to-back:
- Video plays up to pause_point (front buffer)
- AD_1 plays
- AD_2 plays immediately (no video)
- AD_n plays immediately (no video)
- Video resumes from resume_from (back buffer)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-28 23:33:15 -06:00 · 2025-12-28 23:33:15 -06:00 · d092800676
commit d092800676
parent ee6a30e7a7
1 changed files with 72 additions and 8 deletions
--- a/backend/app/services/whisper_service.py
+++ b/backend/app/services/whisper_service.py
@ -344,9 +344,13 @@ class WhisperService:
        """
        Consolidate AD cues whose pause points are within threshold seconds of each other.

-        If a cue's pause_point is within `threshold` seconds of the previous cue's
-        pause_point, set them to the same pause_point and resume_from so they play back-to-back.
-        This avoids inserting AD cues in the middle of sentences.
+        Consolidated cues are treated as a single AD segment:
+        - All cues in a group share the same pause_point (front buffer applied once)
+        - Only the LAST cue in the group keeps resume_from (back buffer applied once)
+        - Every other cue (including the first) has resume_from = pause_point (no video between ADs)
+
+        This ensures consolidated ADs play seamlessly back-to-back without
+        repeating the overlap video segment between each one.

        Args:
            placements: List of refined placement dicts
@ -359,7 +363,9 @@ class WhisperService:
        if len(placements) < 2:
            return placements

-        consolidated = [placements[0]]
+        # First pass: identify consolidated groups and assign same pause_point
+        consolidated = [placements[0].copy()]
+        group_resume_from = placements[0].get("resume_from")  # Track the back buffer for the group

        for i in range(1, len(placements)):
            current = placements[i].copy()
@ -372,30 +378,88 @@ class WhisperService:
                gap = current_pause - previous_pause

                if 0 < gap <= threshold:
-                    # Consolidate: set current cue to use same pause point and resume_from as previous
+                    # Consolidate: set current cue to use same pause point as previous
                    original_pause = current_pause
                    original_resume = current.get("resume_from")
                    current["pause_point"] = previous_pause
-                    current["resume_from"] = previous.get("resume_from")  # Use same resume_from as previous
                    current["consolidated_with_previous"] = True
                    current["original_pause_point_before_consolidation"] = original_pause
                    current["original_resume_from_before_consolidation"] = original_resume
+                    # resume_from is left as-is here; the second pass below overwrites it for every cue that is not last in its group

                    logger.info(
                        f"Consolidated cue {current['ad_cue_index']} with previous cue: "
-                        f"pause_point {original_pause:.2f}s -> {previous_pause:.2f}s, "
-                        f"resume_from {original_resume:.2f}s -> {previous.get('resume_from'):.2f}s "
+                        f"pause_point {original_pause:.2f}s -> {previous_pause:.2f}s "
                        f"(gap was {gap:.2f}s, threshold {threshold:.2f}s)"
                    )
                    warnings.append(
                        f"Cue {current['ad_cue_index']}: Consolidated with previous cue "
                        f"(pause points were {gap:.2f}s apart, playing back-to-back)"
                    )
+                else:
+                    # New group starts - update group_resume_from for the new group
+                    group_resume_from = current.get("resume_from")

            consolidated.append(current)

+        # Second pass: fix resume_from values for consolidated groups
+        # Only the LAST cue in each group should have the back buffer (resume_from < pause_point)
+        # All other cues should have resume_from = pause_point (no video between ADs)
+        for i in range(len(consolidated)):
+            current = consolidated[i]
+            current_pause = current.get("pause_point")
+
+            if current_pause is None:
+                continue
+
+            # Check if next cue has the same pause_point (meaning current is NOT last in group)
+            if i < len(consolidated) - 1:
+                next_pause = consolidated[i + 1].get("pause_point")
+                if next_pause == current_pause:
+                    # Current is NOT the last in the group - remove back buffer
+                    # Set resume_from = pause_point so no video plays between this AD and the next
+                    original_resume = current.get("resume_from")
+                    if original_resume != current_pause:
+                        current["resume_from"] = current_pause
+                        current["resume_from_removed_for_consolidation"] = original_resume
+                        logger.debug(
+                            f"Cue {current.get('ad_cue_index')}: Removed back buffer for seamless "
+                            f"consolidated playback (resume_from {original_resume:.2f}s -> {current_pause:.2f}s)"
+                        )
+
+        # Log the final consolidated groups
+        self._log_consolidated_groups(consolidated)
+
        return consolidated

+    def _log_consolidated_groups(self, placements: list[dict]) -> None:
+        """Log information about consolidated AD cue groups."""
+        groups = []
+        current_group = []
+
+        for p in placements:
+            pause_point = p.get("pause_point")
+            if not current_group or current_group[-1].get("pause_point") == pause_point:
+                current_group.append(p)
+            else:
+                if len(current_group) > 1:
+                    groups.append(current_group)
+                current_group = [p]
+
+        if len(current_group) > 1:
+            groups.append(current_group)
+
+        for group in groups:
+            cue_indices = [p.get("ad_cue_index") for p in group]
+            pause_point = group[0].get("pause_point")
+            first_resume = group[0].get("resume_from")
+            last_resume = group[-1].get("resume_from")
+            logger.info(
+                f"Consolidated group: cues {cue_indices} at pause_point={pause_point:.2f}s, "
+                f"first_resume_from={first_resume:.2f}s (should equal pause_point), "
+                f"last_resume_from={last_resume:.2f}s (back buffer)"
+            )
+
    def _find_gap_type(self, timestamp: float, gaps: list[SpeechGap]) -> str:
        """Find the gap type for a given timestamp."""
        for gap in gaps: