From d0928006769c69246001ca09ec3a2ab9b271f11c Mon Sep 17 00:00:00 2001
From: michael <michael@modernfreedom.com>
Date: Sun, 28 Dec 2025 23:33:15 -0600
Subject: [PATCH] fix: treat consolidated AD cues as single segment for
 buffering
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, all consolidated cues shared the same pause_point AND
resume_from, which caused the overlap video segment to play between
each AD cue in a consolidated group.

Now consolidated cues are treated as a single AD segment:
- All cues in a group share the same pause_point (front buffer once)
- Only the LAST cue keeps resume_from (back buffer once)
- Other cues have resume_from = pause_point (no video between ADs)

This ensures consolidated ADs play seamlessly back-to-back:
- Video plays up to pause_point (front buffer)
- AD_1 plays
- AD_2 plays immediately (no video)
- AD_n plays immediately (no video)
- Video resumes from resume_from (back buffer)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 backend/app/services/whisper_service.py | 80 ++++++++++++++++++++++---
 1 file changed, 72 insertions(+), 8 deletions(-)

diff --git a/backend/app/services/whisper_service.py b/backend/app/services/whisper_service.py
index 5605487..39e3129 100644
--- a/backend/app/services/whisper_service.py
+++ b/backend/app/services/whisper_service.py
@@ -344,9 +344,13 @@ class WhisperService:
         """
         Consolidate AD cues whose pause points are within threshold seconds of each other.
 
-        If a cue's pause_point is within `threshold` seconds of the previous cue's
-        pause_point, set them to the same pause_point and resume_from so they play back-to-back.
-        This avoids inserting AD cues in the middle of sentences.
+        Consolidated cues are treated as a single AD segment:
+        - All cues in a group share the same pause_point (front buffer applied once)
+        - Only the LAST cue in the group keeps resume_from (back buffer applied once)
+        - All other cues have resume_from = pause_point (no video between ADs)
+
+        This ensures consolidated ADs play seamlessly back-to-back without
+        repeating the overlap video segment between each one.
 
         Args:
             placements: List of refined placement dicts
@@ -359,7 +363,9 @@ class WhisperService:
         if len(placements) < 2:
             return placements
 
-        consolidated = [placements[0]]
+        # First pass: identify consolidated groups and assign same pause_point
+        consolidated = [placements[0].copy()]
+        group_resume_from = placements[0].get("resume_from")  # Track the back buffer for the group
 
         for i in range(1, len(placements)):
             current = placements[i].copy()
@@ -372,30 +378,88 @@ class WhisperService:
                 gap = current_pause - previous_pause
 
                 if 0 < gap <= threshold:
-                    # Consolidate: set current cue to use same pause point and resume_from as previous
+                    # Consolidate: set current cue to use same pause point as previous
                     original_pause = current_pause
                     original_resume = current.get("resume_from")
                     current["pause_point"] = previous_pause
-                    current["resume_from"] = previous.get("resume_from")  # Use same resume_from as previous
                     current["consolidated_with_previous"] = True
                     current["original_pause_point_before_consolidation"] = original_pause
                     current["original_resume_from_before_consolidation"] = original_resume
+                    # Leave resume_from as-is here; the second pass resets it to pause_point for all but the group's last cue
 
                     logger.info(
                         f"Consolidated cue {current['ad_cue_index']} with previous cue: "
-                        f"pause_point {original_pause:.2f}s -> {previous_pause:.2f}s, "
-                        f"resume_from {original_resume:.2f}s -> {previous.get('resume_from'):.2f}s "
+                        f"pause_point {original_pause:.2f}s -> {previous_pause:.2f}s "
                         f"(gap was {gap:.2f}s, threshold {threshold:.2f}s)"
                     )
                     warnings.append(
                         f"Cue {current['ad_cue_index']}: Consolidated with previous cue "
                         f"(pause points were {gap:.2f}s apart, playing back-to-back)"
                     )
+                else:
+                    # New group starts - update group_resume_from for the new group
+                    group_resume_from = current.get("resume_from")
 
             consolidated.append(current)
 
+        # Second pass: fix resume_from values for consolidated groups
+        # Only the LAST cue in each group should have the back buffer (resume_from < pause_point)
+        # All other cues should have resume_from = pause_point (no video between ADs)
+        for i in range(len(consolidated)):
+            current = consolidated[i]
+            current_pause = current.get("pause_point")
+
+            if current_pause is None:
+                continue
+
+            # Check if next cue has the same pause_point (meaning current is NOT last in group)
+            if i < len(consolidated) - 1:
+                next_pause = consolidated[i + 1].get("pause_point")
+                if next_pause == current_pause:
+                    # Current is NOT the last in the group - remove back buffer
+                    # Set resume_from = pause_point so no video plays between this AD and the next
+                    original_resume = current.get("resume_from")
+                    if original_resume != current_pause:
+                        current["resume_from"] = current_pause
+                        current["resume_from_removed_for_consolidation"] = original_resume
+                        logger.debug(
+                            f"Cue {current.get('ad_cue_index')}: Removed back buffer for seamless "
+                            f"consolidated playback (resume_from {original_resume:.2f}s -> {current_pause:.2f}s)"
+                        )
+
+        # Log the final consolidated groups
+        self._log_consolidated_groups(consolidated)
+
         return consolidated
 
+    def _log_consolidated_groups(self, placements: list[dict]) -> None:
+        """Log information about consolidated AD cue groups."""
+        groups = []
+        current_group = []
+
+        for p in placements:
+            pause_point = p.get("pause_point")
+            if not current_group or current_group[-1].get("pause_point") == pause_point:
+                current_group.append(p)
+            else:
+                if len(current_group) > 1:
+                    groups.append(current_group)
+                current_group = [p]
+
+        if len(current_group) > 1:
+            groups.append(current_group)
+
+        for group in groups:
+            cue_indices = [p.get("ad_cue_index") for p in group]
+            pause_point = group[0].get("pause_point")
+            first_resume = group[0].get("resume_from")
+            last_resume = group[-1].get("resume_from")
+            logger.info(
+                f"Consolidated group: cues {cue_indices} at pause_point={pause_point:.2f}s, "
+                f"first_resume_from={first_resume:.2f}s (should equal pause_point), "
+                f"last_resume_from={last_resume:.2f}s (back buffer)"
+            )
+
     def _find_gap_type(self, timestamp: float, gaps: list[SpeechGap]) -> str:
         """Find the gap type for a given timestamp."""
         for gap in gaps: