From c605cd1a88ba17c0816d183d8f1b86b29cb5f45a Mon Sep 17 00:00:00 2001
From: michael <michael@modernfreedom.com>
Date: Sun, 28 Dec 2025 16:15:52 -0600
Subject: [PATCH] feat: consolidate AD cues with pause points within 5s of each
 other
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If consecutive AD cues have pause points within 5 seconds of each other
(the default threshold), the later cue is moved to the earlier cue's pause
point so they play back-to-back instead of interrupting mid-sentence.

Adds _consolidate_close_cues() method and consolidation_threshold
parameter to refine_all_pause_points().

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 backend/app/services/whisper_service.py | 67 ++++++++++++++++++++++++-
 1 file changed, 66 insertions(+), 1 deletion(-)

diff --git a/backend/app/services/whisper_service.py b/backend/app/services/whisper_service.py
index 9dc90ff..b478814 100644
--- a/backend/app/services/whisper_service.py
+++ b/backend/app/services/whisper_service.py
@@ -215,7 +215,8 @@ class WhisperService:
     def refine_all_pause_points(
         self,
         placements: list[dict],
-        gaps: list[SpeechGap]
+        gaps: list[SpeechGap],
+        consolidation_threshold: float = 5.0
     ) -> tuple[list[dict], list[str]]:
         """
         Refine all pause points in a Gemini analysis result.
@@ -223,6 +224,8 @@ class WhisperService:
         Args:
             placements: List of placement dicts from Gemini analysis
             gaps: Speech gaps from Whisper analysis
+            consolidation_threshold: If consecutive cues have pause points within
+                this many seconds, combine them to play back-to-back (default: 5.0s)
 
         Returns:
             Tuple of (refined_placements, warnings)
@@ -264,8 +267,70 @@ class WhisperService:
 
             refined_placements.append(refined)
 
+        # Consolidate cues that are close together to avoid mid-sentence insertions
+        refined_placements = self._consolidate_close_cues(
+            refined_placements, consolidation_threshold, warnings
+        )
+
         return refined_placements, warnings
 
+    def _consolidate_close_cues(
+        self,
+        placements: list[dict],
+        threshold: float,
+        warnings: list[str]
+    ) -> list[dict]:
+        """
+        Snap a cue's pause point onto the previous cue's when the two are close.
+
+        A cue is consolidated when its pause_point is later than the previous
+        cue's pause_point (as already consolidated) by at most `threshold`
+        seconds, so a run of close cues shares the pause point of its first cue.
+        Cues without a pause_point, or not later than the previous one, are kept.
+
+        Args:
+            placements: List of refined placement dicts (left unmodified)
+            threshold: Max seconds between pause points to trigger consolidation
+            warnings: List to append one message per consolidated cue to
+
+        Returns:
+            New list of shallow copies; consolidated cues also carry the keys
+            consolidated_with_previous and original_pause_point_before_consolidation
+        """
+        consolidated: list[dict] = []
+
+        for placement in placements:
+            # Copy every cue, including the first, so the result never aliases input.
+            current = placement.copy()
+            # Compare with the last emitted cue so chained cues share one anchor.
+            previous = consolidated[-1] if consolidated else {}
+            current_pause = current.get("pause_point")
+            previous_pause = previous.get("pause_point")
+
+            if current_pause is not None and previous_pause is not None:
+                gap = current_pause - previous_pause
+
+                if 0 < gap <= threshold:
+                    # .get(): a missing index must not make the log line raise.
+                    cue_index = current.get("ad_cue_index")
+                    current["pause_point"] = previous_pause
+                    current["consolidated_with_previous"] = True
+                    current["original_pause_point_before_consolidation"] = current_pause
+
+                    logger.info(
+                        f"Consolidated cue {cue_index} with previous cue: "
+                        f"pause point {current_pause:.2f}s -> {previous_pause:.2f}s "
+                        f"(gap was {gap:.2f}s, threshold {threshold:.2f}s)"
+                    )
+                    warnings.append(
+                        f"Cue {cue_index}: Consolidated with previous cue "
+                        f"(pause points were {gap:.2f}s apart, playing back-to-back)"
+                    )
+
+            consolidated.append(current)
+
+        return consolidated
+
     def _find_gap_type(self, timestamp: float, gaps: list[SpeechGap]) -> str:
         """Find the gap type for a given timestamp."""
         for gap in gaps: