feat: consolidate AD cues with pause points within 5s of each other

If consecutive AD cues have pause points within 5 seconds of each other, the later cue is moved to the earlier cue's pause point so the two play back-to-back. This prevents an AD cue from being inserted mid-sentence when cues are close together. Adds the _consolidate_close_cues() method and a consolidation_threshold parameter (default 5.0s) to refine_all_pause_points().

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-28 16:15:52 -06:00 · 2025-12-28 16:15:52 -06:00 · c605cd1a88
commit c605cd1a88
parent 0647c9c112
1 changed file with 66 additions and 1 deletion
--- a/backend/app/services/whisper_service.py
+++ b/backend/app/services/whisper_service.py
@@ -215,7 +215,8 @@ class WhisperService:
    def refine_all_pause_points(
        self,
        placements: list[dict],
-        gaps: list[SpeechGap]
+        gaps: list[SpeechGap],
+        consolidation_threshold: float = 5.0
    ) -> tuple[list[dict], list[str]]:
        """
        Refine all pause points in a Gemini analysis result.
@@ -223,6 +224,8 @@ class WhisperService:
        Args:
            placements: List of placement dicts from Gemini analysis
            gaps: Speech gaps from Whisper analysis
+            consolidation_threshold: If consecutive cues have pause points within
+                this many seconds, combine them to play back-to-back (default: 5.0s)

        Returns:
            Tuple of (refined_placements, warnings)
@@ -264,8 +267,70 @@ class WhisperService:

            refined_placements.append(refined)

+        # Consolidate cues that are close together to avoid mid-sentence insertions
+        refined_placements = self._consolidate_close_cues(
+            refined_placements, consolidation_threshold, warnings
+        )
+
        return refined_placements, warnings

+    def _consolidate_close_cues(
+        self,
+        placements: list[dict],
+        threshold: float,
+        warnings: list[str]
+    ) -> list[dict]:
+        """
+        Consolidate AD cues whose pause points are within threshold seconds of each other.
+
+        If a cue's pause_point is within `threshold` seconds of the previous cue's
+        pause_point, set them to the same pause_point so they play back-to-back.
+        This avoids inserting AD cues in the middle of sentences.
+
+        Args:
+            placements: List of refined placement dicts
+            threshold: Max seconds between pause points to trigger consolidation
+            warnings: List to append warning messages to
+
+        Returns:
+            Updated placements with consolidated pause points
+        """
+        if len(placements) < 2:
+            return placements
+
+        consolidated = [placements[0]]
+
+        for i in range(1, len(placements)):
+            current = placements[i].copy()
+            previous = consolidated[-1]
+
+            current_pause = current.get("pause_point")
+            previous_pause = previous.get("pause_point")
+
+            if current_pause is not None and previous_pause is not None:
+                gap = current_pause - previous_pause
+
+                if 0 < gap <= threshold:
+                    # Consolidate: set current cue to use same pause point as previous
+                    original_pause = current_pause
+                    current["pause_point"] = previous_pause
+                    current["consolidated_with_previous"] = True
+                    current["original_pause_point_before_consolidation"] = original_pause
+
+                    logger.info(
+                        f"Consolidated cue {current['ad_cue_index']} with previous cue: "
+                        f"pause point {original_pause:.2f}s -> {previous_pause:.2f}s "
+                        f"(gap was {gap:.2f}s, threshold {threshold:.2f}s)"
+                    )
+                    warnings.append(
+                        f"Cue {current['ad_cue_index']}: Consolidated with previous cue "
+                        f"(pause points were {gap:.2f}s apart, playing back-to-back)"
+                    )
+
+            consolidated.append(current)
+
+        return consolidated
+
    def _find_gap_type(self, timestamp: float, gaps: list[SpeechGap]) -> str:
        """Find the gap type for a given timestamp."""
        for gap in gaps: