From c605cd1a88ba17c0816d183d8f1b86b29cb5f45a Mon Sep 17 00:00:00 2001 From: michael Date: Sun, 28 Dec 2025 16:15:52 -0600 Subject: [PATCH] feat: consolidate AD cues with pause points within 5s of each other MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If consecutive AD cues have pause points within 5 seconds, they now play back-to-back at the same pause point. This prevents AD from being inserted mid-sentence when cues are close together. Adds _consolidate_close_cues() method and consolidation_threshold parameter to refine_all_pause_points(). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- backend/app/services/whisper_service.py | 67 ++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/backend/app/services/whisper_service.py b/backend/app/services/whisper_service.py index 9dc90ff..b478814 100644 --- a/backend/app/services/whisper_service.py +++ b/backend/app/services/whisper_service.py @@ -215,7 +215,8 @@ class WhisperService: def refine_all_pause_points( self, placements: list[dict], - gaps: list[SpeechGap] + gaps: list[SpeechGap], + consolidation_threshold: float = 5.0 ) -> tuple[list[dict], list[str]]: """ Refine all pause points in a Gemini analysis result. @@ -223,6 +224,8 @@ class WhisperService: Args: placements: List of placement dicts from Gemini analysis gaps: Speech gaps from Whisper analysis + consolidation_threshold: If consecutive cues have pause points within + this many seconds, combine them to play back-to-back (default: 5.0s) Returns: Tuple of (refined_placements, warnings) @@ -264,8 +267,70 @@ class WhisperService: refined_placements.append(refined) + # Consolidate cues that are close together to avoid mid-sentence insertions + refined_placements = self._consolidate_close_cues( + refined_placements, consolidation_threshold, warnings + ) + return refined_placements, warnings + def _consolidate_close_cues( + self, + placements: list[dict], + threshold: float, + warnings: list[str] + ) -> list[dict]: + """ + Consolidate AD cues whose pause points are within threshold seconds of each other. + + If a cue's pause_point is within `threshold` seconds of the previous cue's + pause_point, set them to the same pause_point so they play back-to-back. + This avoids inserting AD cues in the middle of sentences. + + Args: + placements: List of refined placement dicts + threshold: Max seconds between pause points to trigger consolidation + warnings: List to append warning messages to + + Returns: + Updated placements with consolidated pause points + """ + if len(placements) < 2: + return placements + + consolidated = [placements[0]] + + for i in range(1, len(placements)): + current = placements[i].copy() + previous = consolidated[-1] + + current_pause = current.get("pause_point") + previous_pause = previous.get("pause_point") + + if current_pause is not None and previous_pause is not None: + gap = current_pause - previous_pause + + if 0 < gap <= threshold: + # Consolidate: set current cue to use same pause point as previous + original_pause = current_pause + current["pause_point"] = previous_pause + current["consolidated_with_previous"] = True + current["original_pause_point_before_consolidation"] = original_pause + + logger.info( + f"Consolidated cue {current['ad_cue_index']} with previous cue: " + f"pause point {original_pause:.2f}s -> {previous_pause:.2f}s " + f"(gap was {gap:.2f}s, threshold {threshold:.2f}s)" + ) + warnings.append( + f"Cue {current['ad_cue_index']}: Consolidated with previous cue " + f"(pause points were {gap:.2f}s apart, playing back-to-back)" + ) + + consolidated.append(current) + + return consolidated + def _find_gap_type(self, timestamp: float, gaps: list[SpeechGap]) -> str: """Find the gap type for a given timestamp.""" for gap in gaps: