feat: consolidate AD cues with pause points within 5s of each other
If consecutive AD cues have pause points within 5 seconds of each other, they now play back-to-back at the same pause point. This prevents an AD cue from being inserted mid-sentence when cues are close together.

Adds the _consolidate_close_cues() method and a consolidation_threshold parameter to refine_all_pause_points().

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
0647c9c112
commit
c605cd1a88
1 changed files with 66 additions and 1 deletions
|
|
@ -215,7 +215,8 @@ class WhisperService:
|
|||
def refine_all_pause_points(
|
||||
self,
|
||||
placements: list[dict],
|
||||
gaps: list[SpeechGap]
|
||||
gaps: list[SpeechGap],
|
||||
consolidation_threshold: float = 5.0
|
||||
) -> tuple[list[dict], list[str]]:
|
||||
"""
|
||||
Refine all pause points in a Gemini analysis result.
|
||||
|
|
@ -223,6 +224,8 @@ class WhisperService:
|
|||
Args:
|
||||
placements: List of placement dicts from Gemini analysis
|
||||
gaps: Speech gaps from Whisper analysis
|
||||
consolidation_threshold: If consecutive cues have pause points within
|
||||
this many seconds, combine them to play back-to-back (default: 5.0s)
|
||||
|
||||
Returns:
|
||||
Tuple of (refined_placements, warnings)
|
||||
|
|
@ -264,8 +267,70 @@ class WhisperService:
|
|||
|
||||
refined_placements.append(refined)
|
||||
|
||||
# Consolidate cues that are close together to avoid mid-sentence insertions
|
||||
refined_placements = self._consolidate_close_cues(
|
||||
refined_placements, consolidation_threshold, warnings
|
||||
)
|
||||
|
||||
return refined_placements, warnings
|
||||
|
||||
def _consolidate_close_cues(
    self,
    placements: list[dict],
    threshold: float,
    warnings: list[str]
) -> list[dict]:
    """
    Snap closely spaced AD cues onto a shared pause point.

    Walks the placements in order. When a cue's ``pause_point`` lies after
    the preceding cue's ``pause_point`` by no more than ``threshold``
    seconds, the cue is moved to that earlier pause point so both play
    back-to-back. The comparison uses the preceding cue as it appears in
    the output list, i.e. after any consolidation already applied to it.

    A moved cue is a shallow copy of the input dict with three changes:
    ``pause_point`` is overwritten, ``consolidated_with_previous`` is set
    to ``True``, and ``original_pause_point_before_consolidation`` keeps
    the value that was replaced. The input dicts are not modified.

    Args:
        placements: Refined placement dicts, in playback order.
        threshold: Maximum distance in seconds between two pause points
            for the later cue to be moved. Gaps of zero or less never
            trigger consolidation.
        warnings: List that receives one message per consolidated cue
            (mutated in place).

    Returns:
        The input list itself when it has fewer than two entries;
        otherwise a new list whose first entry is the original first dict
        and whose remaining entries are shallow copies.
    """
    if len(placements) < 2:
        return placements

    # The first cue has no predecessor, so it is passed through untouched.
    consolidated = [placements[0]]

    for position, placement in enumerate(placements[1:], start=1):
        current = placement.copy()
        current_pause = current.get("pause_point")
        previous_pause = consolidated[-1].get("pause_point")

        # Without both pause points there is nothing to compare.
        if current_pause is None or previous_pause is None:
            consolidated.append(current)
            continue

        gap = current_pause - previous_pause

        if 0 < gap <= threshold:
            # The cue index is only used for messages; fall back to the
            # position in `placements` rather than raising KeyError and
            # aborting the whole refinement when the key is absent.
            cue_label = current.get("ad_cue_index", position)

            current["pause_point"] = previous_pause
            current["consolidated_with_previous"] = True
            current["original_pause_point_before_consolidation"] = current_pause

            logger.info(
                f"Consolidated cue {cue_label} with previous cue: "
                f"pause point {current_pause:.2f}s -> {previous_pause:.2f}s "
                f"(gap was {gap:.2f}s, threshold {threshold:.2f}s)"
            )
            warnings.append(
                f"Cue {cue_label}: Consolidated with previous cue "
                f"(pause points were {gap:.2f}s apart, playing back-to-back)"
            )

        consolidated.append(current)

    return consolidated
|
||||
|
||||
def _find_gap_type(self, timestamp: float, gaps: list[SpeechGap]) -> str:
|
||||
"""Find the gap type for a given timestamp."""
|
||||
for gap in gaps:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue