feat: dynamic pause point buffer based on gap duration

Instead of a fixed 175ms buffer, the pause point is now placed
halfway between the end of the sentence and the start of the
next word. If the half-gap exceeds 2 seconds, a fixed 500ms buffer is used instead.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
michael 2025-12-28 22:33:07 -06:00
parent 70a07f3732
commit 504e525a1f

View file

@ -219,9 +219,21 @@ class WhisperService:
best_gap = candidate_gaps[0]
# Use gap.start (end of previous sentence) as the pause point
# Add buffer (175ms) to ensure the sentence is fully heard
refined = best_gap.start + 0.175
# Calculate buffer: halfway between end of sentence and start of next word,
# unless that would be more than 2 seconds, in which case use 500ms
half_gap = best_gap.duration / 2
if half_gap > 2.0:
buffer = 0.5
else:
buffer = half_gap
# Use gap.start (end of previous sentence) + buffer as the pause point
refined = best_gap.start + buffer
logger.debug(
f"Pause point buffer: gap_duration={best_gap.duration:.3f}s, "
f"half_gap={half_gap:.3f}s, buffer_used={buffer:.3f}s"
)
return refined, None
@ -347,8 +359,8 @@ class WhisperService:
def _find_gap_type(self, timestamp: float, gaps: list[SpeechGap]) -> str:
"""Find the gap type for a given timestamp."""
for gap in gaps:
# Check if timestamp matches gap.start + 175ms buffer
if abs(gap.start + 0.175 - timestamp) < 0.01:
# Check if timestamp falls within this gap (between the end of the previous word and the start of the next word)
if gap.start <= timestamp <= gap.end:
return gap.gap_type
return "unknown"