feat: dynamic pause point buffer based on gap duration
Instead of a fixed 175ms buffer, the pause point is now placed halfway between the end of the sentence and the start of the next word. If that half-gap exceeds 2 seconds, a 500ms buffer is used instead. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
70a07f3732
commit
504e525a1f
1 changed file with 17 additions and 5 deletions
|
|
@@ -219,9 +219,21 @@ class WhisperService:
|
|||
|
||||
best_gap = candidate_gaps[0]
|
||||
|
||||
# Use gap.start (end of previous sentence) as the pause point
|
||||
# Add buffer (175ms) to ensure the sentence is fully heard
|
||||
refined = best_gap.start + 0.175
|
||||
# Calculate buffer: halfway between end of sentence and start of next word,
|
||||
# unless that would be more than 2 seconds, in which case use 500ms
|
||||
half_gap = best_gap.duration / 2
|
||||
if half_gap > 2.0:
|
||||
buffer = 0.5
|
||||
else:
|
||||
buffer = half_gap
|
||||
|
||||
# Use gap.start (end of previous sentence) + buffer as the pause point
|
||||
refined = best_gap.start + buffer
|
||||
|
||||
logger.debug(
|
||||
f"Pause point buffer: gap_duration={best_gap.duration:.3f}s, "
|
||||
f"half_gap={half_gap:.3f}s, buffer_used={buffer:.3f}s"
|
||||
)
|
||||
|
||||
return refined, None
|
||||
|
||||
|
|
@@ -347,8 +359,8 @@ class WhisperService:
|
|||
def _find_gap_type(self, timestamp: float, gaps: list[SpeechGap]) -> str:
|
||||
"""Find the gap type for a given timestamp."""
|
||||
for gap in gaps:
|
||||
# Check if timestamp matches gap.start + 175ms buffer
|
||||
if abs(gap.start + 0.175 - timestamp) < 0.01:
|
||||
# Check if timestamp falls within this gap (between end of prev word and start of next)
|
||||
if gap.start <= timestamp <= gap.end:
|
||||
return gap.gap_type
|
||||
return "unknown"
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue