# Review notes. Text placed before the "diff --git" header is commentary and
# is not part of the patch itself.
#
# Patch to class WhisperService in backend/app/services/whisper_service.py.
#
# Hunk 1 (pause-point refinement, enclosing method header not shown):
#   Replaces the fixed 0.175 s offset added to best_gap.start with a buffer
#   equal to half of best_gap.duration. When that half exceeds 2.0 s, the
#   buffer is 0.5 s instead. Also adds a logger.debug call reporting the gap
#   duration, the half gap and the buffer actually used.
#
# Hunk 2 (_find_gap_type):
#   A timestamp now matches a gap when gap.start <= timestamp <= gap.end
#   (both ends inclusive) instead of lying within 0.01 s of
#   gap.start + 0.175. The first matching gap in list order wins; "unknown"
#   is returned when no gap matches.
#
# NOTE(review): the buffer is discontinuous at half_gap == 2.0 (2.0 s at the
#   threshold, 0.5 s just above it) - confirm this jump is intended.
# NOTE(review): line breaks, blank context lines and indentation below were
#   reconstructed from a whitespace-collapsed copy, guided by the hunk line
#   counts (9 -> 21 and 8 -> 8). Indentation depth in hunk 1 is an assumption;
#   verify against the repository before applying.
diff --git a/backend/app/services/whisper_service.py b/backend/app/services/whisper_service.py
index 0c127e3..9204309 100644
--- a/backend/app/services/whisper_service.py
+++ b/backend/app/services/whisper_service.py
@@ -219,9 +219,21 @@ class WhisperService:
 
         best_gap = candidate_gaps[0]
 
-        # Use gap.start (end of previous sentence) as the pause point
-        # Add buffer (175ms) to ensure the sentence is fully heard
-        refined = best_gap.start + 0.175
+        # Calculate buffer: halfway between end of sentence and start of next word,
+        # unless that would be more than 2 seconds, in which case use 500ms
+        half_gap = best_gap.duration / 2
+        if half_gap > 2.0:
+            buffer = 0.5
+        else:
+            buffer = half_gap
+
+        # Use gap.start (end of previous sentence) + buffer as the pause point
+        refined = best_gap.start + buffer
+
+        logger.debug(
+            f"Pause point buffer: gap_duration={best_gap.duration:.3f}s, "
+            f"half_gap={half_gap:.3f}s, buffer_used={buffer:.3f}s"
+        )
 
         return refined, None
 
@@ -347,8 +359,8 @@ class WhisperService:
     def _find_gap_type(self, timestamp: float, gaps: list[SpeechGap]) -> str:
         """Find the gap type for a given timestamp."""
         for gap in gaps:
-            # Check if timestamp matches gap.start + 175ms buffer
-            if abs(gap.start + 0.175 - timestamp) < 0.01:
+            # Check if timestamp falls within this gap (between end of prev word and start of next)
+            if gap.start <= timestamp <= gap.end:
                 return gap.gap_type
         return "unknown"
 