Skip to content

Commit 549db93

Browse files
authored
whisper : reduce delta_min from 1000ms to 100ms (#3028)
ggml-ci
1 parent 33a25e4 commit 549db93

File tree

1 file changed

+11
-9
lines changed

1 file changed

+11
-9
lines changed

src/whisper.cpp

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5527,11 +5527,13 @@ int whisper_full_with_state(
5527 5527
const int seek_start = params.offset_ms/10;
5528 5528
const int seek_end = params.duration_ms == 0 ? whisper_n_len_from_state(state) : seek_start + params.duration_ms/10;
5529 5529

5530-
// if length of spectrogram is less than 1.0s (100 frames), then return
5531-
// basically don't process anything that is less than 1.0s
5532-
// see issue #39: https://github.com/ggml-org/whisper.cpp/issues/39
5533-
if (seek_end < seek_start + 100) {
5534-
WHISPER_LOG_WARN("%s: input is too short - %d ms < 1000 ms. consider padding the input audio with silence\n", __func__, (seek_end - seek_start)*10);
5530+
// if length of spectrogram is less than 100ms (10 frames), then return
5531+
// basically don't process anything that is less than 100ms
5532+
// ref: https://github.com/ggml-org/whisper.cpp/issues/2065
5533+
const int delta_min = 10;
5534+
5535+
if (seek_end < seek_start + delta_min) {
5536+
WHISPER_LOG_WARN("%s: input is too short - %d ms < 100 ms. consider padding the input audio with silence\n", __func__, (seek_end - seek_start)*10);
5535 5537
return 0;
5536 5538
}
5537 5539

@@ -5675,8 +5677,8 @@ int whisper_full_with_state(
5675 5677
ctx, state, progress_cur, params.progress_callback_user_data);
5676 5678
}
5677 5679

5678-
// if only 1 second left, then stop
5679-
if (seek + 100 >= seek_end) {
5680+
// if only 100ms left, then stop
5681+
if (seek + delta_min >= seek_end) {
5680 5682
break;
5681 5683
}
5682 5684

@@ -6023,10 +6025,10 @@ int whisper_full_with_state(
6023 6025
// end of segment
6024 6026
if (token.id == whisper_token_eot(ctx) || // end of text token
6025 6027
(params.max_tokens > 0 && i >= params.max_tokens) || // max tokens per segment reached
6026-
(has_ts && seek + seek_delta + 100 >= seek_end) // end of audio reached
6028+
(has_ts && seek + seek_delta + delta_min >= seek_end) // end of audio reached (100ms)
6027 6029
) {
6028 6030
if (result_len == 0 && !params.no_timestamps) {
6029-
if (seek + seek_delta + 100 >= seek_end) {
6031+
if (seek + seek_delta + delta_min >= seek_end) {
6030 6032
result_len = i + 1;
6031 6033
} else {
6032 6034
WHISPER_LOG_DEBUG("%s: decoder %d failed (result_len = 0)\n", __func__, j);

0 commit comments

Comments
 (0)