diff --git a/examples/cli/cli.cpp b/examples/cli/cli.cpp
index f73ed9ae078..5aca1962aa7 100644
--- a/examples/cli/cli.cpp
+++ b/examples/cli/cli.cpp
@@ -727,7 +727,15 @@ static void output_json(
                                 value_s("text", whisper_token_to_str(ctx, token.id), false);
                                 if(token.t0 > -1 && token.t1 > -1) {
                                     // If we have per-token timestamps, write them out
-                                    times_o(token.t0, token.t1, false);
+                                    // When VAD is enabled, convert both token timestamps with
+                                    // vad_ts_to_original_ts() before writing them.
+                                    if (params.vad) {
+                                        times_o(vad_ts_to_original_ts(token.t0, ctx),
+                                                vad_ts_to_original_ts(token.t1, ctx),
+                                                false);
+                                    } else {
+                                        times_o(token.t0, token.t1, false);
+                                    }
                                 }
                                 value_i("id", token.id, false);
                                 value_f("p", token.p, false);
diff --git a/include/whisper.h b/include/whisper.h
index 4aeda98f334..e3e1dac761e 100644
--- a/include/whisper.h
+++ b/include/whisper.h
@@ -712,6 +712,11 @@ extern "C" {
     WHISPER_API float whisper_vad_segments_get_segment_t0(struct whisper_vad_segments * segments, int i_segment);
     WHISPER_API float whisper_vad_segments_get_segment_t1(struct whisper_vad_segments * segments, int i_segment);
 
+    // Convert a VAD-processed timestamp to the original timeline by looking it
+    // up in the VAD mapping table held in ctx's state. Returns vad_ts unchanged
+    // when ctx or its state is NULL.
+    WHISPER_API int64_t vad_ts_to_original_ts(int64_t vad_ts, struct whisper_context * ctx);
+
     WHISPER_API void whisper_vad_free_segments(struct whisper_vad_segments * segments);
     WHISPER_API void whisper_vad_free         (struct whisper_vad_context  * ctx);
 
diff --git a/src/whisper.cpp b/src/whisper.cpp
index fe3e135bee6..f6664167244 100644
--- a/src/whisper.cpp
+++ b/src/whisper.cpp
@@ -7968,6 +7968,15 @@ int64_t whisper_full_get_segment_t1(struct whisper_context * ctx, int i_segment)
     return whisper_full_get_segment_t1_from_state(ctx->state, i_segment);
 }
 
+int64_t vad_ts_to_original_ts(int64_t vad_ts, struct whisper_context * ctx) {
+    // Without a context/state there is no mapping table to consult; hand the
+    // timestamp back untouched instead of dereferencing a null pointer.
+    if (ctx == nullptr || ctx->state == nullptr) {
+        return vad_ts;
+    }
+    return map_processed_to_original_time(vad_ts, ctx->state->vad_mapping_table);
+}
+
 bool whisper_full_get_segment_speaker_turn_next_from_state(struct whisper_state * state, int i_segment) {
     return state->result_all[i_segment].speaker_turn_next;
 }