@@ -4347,7 +4347,9 @@ const char * whisper_print_system_info(void) {
43474347 return s.c_str ();
43484348}
43494349
4350+ // ////////////////////////////////
43504351// Voice Activity Detection (VAD)
4352+ // ////////////////////////////////
43514353
43524354enum e_vad_model {
43534355 VAD_MODEL_SILERO_16K,
@@ -5098,6 +5100,7 @@ whisper_vad_context * whisper_vad_init_from_file_with_params_no_state(
50985100 return nullptr ;
50995101 }
51005102
5103+ /* TODO(danbev) Remove the printing of weights once things are working.
51015104 {
51025105 // Print as F16
51035106 struct ggml_tensor * tensor = model.stft_forward_basis;
@@ -5262,6 +5265,7 @@ whisper_vad_context * whisper_vad_init_from_file_with_params_no_state(
52625265 WHISPER_LOG_INFO("%s: final_conv_bias: [%d]: %f\n", __func__, i, read_b[i]);
52635266 }
52645267 }
5268+ */
52655269 }
52665270
52675271 return vctx;
@@ -5339,6 +5343,7 @@ struct whisper_vad_speech whisper_vad_detect_speech(struct whisper_vad_context *
53395343 ggml_backend_tensor_set (c_in, c_state.data (), 0 , hidden_dim * sizeof (float ));
53405344
53415345 // Log the frame and hidden state before computing the graph
5346+ /*
53425347 {
53435348 struct ggml_tensor * tensor = ggml_graph_get_tensor(gf, "frame");
53445349 WHISPER_LOG_INFO("%s: frame shape [%lld, %lld, %lld, %lld]\n", __func__, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
@@ -5348,6 +5353,7 @@ struct whisper_vad_speech whisper_vad_detect_speech(struct whisper_vad_context *
53485353 WHISPER_LOG_INFO("%s: input: [%d]: %f\n", __func__, i, read_b[i]);
53495354 }
53505355 }
5356+ */
53515357 /*
53525358
53535359 {
@@ -5519,10 +5525,12 @@ struct whisper_vad_speech whisper_vad_detect_speech(struct whisper_vad_context *
55195525 // }
55205526 }
55215527 WHISPER_LOG_INFO (" %s: finished processing %d samples\n " , __func__, n_samples);
5528+ /*
55225529 for (int i = 0; i < n_chunks; i++) {
55235530 //for (size_t i = 0; i < 2; i++) {
55245531 WHISPER_LOG_INFO("%s: prob[%d]: %f\n", __func__, i, probs[i]);
55255532 }
5533+ */
55265534
55275535 struct whisper_vad_speech speech = {
55285536 /* n_probs = */ n_chunks,
0 commit comments