1
- #include < cstdio>
2
- #include < cstdlib>
3
1
#include < iostream>
4
- #include < memory>
5
- #include < filesystem>
6
-
7
- int main (int argc, char **argv) {
8
- // default cmake-build-debug/main
9
- const char filename[] = " ../pcm/16k_1.pcm" ;
10
- const char output_dir[] = " output_pcm" ;
11
- const char output_filename_prefix[] = " 16k_1.pcm" ;
12
- if (!std::filesystem::exists (output_dir)) {
13
- std::filesystem::create_directories (output_dir);
2
+ #include < vector>
3
+ #include < cstdint>
4
+ #include < whisper.h>
5
+
6
+ #include " ../stream/stream_components_service.h"
7
+ #include " ../stream/stream_components.h"
8
+ #include " ../common/utils.h"
9
+ #include " ../common/common.h"
10
+ #include < speex/speex_preprocess.h>
11
+
12
+ using namespace stream_components ;
13
+
14
+
15
+ int main () {
16
+ std::string wav_file_path = " ../samples/jfk.wav" ; // 替换为您的 WAV 文件路径
17
+ // audio arrays
18
+ std::vector<float > pcmf32; // mono-channel F32 PCM
19
+ std::vector<std::vector<float >> pcmf32s; // stereo-channel F32 PCM
20
+ ::read_wav (wav_file_path, pcmf32, pcmf32s, false );
21
+
22
+ printf (" size of samples %lu\n " , pcmf32.size ());
23
+
24
+
25
+ whisper_local_stream_params params;
26
+ struct whisper_context_params cparams{};
27
+ cparams.use_gpu = params.service .use_gpu ;
28
+ // Instantiate the service
29
+ stream_components::WhisperService whisperService (params.service , params.audio , cparams);
30
+
31
+ // Simulate websokcet by adding 1500 data each time.
32
+ std::vector<float > audio_buffer;
33
+ int chunk_size = 160 ; // 适用于 16 kHz 采样率的 100 毫秒帧
34
+ SpeexPreprocessState *st = speex_preprocess_state_init (chunk_size, WHISPER_SAMPLE_RATE);
35
+
36
+ int vad = 1 ;
37
+ speex_preprocess_ctl (st, SPEEX_PREPROCESS_SET_VAD, &vad);
38
+
39
+ bool last_is_speech = false ;
40
+ // 处理音频帧
41
+ for (size_t i = 0 ; i < pcmf32.size (); i += chunk_size) {
42
+ spx_int16_t frame[chunk_size];
43
+ for (int j = 0 ; j < chunk_size; ++j) {
44
+ if (i + j < pcmf32.size ()) {
45
+ frame[j] = (spx_int16_t )(pcmf32[i + j] * 32768 );
46
+ } else {
47
+ frame[j] = 0 ; // 对于超出范围的部分填充 0
48
+ }
49
+ }
50
+ int is_speech = speex_preprocess_run (st, frame);
51
+
52
+ // 将当前帧添加到 audio_buffer
53
+ audio_buffer.insert (audio_buffer.end (), pcmf32.begin () + i, pcmf32.begin () + std::min (i + chunk_size, pcmf32.size ()));
54
+ printf (" is_speech %d \n " ,is_speech);
55
+ if (!is_speech && last_is_speech) {
56
+ bool b = whisperService.process (pcmf32.data (), pcmf32.size ());
57
+ const nlohmann::json &json_array = get_result (whisperService.ctx );
58
+ const std::basic_string<char , std::char_traits<char >, std::allocator<char >> &string = json_array.dump ();
59
+ printf (" %s\n " ,string.c_str ());
60
+ return 0 ;
61
+ audio_buffer.clear ();
62
+ }
63
+
64
+ last_is_speech = is_speech != 0 ;
14
65
}
66
+
67
+ speex_preprocess_state_destroy (st);
15
68
}
0 commit comments