Skip to content

Commit 8fb4ef3

Browse files
author
litongjava
committed
add stream_components.cpp
1 parent d528ac2 commit 8fb4ef3

File tree

2 files changed

+148
-0
lines changed

2 files changed

+148
-0
lines changed

CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,8 @@ add_executable(web_socket_server main.cpp web_socket_server.cpp)
2222
# Link against the whisper.cpp library
2323
target_link_libraries(web_socket_server whisper ${SDL2_LIBRARIES})
2424

25+
add_executable(stream_components stream_components.cpp)
26+
target_link_libraries(stream_components whisper ${SDL2_LIBRARIES})
27+
2528

2629

stream_components.cpp

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <stdexcept>
#include <string>

#include "stream_components_audio.h"
#include "stream_components_params.h"
#include "stream_components_output.h"
#include "stream_components_server.h"
6+
7+
using namespace stream_components;
8+
9+
struct whisper_params {
10+
audio_params audio;
11+
server_params server;
12+
13+
void initialize() {
14+
audio.initialize();
15+
server.initialize();
16+
}
17+
};
18+
19+
20+
// Forward declaration: whisper_params_parse() below calls this before its definition appears.
void whisper_print_usage(int argc, char **argv, const whisper_params &params);
21+
22+
bool whisper_params_parse(int argc, char **argv, whisper_params &params) {
23+
for (int i = 1; i < argc; i++) {
24+
std::string arg = argv[i];
25+
26+
if (arg == "-h" || arg == "--help") {
27+
whisper_print_usage(argc, argv, params);
28+
exit(0);
29+
} else if (arg == "-t" || arg == "--threads") { params.server.n_threads = std::stoi(argv[++i]); }
30+
else if (arg == "--step") { params.audio.step_ms = std::stoi(argv[++i]); }
31+
else if (arg == "--length") { params.audio.length_ms = std::stoi(argv[++i]); }
32+
else if (arg == "--keep") { params.audio.keep_ms = std::stoi(argv[++i]); }
33+
else if (arg == "-c" || arg == "--capture") { params.audio.capture_id = std::stoi(argv[++i]); }
34+
//else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); }
35+
else if (arg == "-ac" || arg == "--audio-ctx") { params.audio.audio_ctx = std::stoi(argv[++i]); }
36+
else if (arg == "-vth" || arg == "--vad-thold") { params.audio.vad_thold = std::stof(argv[++i]); }
37+
else if (arg == "-fth" || arg == "--freq-thold") { params.audio.freq_thold = std::stof(argv[++i]); }
38+
else if (arg == "-su" || arg == "--speed-up") { params.server.speed_up = true; }
39+
else if (arg == "-tr" || arg == "--translate") { params.server.translate = true; }
40+
else if (arg == "-nf" || arg == "--no-fallback") { params.server.no_fallback = true; }
41+
//else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
42+
else if (arg == "-kc" || arg == "--keep-context") { params.server.no_context = false; }
43+
else if (arg == "-l" || arg == "--language") { params.server.language = argv[++i]; }
44+
else if (arg == "-m" || arg == "--model") { params.server.model = argv[++i]; }
45+
//else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
46+
else if (arg == "-tdrz" || arg == "--tinydiarize") { params.server.tinydiarize = true; }
47+
//else if (arg == "-sa" || arg == "--save-audio") { params.save_audio = true; }
48+
49+
else {
50+
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
51+
whisper_print_usage(argc, argv, params);
52+
exit(0);
53+
}
54+
}
55+
56+
return true;
57+
}
58+
59+
/**
 * Write the command-line help text to stderr.
 *
 * Each option line shows, in brackets, the value currently held in `params`
 * for that option.
 *
 * @param argv   argument vector; only argv[0] (the program name) is used
 * @param params settings whose current values are displayed
 */
void whisper_print_usage(int /*argc*/, char **argv, const whisper_params &params) {
    const auto &audio_cfg  = params.audio;
    const auto &server_cfg = params.server;

    // Text shown for on/off switches.
    const auto on_off = [](bool enabled) { return enabled ? "true" : "false"; };

    fprintf(stderr, "\n");
    fprintf(stderr, "usage: %s [options]\n", argv[0]);
    fprintf(stderr, "\n");
    fprintf(stderr, "options:\n");
    fprintf(stderr, " -h, --help [default] show this help message and exit\n");
    fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", server_cfg.n_threads);
    fprintf(stderr, " --step N [%-7d] audio step size in milliseconds\n", audio_cfg.step_ms);
    fprintf(stderr, " --length N [%-7d] audio length in milliseconds\n", audio_cfg.length_ms);
    fprintf(stderr, " --keep N [%-7d] audio to keep from previous step in ms\n", audio_cfg.keep_ms);
    fprintf(stderr, " -c ID, --capture ID [%-7d] capture device ID\n", audio_cfg.capture_id);
    //fprintf(stderr, " -mt N, --max-tokens N [%-7d] maximum number of tokens per audio chunk\n", params.max_tokens);
    fprintf(stderr, " -ac N, --audio-ctx N [%-7d] audio context size (0 - all)\n", audio_cfg.audio_ctx);
    fprintf(stderr, " -vth N, --vad-thold N [%-7.2f] voice activity detection threshold\n", audio_cfg.vad_thold);
    fprintf(stderr, " -fth N, --freq-thold N [%-7.2f] high-pass frequency cutoff\n", audio_cfg.freq_thold);
    fprintf(stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n", on_off(server_cfg.speed_up));
    fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", on_off(server_cfg.translate));
    fprintf(stderr, " -nf, --no-fallback [%-7s] do not use temperature fallback while decoding\n", on_off(server_cfg.no_fallback));
    //fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
    // The switch is "keep context", while the stored flag is "no context", hence the negation.
    fprintf(stderr, " -kc, --keep-context [%-7s] keep context between audio chunks\n", on_off(!server_cfg.no_context));
    fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", server_cfg.language.c_str());
    fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", server_cfg.model.c_str());
    //fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
    fprintf(stderr, " -tdrz, --tinydiarize [%-7s] enable tinydiarize (requires a tdrz model)\n", on_off(server_cfg.tinydiarize));
    //fprintf(stderr, " -sa, --save-audio [%-7s] save the recorded audio to a file\n", params.save_audio ? "true" : "false");
    fprintf(stderr, "\n");
}
92+
93+
int main(int argc, char **argv) {
94+
95+
// Read parameters...
96+
whisper_params params;
97+
98+
if (whisper_params_parse(argc, argv, params) == false) {
99+
return 1;
100+
}
101+
102+
// Compute derived parameters
103+
params.initialize();
104+
105+
// Check parameters
106+
if (params.server.language != "auto" && whisper_lang_id(params.server.language.c_str()) == -1) {
107+
fprintf(stderr, "error: unknown language '%s'\n", params.server.language.c_str());
108+
whisper_print_usage(argc, argv, params);
109+
exit(0);
110+
}
111+
112+
// Instantiate the audio input
113+
stream_components::LocalSDLMicrophone audio(params.audio);
114+
115+
// Instantiate the server
116+
stream_components::WhisperServer server(params.server, params.audio);
117+
118+
// Print the 'header'...
119+
WhisperOutput::server_to_json(std::cout, params.server, server.ctx);
120+
121+
// Run until Ctrl + C
122+
bool is_running = true;
123+
while (is_running) {
124+
125+
// handle Ctrl + C
126+
is_running = sdl_poll_events();
127+
if (!is_running) {
128+
break;
129+
}
130+
131+
// get next audio section
132+
auto pcmf32 = audio.get_next();
133+
134+
// get the whisper output
135+
auto result = server.process(pcmf32.data(), pcmf32.size());
136+
137+
// write the output as json to stdout (for this example)
138+
if (result) {
139+
result->transcription_to_json(std::cout);
140+
}
141+
}
142+
143+
std::cout << "EXITED MAIN LOOP" << std::endl;
144+
return 0;
145+
}

0 commit comments

Comments
 (0)