1
- #include < iostream>
2
- #include " stream_components_audio.h"
3
- #include " stream_components_params.h"
4
- #include " stream_components_output.h"
5
- #include " stream_components_service.h"
1
+ #pragma once
6
2
7
- using namespace stream_components ;
3
+ namespace stream_components {
4
+ struct whisper_local_stream_params {
5
+ audio_params audio;
6
+ service_params service;
8
7
9
- struct whisper_params {
10
- audio_params audio;
11
- service_params server;
12
-
13
- void initialize () {
14
- audio.initialize ();
15
- server.initialize ();
16
- }
17
- };
18
-
19
-
20
- void whisper_print_usage (int argc, char **argv, const whisper_params &params);
21
-
22
- bool whisper_params_parse (int argc, char **argv, whisper_params &params) {
23
- for (int i = 1 ; i < argc; i++) {
24
- std::string arg = argv[i];
25
-
26
- if (arg == " -h" || arg == " --help" ) {
27
- whisper_print_usage (argc, argv, params);
28
- exit (0 );
29
- } else if (arg == " -t" || arg == " --threads" ) { params.server .n_threads = std::stoi (argv[++i]); }
30
- else if (arg == " --step" ) { params.audio .step_ms = std::stoi (argv[++i]); }
31
- else if (arg == " --length" ) { params.audio .length_ms = std::stoi (argv[++i]); }
32
- else if (arg == " --keep" ) { params.audio .keep_ms = std::stoi (argv[++i]); }
33
- else if (arg == " -c" || arg == " --capture" ) { params.audio .capture_id = std::stoi (argv[++i]); }
34
- // else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); }
35
- else if (arg == " -ac" || arg == " --audio-ctx" ) { params.audio .audio_ctx = std::stoi (argv[++i]); }
36
- else if (arg == " -vth" || arg == " --vad-thold" ) { params.audio .vad_thold = std::stof (argv[++i]); }
37
- else if (arg == " -fth" || arg == " --freq-thold" ) { params.audio .freq_thold = std::stof (argv[++i]); }
38
- else if (arg == " -su" || arg == " --speed-up" ) { params.server .speed_up = true ; }
39
- else if (arg == " -tr" || arg == " --translate" ) { params.server .translate = true ; }
40
- else if (arg == " -nf" || arg == " --no-fallback" ) { params.server .no_fallback = true ; }
41
- // else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
42
- else if (arg == " -kc" || arg == " --keep-context" ) { params.server .no_context = false ; }
43
- else if (arg == " -l" || arg == " --language" ) { params.server .language = argv[++i]; }
44
- else if (arg == " -m" || arg == " --model" ) { params.server .model = argv[++i]; }
45
- // else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
46
- else if (arg == " -tdrz" || arg == " --tinydiarize" ) { params.server .tinydiarize = true ; }
47
- // else if (arg == "-sa" || arg == "--save-audio") { params.save_audio = true; }
48
-
49
- else {
50
- fprintf (stderr, " error: unknown argument: %s\n " , arg.c_str ());
51
- whisper_print_usage (argc, argv, params);
52
- exit (0 );
8
+ void initialize () {
9
+ audio.initialize ();
10
+ service.initialize ();
53
11
}
12
+ };
13
+
14
+ void whisper_print_usage (int /* argc*/ , char **argv, const whisper_local_stream_params &params) {
15
+ fprintf (stderr, " \n " );
16
+ fprintf (stderr, " usage: %s [options]\n " , argv[0 ]);
17
+ fprintf (stderr, " \n " );
18
+ fprintf (stderr, " options:\n " );
19
+ fprintf (stderr, " -h, --help [default] show this help message and exit\n " );
20
+ fprintf (stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n " ,
21
+ params.service .n_threads );
22
+ fprintf (stderr, " --step N [%-7d] audio step size in milliseconds\n " , params.audio .step_ms );
23
+ fprintf (stderr, " --length N [%-7d] audio length in milliseconds\n " , params.audio .length_ms );
24
+ fprintf (stderr, " --keep N [%-7d] audio to keep from previous step in ms\n " ,
25
+ params.audio .keep_ms );
26
+ fprintf (stderr, " -c ID, --capture ID [%-7d] capture device ID\n " , params.audio .capture_id );
27
+ // fprintf(stderr, " -mt N, --max-tokens N [%-7d] maximum number of tokens per audio chunk\n", params.max_tokens);
28
+ fprintf (stderr, " -ac N, --audio-ctx N [%-7d] audio context size (0 - all)\n " , params.audio .audio_ctx );
29
+ fprintf (stderr, " -vth N, --vad-thold N [%-7.2f] voice activity detection threshold\n " ,
30
+ params.audio .vad_thold );
31
+ fprintf (stderr, " -fth N, --freq-thold N [%-7.2f] high-pass frequency cutoff\n " , params.audio .freq_thold );
32
+ fprintf (stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n " ,
33
+ params.service .speed_up ? " true" : " false" );
34
+ fprintf (stderr, " -tr, --translate [%-7s] translate from source language to english\n " ,
35
+ params.service .translate ? " true" : " false" );
36
+ fprintf (stderr, " -nf, --no-fallback [%-7s] do not use temperature fallback while decoding\n " ,
37
+ params.service .no_fallback ? " true" : " false" );
38
+ // fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
39
+ fprintf (stderr, " -kc, --keep-context [%-7s] keep context between audio chunks\n " ,
40
+ params.service .no_context ? " false" : " true" );
41
+ fprintf (stderr, " -l LANG, --language LANG [%-7s] spoken language\n " , params.service .language .c_str ());
42
+ fprintf (stderr, " -m FNAME, --model FNAME [%-7s] model path\n " , params.service .model .c_str ());
43
+ // fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
44
+ fprintf (stderr, " -tdrz, --tinydiarize [%-7s] enable tinydiarize (requires a tdrz model)\n " ,
45
+ params.service .tinydiarize ? " true" : " false" );
46
+ // fprintf(stderr, " -sa, --save-audio [%-7s] save the recorded audio to a file\n", params.save_audio ? "true" : "false");
47
+ fprintf (stderr, " \n " );
54
48
}
55
49
56
- return true ;
57
- }
58
-
59
- void whisper_print_usage (int /* argc*/ , char **argv, const whisper_params &params) {
60
- fprintf (stderr, " \n " );
61
- fprintf (stderr, " usage: %s [options]\n " , argv[0 ]);
62
- fprintf (stderr, " \n " );
63
- fprintf (stderr, " options:\n " );
64
- fprintf (stderr, " -h, --help [default] show this help message and exit\n " );
65
- fprintf (stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n " ,
66
- params.server .n_threads );
67
- fprintf (stderr, " --step N [%-7d] audio step size in milliseconds\n " , params.audio .step_ms );
68
- fprintf (stderr, " --length N [%-7d] audio length in milliseconds\n " , params.audio .length_ms );
69
- fprintf (stderr, " --keep N [%-7d] audio to keep from previous step in ms\n " , params.audio .keep_ms );
70
- fprintf (stderr, " -c ID, --capture ID [%-7d] capture device ID\n " , params.audio .capture_id );
71
- // fprintf(stderr, " -mt N, --max-tokens N [%-7d] maximum number of tokens per audio chunk\n", params.max_tokens);
72
- fprintf (stderr, " -ac N, --audio-ctx N [%-7d] audio context size (0 - all)\n " , params.audio .audio_ctx );
73
- fprintf (stderr, " -vth N, --vad-thold N [%-7.2f] voice activity detection threshold\n " , params.audio .vad_thold );
74
- fprintf (stderr, " -fth N, --freq-thold N [%-7.2f] high-pass frequency cutoff\n " , params.audio .freq_thold );
75
- fprintf (stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n " ,
76
- params.server .speed_up ? " true" : " false" );
77
- fprintf (stderr, " -tr, --translate [%-7s] translate from source language to english\n " ,
78
- params.server .translate ? " true" : " false" );
79
- fprintf (stderr, " -nf, --no-fallback [%-7s] do not use temperature fallback while decoding\n " ,
80
- params.server .no_fallback ? " true" : " false" );
81
- // fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
82
- fprintf (stderr, " -kc, --keep-context [%-7s] keep context between audio chunks\n " ,
83
- params.server .no_context ? " false" : " true" );
84
- fprintf (stderr, " -l LANG, --language LANG [%-7s] spoken language\n " , params.server .language .c_str ());
85
- fprintf (stderr, " -m FNAME, --model FNAME [%-7s] model path\n " , params.server .model .c_str ());
86
- // fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
87
- fprintf (stderr, " -tdrz, --tinydiarize [%-7s] enable tinydiarize (requires a tdrz model)\n " ,
88
- params.server .tinydiarize ? " true" : " false" );
89
- // fprintf(stderr, " -sa, --save-audio [%-7s] save the recorded audio to a file\n", params.save_audio ? "true" : "false");
90
- fprintf (stderr, " \n " );
91
- }
92
-
93
- int main (int argc, char **argv) {
94
-
95
- // Read parameters...
96
- whisper_params params;
97
-
98
- if (whisper_params_parse (argc, argv, params) == false ) {
99
- return 1 ;
100
- }
101
-
102
- // Compute derived parameters
103
- params.initialize ();
104
-
105
- // Check parameters
106
- if (params.server .language != " auto" && whisper_lang_id (params.server .language .c_str ()) == -1 ) {
107
- fprintf (stderr, " error: unknown language '%s'\n " , params.server .language .c_str ());
108
- whisper_print_usage (argc, argv, params);
109
- exit (0 );
110
- }
111
-
112
- // Instantiate the audio input
113
- stream_components::LocalSDLMicrophone audio (params.audio );
114
-
115
- // Instantiate the server
116
- stream_components::WhisperServer server (params.server , params.audio );
117
-
118
- // Print the 'header'...
119
- WhisperOutput::server_to_json (std::cout, params.server , server.ctx );
120
-
121
- // Run until Ctrl + C
122
- bool is_running = true ;
123
- while (is_running) {
124
-
125
- // handle Ctrl + C
126
- is_running = sdl_poll_events ();
127
- if (!is_running) {
128
- break ;
50
+ bool whisper_params_parse (int argc, char **argv, whisper_local_stream_params &params) {
51
+ for (int i = 1 ; i < argc; i++) {
52
+ std::string arg = argv[i];
53
+
54
+ if (arg == " -h" || arg == " --help" ) {
55
+ whisper_print_usage (argc, argv, params);
56
+ exit (0 );
57
+ } else if (arg == " -t" || arg == " --threads" ) { params.service .n_threads = std::stoi (argv[++i]); }
58
+ else if (arg == " --step" ) { params.audio .step_ms = std::stoi (argv[++i]); }
59
+ else if (arg == " --length" ) { params.audio .length_ms = std::stoi (argv[++i]); }
60
+ else if (arg == " --keep" ) { params.audio .keep_ms = std::stoi (argv[++i]); }
61
+ else if (arg == " -c" || arg == " --capture" ) { params.audio .capture_id = std::stoi (argv[++i]); }
62
+ // else if (arg == "-mt" || arg == "--max-tokens") { params.max_tokens = std::stoi(argv[++i]); }
63
+ else if (arg == " -ac" || arg == " --audio-ctx" ) { params.audio .audio_ctx = std::stoi (argv[++i]); }
64
+ else if (arg == " -vth" || arg == " --vad-thold" ) { params.audio .vad_thold = std::stof (argv[++i]); }
65
+ else if (arg == " -fth" || arg == " --freq-thold" ) { params.audio .freq_thold = std::stof (argv[++i]); }
66
+ else if (arg == " -su" || arg == " --speed-up" ) { params.service .speed_up = true ; }
67
+ else if (arg == " -tr" || arg == " --translate" ) { params.service .translate = true ; }
68
+ else if (arg == " -nf" || arg == " --no-fallback" ) { params.service .no_fallback = true ; }
69
+ // else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
70
+ else if (arg == " -kc" || arg == " --keep-context" ) { params.service .no_context = false ; }
71
+ else if (arg == " -l" || arg == " --language" ) { params.service .language = argv[++i]; }
72
+ else if (arg == " -m" || arg == " --model" ) { params.service .model = argv[++i]; }
73
+ // else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
74
+ else if (arg == " -tdrz" || arg == " --tinydiarize" ) { params.service .tinydiarize = true ; }
75
+ // else if (arg == "-sa" || arg == "--save-audio") { params.save_audio = true; }
76
+
77
+ else {
78
+ fprintf (stderr, " error: unknown argument: %s\n " , arg.c_str ());
79
+ whisper_print_usage (argc, argv, params);
80
+ exit (0 );
81
+ }
129
82
}
130
83
131
- // get next audio section
132
- auto pcmf32 = audio.get_next ();
133
-
134
- // get the whisper output
135
- auto result = server.process (pcmf32.data (), pcmf32.size ());
136
-
137
- // write the output as json to stdout (for this example)
138
- if (result) {
139
- result->transcription_to_json (std::cout);
140
- }
84
+ return true ;
141
85
}
142
-
143
- std::cout << " EXITED MAIN LOOP" << std::endl;
144
- return 0 ;
145
86
}
0 commit comments