Skip to content

Commit 0becabc

Browse files
authored
stream.wasm : add language selection support (#3354)
* stream.wasm : add language selection support This commit adds support for selecting the language in the stream.wasm example. This includes adding the model `base`, which supports multilingual transcription, and allowing the user to select a language from a dropdown menu in the HTML interface. The motivation for this is that it allows users to transcribe audio in various languages. Refs: #3347 * squash! stream.wasm : add language selection support Remove strdup() for language in stream.wasm and update button text for base (should not be "base.en" but just "base").
1 parent f7502dc commit 0becabc

File tree

2 files changed

+83
-7
lines changed

2 files changed

+83
-7
lines changed

examples/stream.wasm/emscripten.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,11 @@ void stream_set_status(const std::string & status) {
3131
g_status = status;
3232
}
3333

34-
void stream_main(size_t index) {
34+
void stream_main(size_t index, const std::string & lang) {
3535
stream_set_status("loading data ...");
3636

3737
struct whisper_full_params wparams = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
38+
bool is_multilingual = whisper_is_multilingual(g_contexts[index]);
3839

3940
wparams.n_threads = std::min(N_THREAD, (int) std::thread::hardware_concurrency());
4041
wparams.offset_ms = 0;
@@ -52,7 +53,7 @@ void stream_main(size_t index) {
5253
// disable temperature fallback
5354
wparams.temperature_inc = -1.0f;
5455

55-
wparams.language = "en";
56+
wparams.language = is_multilingual ? lang.c_str() : "en";
5657

5758
printf("stream: using %d threads\n", wparams.n_threads);
5859

@@ -127,9 +128,8 @@ void stream_main(size_t index) {
127128
g_contexts[index] = nullptr;
128129
}
129130
}
130-
131131
EMSCRIPTEN_BINDINGS(stream) {
132-
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
132+
emscripten::function("init", emscripten::optional_override([](const std::string & path_model, const std::string & lang) {
133133
for (size_t i = 0; i < g_contexts.size(); ++i) {
134134
if (g_contexts[i] == nullptr) {
135135
g_contexts[i] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
@@ -138,8 +138,8 @@ EMSCRIPTEN_BINDINGS(stream) {
138138
if (g_worker.joinable()) {
139139
g_worker.join();
140140
}
141-
g_worker = std::thread([i]() {
142-
stream_main(i);
141+
g_worker = std::thread([i, lang]() {
142+
stream_main(i, lang);
143143
});
144144

145145
return i + 1;

examples/stream.wasm/index-tmpl.html

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
Whisper model: <span id="model-whisper-status"></span>
5656
<button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
5757
<button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
58+
<button id="fetch-whisper-base" onclick="loadWhisper('base')">base (142 MB)</button>
5859
<br><br>
5960
Quantized models:<br><br>
6061
<button id="fetch-whisper-tiny-en-q5_1" onclick="loadWhisper('tiny-en-q5_1')">tiny.en (Q5_1, 31 MB)</button>
@@ -66,6 +67,77 @@
6667
-->
6768
</div>
6869

70+
<table>
71+
<tr>
72+
<td>
73+
Language:
74+
<select id="language" name="language">
75+
<option value="en">English</option>
76+
<option value="ar">Arabic</option>
77+
<option value="hy">Armenian</option>
78+
<option value="az">Azerbaijani</option>
79+
<option value="eu">Basque</option>
80+
<option value="be">Belarusian</option>
81+
<option value="bn">Bengali</option>
82+
<option value="bg">Bulgarian</option>
83+
<option value="ca">Catalan</option>
84+
<option value="zh">Chinese</option>
85+
<option value="hr">Croatian</option>
86+
<option value="cs">Czech</option>
87+
<option value="da">Danish</option>
88+
<option value="nl">Dutch</option>
89+
<option value="en">English</option>
90+
<option value="et">Estonian</option>
91+
<option value="tl">Filipino</option>
92+
<option value="fi">Finnish</option>
93+
<option value="fr">French</option>
94+
<option value="gl">Galician</option>
95+
<option value="ka">Georgian</option>
96+
<option value="de">German</option>
97+
<option value="el">Greek</option>
98+
<option value="gu">Gujarati</option>
99+
<option value="iw">Hebrew</option>
100+
<option value="hi">Hindi</option>
101+
<option value="hu">Hungarian</option>
102+
<option value="is">Icelandic</option>
103+
<option value="id">Indonesian</option>
104+
<option value="ga">Irish</option>
105+
<option value="it">Italian</option>
106+
<option value="ja">Japanese</option>
107+
<option value="kn">Kannada</option>
108+
<option value="ko">Korean</option>
109+
<option value="la">Latin</option>
110+
<option value="lv">Latvian</option>
111+
<option value="lt">Lithuanian</option>
112+
<option value="mk">Macedonian</option>
113+
<option value="ms">Malay</option>
114+
<option value="mt">Maltese</option>
115+
<option value="no">Norwegian</option>
116+
<option value="fa">Persian</option>
117+
<option value="pl">Polish</option>
118+
<option value="pt">Portuguese</option>
119+
<option value="ro">Romanian</option>
120+
<option value="ru">Russian</option>
121+
<option value="sr">Serbian</option>
122+
<option value="sk">Slovak</option>
123+
<option value="sl">Slovenian</option>
124+
<option value="es">Spanish</option>
125+
<option value="sw">Swahili</option>
126+
<option value="sv">Swedish</option>
127+
<option value="ta">Tamil</option>
128+
<option value="te">Telugu</option>
129+
<option value="th">Thai</option>
130+
<option value="tr">Turkish</option>
131+
<option value="uk">Ukrainian</option>
132+
<option value="ur">Urdu</option>
133+
<option value="vi">Vietnamese</option>
134+
<option value="cy">Welsh</option>
135+
<option value="yi">Yiddish</option>
136+
</select>
137+
</td>
138+
</tr>
139+
</table>
140+
69141
<br>
70142

71143
<div id="input">
@@ -176,6 +248,7 @@
176248
let urls = {
177249
'tiny.en': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en.bin',
178250
'base.en': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en.bin',
251+
'base' : 'https://whisper.ggerganov.com/ggml-model-whisper-base.bin',
179252

180253
'tiny-en-q5_1': 'https://whisper.ggerganov.com/ggml-model-whisper-tiny.en-q5_1.bin',
181254
'base-en-q5_1': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en-q5_1.bin',
@@ -184,6 +257,7 @@
184257
let sizes = {
185258
'tiny.en': 75,
186259
'base.en': 142,
260+
'base': 142,
187261

188262
'tiny-en-q5_1': 31,
189263
'base-en-q5_1': 57,
@@ -197,6 +271,7 @@
197271

198272
document.getElementById('fetch-whisper-tiny-en').style.display = 'none';
199273
document.getElementById('fetch-whisper-base-en').style.display = 'none';
274+
document.getElementById('fetch-whisper-base').style.display = 'none';
200275

201276
document.getElementById('fetch-whisper-tiny-en-q5_1').style.display = 'none';
202277
document.getElementById('fetch-whisper-base-en-q5_1').style.display = 'none';
@@ -212,6 +287,7 @@
212287
var el;
213288
el = document.getElementById('fetch-whisper-tiny-en'); if (el) el.style.display = 'inline-block';
214289
el = document.getElementById('fetch-whisper-base-en'); if (el) el.style.display = 'inline-block';
290+
el = document.getElementById('fetch-whisper-base'); if (el) el.style.display = 'inline-block';
215291

216292
el = document.getElementById('fetch-whisper-tiny-en-q5_1'); if (el) el.style.display = 'inline-block';
217293
el = document.getElementById('fetch-whisper-base-en-q5_1'); if (el) el.style.display = 'inline-block';
@@ -368,7 +444,7 @@
368444

369445
function onStart() {
370446
if (!instance) {
371-
instance = Module.init('whisper.bin');
447+
instance = Module.init('whisper.bin', document.getElementById('language').value);
372448

373449
if (instance) {
374450
printTextarea("js: whisper initialized, instance: " + instance);

0 commit comments

Comments
 (0)