Skip to content

Commit 8b989a8

Browse files
authored
Fix keyword spotting. (#1689)
Reset the stream right after detecting a keyword
1 parent b943341 commit 8b989a8

File tree

43 files changed

+823
-303
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+823
-303
lines changed

.github/scripts/test-python.sh

Lines changed: 1 addition & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -574,29 +574,6 @@ echo "sherpa_onnx version: $sherpa_onnx_version"
574574
pwd
575575
ls -lh
576576

577-
repo=sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01
578-
log "Start testing ${repo}"
579-
580-
pushd $dir
581-
curl -LS -O https://github.com/pkufool/keyword-spotting-models/releases/download/v0.1/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01.tar.bz
582-
tar xf sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01.tar.bz
583-
rm sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01.tar.bz
584-
popd
585-
586-
repo=$dir/$repo
587-
ls -lh $repo
588-
589-
python3 ./python-api-examples/keyword-spotter.py \
590-
--tokens=$repo/tokens.txt \
591-
--encoder=$repo/encoder-epoch-12-avg-2-chunk-16-left-64.onnx \
592-
--decoder=$repo/decoder-epoch-12-avg-2-chunk-16-left-64.onnx \
593-
--joiner=$repo/joiner-epoch-12-avg-2-chunk-16-left-64.onnx \
594-
--keywords-file=$repo/test_wavs/test_keywords.txt \
595-
$repo/test_wavs/0.wav \
596-
$repo/test_wavs/1.wav
597-
598-
rm -rf $repo
599-
600577
if [[ x$OS != x'windows-latest' ]]; then
601578
echo "OS: $OS"
602579

@@ -612,15 +589,7 @@ if [[ x$OS != x'windows-latest' ]]; then
612589
repo=$dir/$repo
613590
ls -lh $repo
614591

615-
python3 ./python-api-examples/keyword-spotter.py \
616-
--tokens=$repo/tokens.txt \
617-
--encoder=$repo/encoder-epoch-12-avg-2-chunk-16-left-64.onnx \
618-
--decoder=$repo/decoder-epoch-12-avg-2-chunk-16-left-64.onnx \
619-
--joiner=$repo/joiner-epoch-12-avg-2-chunk-16-left-64.onnx \
620-
--keywords-file=$repo/test_wavs/test_keywords.txt \
621-
$repo/test_wavs/3.wav \
622-
$repo/test_wavs/4.wav \
623-
$repo/test_wavs/5.wav
592+
python3 ./python-api-examples/keyword-spotter.py
624593

625594
python3 sherpa-onnx/python/tests/test_keyword_spotter.py --verbose
626595

.github/workflows/c-api.yaml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,27 @@ jobs:
7979
otool -L ./install/lib/libsherpa-onnx-c-api.dylib
8080
fi
8181
82+
- name: Test kws (zh)
83+
shell: bash
84+
run: |
85+
gcc -o kws-c-api ./c-api-examples/kws-c-api.c \
86+
-I ./build/install/include \
87+
-L ./build/install/lib/ \
88+
-l sherpa-onnx-c-api \
89+
-l onnxruntime
90+
91+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
92+
tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
93+
rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
94+
95+
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
96+
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
97+
98+
./kws-c-api
99+
100+
rm ./kws-c-api
101+
rm -rf sherpa-onnx-kws-*
102+
82103
- name: Test Kokoro TTS (en)
83104
shell: bash
84105
run: |

.github/workflows/cxx-api.yaml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,28 @@ jobs:
8181
otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib
8282
fi
8383
84+
- name: Test KWS (zh)
85+
shell: bash
86+
run: |
87+
g++ -std=c++17 -o kws-cxx-api ./cxx-api-examples/kws-cxx-api.cc \
88+
-I ./build/install/include \
89+
-L ./build/install/lib/ \
90+
-l sherpa-onnx-cxx-api \
91+
-l sherpa-onnx-c-api \
92+
-l onnxruntime
93+
94+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
95+
tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
96+
rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
97+
98+
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
99+
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
100+
101+
./kws-cxx-api
102+
103+
rm kws-cxx-api
104+
rm -rf sherpa-onnx-kws-*
105+
84106
- name: Test Kokoro TTS (en)
85107
shell: bash
86108
run: |

android/SherpaOnnxKws/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -151,24 +151,27 @@ class MainActivity : AppCompatActivity() {
151151
stream.acceptWaveform(samples, sampleRate = sampleRateInHz)
152152
while (kws.isReady(stream)) {
153153
kws.decode(stream)
154-
}
155154

156-
val text = kws.getResult(stream).keyword
155+
val text = kws.getResult(stream).keyword
156+
157+
var textToDisplay = lastText
157158

158-
var textToDisplay = lastText
159+
if (text.isNotBlank()) {
160+
// Remember to reset the stream right after detecting a keyword
159161

160-
if (text.isNotBlank()) {
161-
if (lastText.isBlank()) {
162-
textToDisplay = "$idx: $text"
163-
} else {
164-
textToDisplay = "$idx: $text\n$lastText"
162+
kws.reset(stream)
163+
if (lastText.isBlank()) {
164+
textToDisplay = "$idx: $text"
165+
} else {
166+
textToDisplay = "$idx: $text\n$lastText"
167+
}
168+
lastText = "$idx: $text\n$lastText"
169+
idx += 1
165170
}
166-
lastText = "$idx: $text\n$lastText"
167-
idx += 1
168-
}
169171

170-
runOnUiThread {
171-
textView.text = textToDisplay
172+
runOnUiThread {
173+
textView.text = textToDisplay
174+
}
172175
}
173176
}
174177
}

c-api-examples/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ include_directories(${CMAKE_SOURCE_DIR})
44
add_executable(decode-file-c-api decode-file-c-api.c)
55
target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs)
66

7+
add_executable(kws-c-api kws-c-api.c)
8+
target_link_libraries(kws-c-api sherpa-onnx-c-api)
9+
710
if(SHERPA_ONNX_ENABLE_TTS)
811
add_executable(offline-tts-c-api offline-tts-c-api.c)
912
target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs)

c-api-examples/kws-c-api.c

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
// c-api-examples/kws-c-api.c
2+
//
3+
// Copyright (c) 2025 Xiaomi Corporation
4+
//
5+
// This file demonstrates keyword spotting with sherpa-onnx's C API.
6+
// clang-format off
7+
//
8+
// Usage
9+
//
10+
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
11+
// tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
12+
// rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01-mobile.tar.bz2
13+
//
14+
// ./kws-c-api
15+
//
16+
// clang-format on
17+
#include <stdio.h>
18+
#include <stdlib.h> // exit
19+
#include <string.h> // memset
20+
21+
#include "sherpa-onnx/c-api/c-api.h"
22+
23+
int32_t main() {
24+
SherpaOnnxKeywordSpotterConfig config;
25+
26+
memset(&config, 0, sizeof(config));
27+
config.model_config.transducer.encoder =
28+
"./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/"
29+
"encoder-epoch-12-avg-2-chunk-16-left-64.onnx";
30+
31+
config.model_config.transducer.decoder =
32+
"./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/"
33+
"decoder-epoch-12-avg-2-chunk-16-left-64.onnx";
34+
35+
config.model_config.transducer.joiner =
36+
"./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/"
37+
"joiner-epoch-12-avg-2-chunk-16-left-64.onnx";
38+
39+
config.model_config.tokens =
40+
"./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt";
41+
42+
config.model_config.provider = "cpu";
43+
config.model_config.num_threads = 1;
44+
config.model_config.debug = 1;
45+
46+
config.keywords_file =
47+
"./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/"
48+
"test_keywords.txt";
49+
50+
const SherpaOnnxKeywordSpotter *kws = SherpaOnnxCreateKeywordSpotter(&config);
51+
if (!kws) {
52+
fprintf(stderr, "Please check your config");
53+
exit(-1);
54+
}
55+
56+
fprintf(stderr,
57+
"--Test pre-defined keywords from test_wavs/test_keywords.txt--\n");
58+
59+
const char *wav_filename =
60+
"./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav";
61+
62+
float tail_paddings[8000] = {0}; // 0.5 seconds
63+
64+
const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
65+
if (wave == NULL) {
66+
fprintf(stderr, "Failed to read %s\n", wav_filename);
67+
exit(-1);
68+
}
69+
70+
const SherpaOnnxOnlineStream *stream = SherpaOnnxCreateKeywordStream(kws);
71+
if (!stream) {
72+
fprintf(stderr, "Failed to create stream\n");
73+
exit(-1);
74+
}
75+
76+
SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, wave->samples,
77+
wave->num_samples);
78+
79+
SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
80+
sizeof(tail_paddings) / sizeof(float));
81+
SherpaOnnxOnlineStreamInputFinished(stream);
82+
while (SherpaOnnxIsKeywordStreamReady(kws, stream)) {
83+
SherpaOnnxDecodeKeywordStream(kws, stream);
84+
const SherpaOnnxKeywordResult *r = SherpaOnnxGetKeywordResult(kws, stream);
85+
if (r && r->json && strlen(r->keyword)) {
86+
fprintf(stderr, "Detected keyword: %s\n", r->json);
87+
88+
// Remember to reset the keyword stream right after a keyword is detected
89+
SherpaOnnxResetKeywordStream(kws, stream);
90+
}
91+
SherpaOnnxDestroyKeywordResult(r);
92+
}
93+
SherpaOnnxDestroyOnlineStream(stream);
94+
95+
// --------------------------------------------------------------------------
96+
97+
fprintf(stderr, "--Use pre-defined keywords + add a new keyword--\n");
98+
99+
stream = SherpaOnnxCreateKeywordStreamWithKeywords(kws, "y ǎn y uán @演员");
100+
101+
SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, wave->samples,
102+
wave->num_samples);
103+
104+
SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
105+
sizeof(tail_paddings) / sizeof(float));
106+
SherpaOnnxOnlineStreamInputFinished(stream);
107+
while (SherpaOnnxIsKeywordStreamReady(kws, stream)) {
108+
SherpaOnnxDecodeKeywordStream(kws, stream);
109+
const SherpaOnnxKeywordResult *r = SherpaOnnxGetKeywordResult(kws, stream);
110+
if (r && r->json && strlen(r->keyword)) {
111+
fprintf(stderr, "Detected keyword: %s\n", r->json);
112+
113+
// Remember to reset the keyword stream right after a keyword is detected
114+
SherpaOnnxResetKeywordStream(kws, stream);
115+
}
116+
SherpaOnnxDestroyKeywordResult(r);
117+
}
118+
SherpaOnnxDestroyOnlineStream(stream);
119+
120+
// --------------------------------------------------------------------------
121+
122+
fprintf(stderr, "--Use pre-defined keywords + add two new keywords--\n");
123+
124+
stream = SherpaOnnxCreateKeywordStreamWithKeywords(
125+
kws, "y ǎn y uán @演员/zh ī m íng @知名");
126+
127+
SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, wave->samples,
128+
wave->num_samples);
129+
130+
SherpaOnnxOnlineStreamAcceptWaveform(stream, wave->sample_rate, tail_paddings,
131+
sizeof(tail_paddings) / sizeof(float));
132+
SherpaOnnxOnlineStreamInputFinished(stream);
133+
while (SherpaOnnxIsKeywordStreamReady(kws, stream)) {
134+
SherpaOnnxDecodeKeywordStream(kws, stream);
135+
const SherpaOnnxKeywordResult *r = SherpaOnnxGetKeywordResult(kws, stream);
136+
if (r && r->json && strlen(r->keyword)) {
137+
fprintf(stderr, "Detected keyword: %s\n", r->json);
138+
139+
// Remember to reset the keyword stream right after a keyword is detected
140+
SherpaOnnxResetKeywordStream(kws, stream);
141+
}
142+
SherpaOnnxDestroyKeywordResult(r);
143+
}
144+
SherpaOnnxDestroyOnlineStream(stream);
145+
146+
SherpaOnnxFreeWave(wave);
147+
SherpaOnnxDestroyKeywordSpotter(kws);
148+
149+
return 0;
150+
}

cxx-api-examples/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ include_directories(${CMAKE_SOURCE_DIR})
33
add_executable(streaming-zipformer-cxx-api ./streaming-zipformer-cxx-api.cc)
44
target_link_libraries(streaming-zipformer-cxx-api sherpa-onnx-cxx-api)
55

6+
add_executable(kws-cxx-api ./kws-cxx-api.cc)
7+
target_link_libraries(kws-cxx-api sherpa-onnx-cxx-api)
8+
69
add_executable(streaming-zipformer-rtf-cxx-api ./streaming-zipformer-rtf-cxx-api.cc)
710
target_link_libraries(streaming-zipformer-rtf-cxx-api sherpa-onnx-cxx-api)
811

0 commit comments

Comments
 (0)