Skip to content

Commit 21a7a36

Browse files
authored
Add WebAssembly for SenseVoice (k2-fsa#1158)
1 parent 4cf297b commit 21a7a36

21 files changed

+383
-351
lines changed

.github/scripts/test-nodejs-npm.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,13 @@ ls -lh
1010
ls -lh node_modules
1111

1212
# offline asr
13+
#
14+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
15+
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
16+
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
17+
18+
node ./test-offline-sense-voice.js
19+
rm -rf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17
1320

1421
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
1522
ls -lh

.github/workflows/test-nodejs.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@ jobs:
4848
with:
4949
fetch-depth: 0
5050

51+
- name: ccache
52+
uses: hendrikmuhs/ccache-action@v1.2
53+
with:
54+
key: ${{ matrix.os }}-${{ matrix.build_type }}-wasm-nodejs
55+
5156
- name: Install emsdk
5257
uses: mymindstorm/setup-emsdk@v14
5358

@@ -77,6 +82,10 @@ jobs:
7782
env:
7883
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
7984
run: |
85+
export CMAKE_CXX_COMPILER_LAUNCHER=ccache
86+
export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
87+
cmake --version
88+
8089
./build-wasm-simd-nodejs.sh
8190
cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.js ./scripts/nodejs/
8291
cp -v build-wasm-simd-nodejs/install/bin/wasm/nodejs/*.wasm ./scripts/nodejs/

nodejs-examples/README.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,21 @@ tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
8888
node ./test-offline-paraformer.js
8989
```
9090

91+
## ./test-offline-sense-voice.js
92+
93+
[./test-offline-sense-voice.js](./test-offline-sense-voice.js) demonstrates
94+
how to decode a file with a non-streaming SenseVoice model.
95+
96+
You can use the following command to run it:
97+
98+
```bash
99+
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
100+
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
101+
rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
102+
103+
node ./test-offline-sense-voice.js
104+
```
105+
91106
## ./test-offline-transducer.js
92107

93108
[./test-offline-transducer.js](./test-offline-transducer.js) demonstrates

nodejs-examples/test-offline-nemo-ctc.js

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -13,47 +13,21 @@ function createOfflineRecognizer() {
1313
};
1414

1515
let modelConfig = {
16-
transducer: {
17-
encoder: '',
18-
decoder: '',
19-
joiner: '',
20-
},
21-
paraformer: {
22-
model: '',
23-
},
2416
nemoCtc: {
2517
model: './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx',
2618
},
27-
whisper: {
28-
encoder: '',
29-
decoder: '',
30-
language: '',
31-
task: '',
32-
tailPaddings: -1,
33-
},
34-
tdnn: {
35-
model: '',
36-
},
3719
tokens: './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt',
3820
numThreads: 1,
3921
debug: 0,
4022
provider: 'cpu',
4123
modelType: 'nemo_ctc',
4224
};
4325

44-
let lmConfig = {
45-
model: '',
46-
scale: 1.0,
47-
};
48-
4926
let config = {
5027
featConfig: featConfig,
5128
modelConfig: modelConfig,
52-
lmConfig: lmConfig,
5329
decodingMethod: 'greedy_search',
5430
maxActivePaths: 4,
55-
hotwordsFile: '',
56-
hotwordsScore: 1.5,
5731
};
5832

5933
return sherpa_onnx.createOfflineRecognizer(config);

nodejs-examples/test-offline-paraformer-itn.js

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -13,47 +13,21 @@ function createOfflineRecognizer() {
1313
};
1414

1515
let modelConfig = {
16-
transducer: {
17-
encoder: '',
18-
decoder: '',
19-
joiner: '',
20-
},
2116
paraformer: {
2217
model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx',
2318
},
24-
nemoCtc: {
25-
model: '',
26-
},
27-
whisper: {
28-
encoder: '',
29-
decoder: '',
30-
language: '',
31-
task: '',
32-
tailPaddings: -1,
33-
},
34-
tdnn: {
35-
model: '',
36-
},
3719
tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt',
3820
numThreads: 1,
3921
debug: 0,
4022
provider: 'cpu',
4123
modelType: 'paraformer',
4224
};
4325

44-
let lmConfig = {
45-
model: '',
46-
scale: 1.0,
47-
};
4826

4927
let config = {
5028
featConfig: featConfig,
5129
modelConfig: modelConfig,
52-
lmConfig: lmConfig,
5330
decodingMethod: 'greedy_search',
54-
maxActivePaths: 4,
55-
hotwordsFile: '',
56-
hotwordsScore: 1.5,
5731
// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
5832
ruleFsts: './itn_zh_number.fst',
5933
};

nodejs-examples/test-offline-paraformer.js

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -13,47 +13,20 @@ function createOfflineRecognizer() {
1313
};
1414

1515
let modelConfig = {
16-
transducer: {
17-
encoder: '',
18-
decoder: '',
19-
joiner: '',
20-
},
2116
paraformer: {
2217
model: './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx',
2318
},
24-
nemoCtc: {
25-
model: '',
26-
},
27-
whisper: {
28-
encoder: '',
29-
decoder: '',
30-
language: '',
31-
task: '',
32-
tailPaddings: -1,
33-
},
34-
tdnn: {
35-
model: '',
36-
},
3719
tokens: './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt',
3820
numThreads: 1,
3921
debug: 0,
4022
provider: 'cpu',
4123
modelType: 'paraformer',
4224
};
4325

44-
let lmConfig = {
45-
model: '',
46-
scale: 1.0,
47-
};
48-
4926
let config = {
5027
featConfig: featConfig,
5128
modelConfig: modelConfig,
52-
lmConfig: lmConfig,
5329
decodingMethod: 'greedy_search',
54-
maxActivePaths: 4,
55-
hotwordsFile: '',
56-
hotwordsScore: 1.5,
5730
};
5831

5932
return sherpa_onnx.createOfflineRecognizer(config);
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
2+
3+
const fs = require('fs');
4+
const {Readable} = require('stream');
5+
const wav = require('wav');
6+
7+
const sherpa_onnx = require('sherpa-onnx');
8+
9+
function createOfflineRecognizer() {
10+
let featConfig = {
11+
sampleRate: 16000,
12+
featureDim: 80,
13+
};
14+
15+
let modelConfig = {
16+
senseVoice: {
17+
model:
18+
'./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx',
19+
language: '',
20+
useInverseTextNormalization: 1,
21+
},
22+
tokens: './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt',
23+
numThreads: 1,
24+
debug: 0,
25+
provider: 'cpu',
26+
};
27+
28+
let config = {
29+
featConfig: featConfig,
30+
modelConfig: modelConfig,
31+
decodingMethod: 'greedy_search',
32+
};
33+
34+
return sherpa_onnx.createOfflineRecognizer(config);
35+
}
36+
37+
38+
const recognizer = createOfflineRecognizer();
39+
const stream = recognizer.createStream();
40+
41+
const waveFilename =
42+
'./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav';
43+
44+
const reader = new wav.Reader();
45+
const readable = new Readable().wrap(reader);
46+
const buf = [];
47+
48+
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
49+
if (sampleRate != recognizer.config.featConfig.sampleRate) {
50+
throw new Error(`Only support sampleRate ${
51+
recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
52+
}
53+
54+
if (audioFormat != 1) {
55+
throw new Error(`Only support PCM format. Given ${audioFormat}`);
56+
}
57+
58+
if (channels != 1) {
59+
throw new Error(`Only a single channel. Given ${channels}`);
60+
}
61+
62+
if (bitDepth != 16) {
63+
throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
64+
}
65+
});
66+
67+
fs.createReadStream(waveFilename, {'highWaterMark': 4096})
68+
.pipe(reader)
69+
.on('finish', function(err) {
70+
// tail padding
71+
const floatSamples =
72+
new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
73+
74+
buf.push(floatSamples);
75+
const flattened =
76+
Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
77+
78+
stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
79+
recognizer.decode(stream);
80+
const text = recognizer.getResult(stream).text;
81+
console.log(text);
82+
83+
stream.free();
84+
recognizer.free();
85+
});
86+
87+
readable.on('readable', function() {
88+
let chunk;
89+
while ((chunk = readable.read()) != null) {
90+
const int16Samples = new Int16Array(
91+
chunk.buffer, chunk.byteOffset,
92+
chunk.length / Int16Array.BYTES_PER_ELEMENT);
93+
94+
const floatSamples = new Float32Array(int16Samples.length);
95+
for (let i = 0; i < floatSamples.length; i++) {
96+
floatSamples[i] = int16Samples[i] / 32768.0;
97+
}
98+
99+
buf.push(floatSamples);
100+
}
101+
});

nodejs-examples/test-offline-transducer.js

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -21,38 +21,16 @@ function createOfflineRecognizer() {
2121
joiner:
2222
'./sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.int8.onnx',
2323
},
24-
paraformer: {
25-
model: '',
26-
},
27-
nemoCtc: {
28-
model: '',
29-
},
30-
whisper: {
31-
encoder: '',
32-
decoder: '',
33-
language: '',
34-
task: '',
35-
tailPaddings: -1,
36-
},
37-
tdnn: {
38-
model: '',
39-
},
4024
tokens: './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt',
4125
numThreads: 1,
4226
debug: 0,
4327
provider: 'cpu',
4428
modelType: 'transducer',
4529
};
4630

47-
let lmConfig = {
48-
model: '',
49-
scale: 1.0,
50-
};
51-
5231
let config = {
5332
featConfig: featConfig,
5433
modelConfig: modelConfig,
55-
lmConfig: lmConfig,
5634
decodingMethod: 'greedy_search',
5735
maxActivePaths: 4,
5836
hotwordsFile: '',

nodejs-examples/test-offline-tts-en.js

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,8 @@ const sherpa_onnx = require('sherpa-onnx');
55
function createOfflineTts() {
66
let offlineTtsVitsModelConfig = {
77
model: './vits-piper-en_US-amy-low/en_US-amy-low.onnx',
8-
lexicon: '',
98
tokens: './vits-piper-en_US-amy-low/tokens.txt',
109
dataDir: './vits-piper-en_US-amy-low/espeak-ng-data',
11-
dictDir: '',
1210
noiseScale: 0.667,
1311
noiseScaleW: 0.8,
1412
lengthScale: 1.0,
@@ -22,8 +20,6 @@ function createOfflineTts() {
2220

2321
let offlineTtsConfig = {
2422
offlineTtsModelConfig: offlineTtsModelConfig,
25-
ruleFsts: '',
26-
ruleFars: '',
2723
maxNumSentences: 1,
2824
};
2925

nodejs-examples/test-offline-tts-zh.js

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@ function createOfflineTts() {
77
model: './vits-icefall-zh-aishell3/model.onnx',
88
lexicon: './vits-icefall-zh-aishell3/lexicon.txt',
99
tokens: './vits-icefall-zh-aishell3/tokens.txt',
10-
dataDir: '',
11-
dictDir: '',
1210
noiseScale: 0.667,
1311
noiseScaleW: 0.8,
1412
lengthScale: 1.0,
@@ -31,7 +29,6 @@ function createOfflineTts() {
3129
return sherpa_onnx.createOfflineTts(offlineTtsConfig);
3230
}
3331

34-
3532
const tts = createOfflineTts();
3633
const speakerId = 66;
3734
const speed = 1.0;

0 commit comments

Comments
 (0)