Skip to content

Commit 0de7e1b

Browse files
authored
Add C++ and Python API for Dolphin CTC models (#2085)
1 parent 1316719 commit 0de7e1b

27 files changed

+671
-26
lines changed

.github/scripts/test-offline-ctc.sh

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,39 @@ echo "PATH: $PATH"
1515

1616
which $EXE
1717

18+
for type in base small; do
19+
log "------------------------------------------------------------"
20+
log "Run Dolphin CTC models ($type int8)"
21+
log "------------------------------------------------------------"
22+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02.tar.bz2
23+
tar xvf sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02.tar.bz2
24+
rm sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02.tar.bz2
25+
26+
$EXE \
27+
--dolphin-model=./sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02/model.int8.onnx \
28+
--tokens=./sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02/tokens.txt \
29+
--debug=1 \
30+
./sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav
31+
32+
rm -rf sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02
33+
34+
log "------------------------------------------------------------"
35+
log "Run Dolphin CTC models ($type)"
36+
log "------------------------------------------------------------"
37+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02.tar.bz2
38+
tar xvf sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02.tar.bz2
39+
rm sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02.tar.bz2
40+
41+
$EXE \
42+
--dolphin-model=./sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02/model.onnx \
43+
--tokens=./sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02/tokens.txt \
44+
--debug=1 \
45+
./sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02/test_wavs/0.wav
46+
47+
rm -rf sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02
48+
done
49+
50+
1851
log "------------------------------------------------------------"
1952
log "Run NeMo GigaAM Russian models"
2053
log "------------------------------------------------------------"

.github/scripts/test-python.sh

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,15 @@ log() {
88
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
99
}
1010

11+
log "test offline dolphin ctc"
12+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
13+
tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
14+
rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
15+
16+
python3 ./python-api-examples/offline-dolphin-ctc-decode-files.py
17+
18+
rm -rf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
19+
1120
log "test offline speech enhancement (GTCRN)"
1221

1322
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
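(new workflow file; path not shown on the page, presumably .github/workflows/export-dolphin-ctc-to-onnx.yaml)

Lines changed: 48 additions & 0 deletions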
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,48 @@
1+
name: export-dolphin-ctc-to-onnx
2+
3+
on:
4+
workflow_dispatch:
5+
6+
concurrency:
7+
group: export-dolphin-ctc-to-onnx-${{ github.ref }}
8+
cancel-in-progress: true
9+
10+
jobs:
11+
export-dolphin-ctc-to-onnx:
12+
if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
13+
name: ${{ matrix.model_type }}
14+
runs-on: ${{ matrix.os }}
15+
strategy:
16+
fail-fast: false
17+
matrix:
18+
os: [macos-latest]
19+
model_type: [small, base]
20+
21+
steps:
22+
- uses: actions/checkout@v4
23+
24+
- name: Download ${{ matrix.model_type }}
25+
shell: bash
26+
run: |
27+
git lfs install
28+
type=${{ matrix.model_type }}
29+
30+
git clone https://huggingface.co/csukuangfj/sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02
31+
git clone https://huggingface.co/csukuangfj/sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02
32+
33+
rm -rf sherpa-onnx-dolphin-*/.git*
34+
35+
ls -lha sherpa-onnx-dolphin-*/
36+
37+
tar cjfv sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02.tar.bz2 sherpa-onnx-dolphin-$type-ctc-multi-lang-int8-2025-04-02
38+
tar cjfv sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02.tar.bz2 sherpa-onnx-dolphin-$type-ctc-multi-lang-2025-04-02
39+
40+
- name: Release
41+
uses: svenstaro/upload-release-action@v2
42+
with:
43+
file_glob: true
44+
file: ./*.tar.bz2
45+
overwrite: true
46+
repo_name: k2-fsa/sherpa-onnx
47+
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
48+
tag: asr-models

.github/workflows/linux.yaml

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -205,6 +205,16 @@ jobs:
205205
overwrite: true
206206
file: sherpa-onnx-*.tar.bz2
207207

208+
- name: Test offline CTC
209+
shell: bash
210+
run: |
211+
du -h -d1 .
212+
export PATH=$PWD/build/bin:$PATH
213+
export EXE=sherpa-onnx-offline
214+
215+
.github/scripts/test-offline-ctc.sh
216+
du -h -d1 .
217+
208218
- name: Test offline speech denoiser
209219
shell: bash
210220
run: |
@@ -249,16 +259,6 @@ jobs:
249259
.github/scripts/test-offline-moonshine.sh
250260
du -h -d1 .
251261
252-
- name: Test offline CTC
253-
shell: bash
254-
run: |
255-
du -h -d1 .
256-
export PATH=$PWD/build/bin:$PATH
257-
export EXE=sherpa-onnx-offline
258-
259-
.github/scripts/test-offline-ctc.sh
260-
du -h -d1 .
261-
262262
- name: Test C++ API
263263
shell: bash
264264
run: |

.github/workflows/macos.yaml

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -162,6 +162,14 @@ jobs:
162162
overwrite: true
163163
file: sherpa-onnx-*osx-universal2*.tar.bz2
164164

165+
- name: Test offline CTC
166+
shell: bash
167+
run: |
168+
export PATH=$PWD/build/bin:$PATH
169+
export EXE=sherpa-onnx-offline
170+
171+
.github/scripts/test-offline-ctc.sh
172+
165173
- name: Test offline speech denoiser
166174
shell: bash
167175
run: |
@@ -226,14 +234,6 @@ jobs:
226234
227235
.github/scripts/test-online-punctuation.sh
228236
229-
- name: Test offline CTC
230-
shell: bash
231-
run: |
232-
export PATH=$PWD/build/bin:$PATH
233-
export EXE=sherpa-onnx-offline
234-
235-
.github/scripts/test-offline-ctc.sh
236-
237237
- name: Test online CTC
238238
shell: bash
239239
run: |

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,7 @@
1+
if (CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
2+
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
3+
endif()
4+
15
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
26

37
set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14" CACHE STRING "Minimum OS X deployment version. Used only for macOS")
python-api-examples/offline-dolphin-ctc-decode-files.py

Lines changed: 69 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,69 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
This file shows how to use a non-streaming CTC model from Dolphin
5+
to decode files.
6+
7+
Please download model files from
8+
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
9+
"""
10+
11+
from pathlib import Path
12+
import time
13+
14+
import sherpa_onnx
15+
import soundfile as sf
16+
17+
18+
def create_recognizer():
19+
model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx"
20+
tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt"
21+
test_wav = (
22+
"./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav"
23+
)
24+
25+
if not Path(model).is_file() or not Path(test_wav).is_file():
26+
raise ValueError(
27+
"""Please download model files from
28+
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
29+
"""
30+
)
31+
return (
32+
sherpa_onnx.OfflineRecognizer.from_dolphin_ctc(
33+
model=model,
34+
tokens=tokens,
35+
debug=True,
36+
),
37+
test_wav,
38+
)
39+
40+
41+
def main():
42+
recognizer, wave_filename = create_recognizer()
43+
44+
audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
45+
audio = audio[:, 0] # only use the first channel
46+
47+
# audio is a 1-D float32 numpy array normalized to the range [-1, 1]
48+
# sample_rate does not need to be 16000 Hz
49+
50+
start = time.time()
51+
stream = recognizer.create_stream()
52+
stream.accept_waveform(sample_rate, audio)
53+
recognizer.decode_stream(stream)
54+
end = time.time()
55+
56+
print(wave_filename)
57+
print(stream.result)
58+
59+
elapsed_seconds = end - start
60+
audio_duration = len(audio) / sample_rate
61+
real_time_factor = elapsed_seconds / audio_duration
62+
63+
print(f"Elapsed seconds: {elapsed_seconds:.3f}")
64+
print(f"Audio duration in seconds: {audio_duration:.3f}")
65+
print(f"RTF: {elapsed_seconds:.3f}/{audio_duration:.3f} = {real_time_factor:.3f}")
66+
67+
68+
if __name__ == "__main__":
69+
main()

sherpa-onnx/csrc/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,8 @@ set(sources
2727
offline-ctc-fst-decoder.cc
2828
offline-ctc-greedy-search-decoder.cc
2929
offline-ctc-model.cc
30+
offline-dolphin-model-config.cc
31+
offline-dolphin-model.cc
3032
offline-fire-red-asr-greedy-search-decoder.cc
3133
offline-fire-red-asr-model-config.cc
3234
offline-fire-red-asr-model.cc

sherpa-onnx/csrc/offline-ctc-model.cc

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020

2121
#include "sherpa-onnx/csrc/file-utils.h"
2222
#include "sherpa-onnx/csrc/macros.h"
23+
#include "sherpa-onnx/csrc/offline-dolphin-model.h"
2324
#include "sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model.h"
2425
#include "sherpa-onnx/csrc/offline-tdnn-ctc-model.h"
2526
#include "sherpa-onnx/csrc/offline-telespeech-ctc-model.h"
@@ -110,6 +111,10 @@ static ModelType GetModelType(char *model_data, size_t model_data_length,
110111

111112
std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
112113
const OfflineModelConfig &config) {
114+
if (!config.dolphin.model.empty()) {
115+
return std::make_unique<OfflineDolphinModel>(config);
116+
}
117+
113118
// TODO(fangjun): Refactor it. We don't need to use model_type here
114119
ModelType model_type = ModelType::kUnknown;
115120

@@ -160,6 +165,10 @@ std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
160165
template <typename Manager>
161166
std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
162167
Manager *mgr, const OfflineModelConfig &config) {
168+
if (!config.dolphin.model.empty()) {
169+
return std::make_unique<OfflineDolphinModel>(mgr, config);
170+
}
171+
163172
// TODO(fangjun): Refactor it. We don't need to use model_type here
164173
ModelType model_type = ModelType::kUnknown;
165174

sherpa-onnx/csrc/offline-ctc-model.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,10 @@ class OfflineCtcModel {
6464
// return true for models from https://github.com/salute-developers/GigaAM
6565
// return false otherwise
6666
virtual bool IsGigaAM() const { return false; }
67+
68+
// Dolphin models use global CMVN. The default implementation does nothing.
69+
virtual void NormalizeFeatures(float *features, int32_t num_frames,
70+
int32_t feat_dim) const {}
6771
};
6872

6973
} // namespace sherpa_onnx

0 commit comments

Comments
 (0)