Skip to content

Commit da4aad1

Browse files
authored
Add C and CXX API for Dolphin CTC models (#2088)
1 parent eee5575 commit da4aad1

File tree

12 files changed

+231
-5
lines changed

12 files changed

+231
-5
lines changed

.github/workflows/c-api.yaml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,36 @@ jobs:
7979
otool -L ./install/lib/libsherpa-onnx-c-api.dylib
8080
fi
8181
82+
- name: Test Dolphin CTC
83+
shell: bash
84+
run: |
85+
name=dolphin-ctc-c-api
86+
gcc -o $name ./c-api-examples/$name.c \
87+
-I ./build/install/include \
88+
-L ./build/install/lib/ \
89+
-l sherpa-onnx-c-api \
90+
-l onnxruntime
91+
92+
ls -lh $name
93+
94+
if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
95+
ldd ./$name
96+
echo "----"
97+
readelf -d ./$name
98+
fi
99+
100+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
101+
tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
102+
rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
103+
104+
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
105+
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
106+
107+
./$name
108+
109+
rm $name
110+
rm -rf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
111+
82112
- name: Test speech enhancement (GTCRN)
83113
shell: bash
84114
run: |

.github/workflows/cxx-api.yaml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,38 @@ jobs:
8181
otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib
8282
fi
8383
84+
- name: Test Dolphin CTC
85+
shell: bash
86+
run: |
87+
name=dolphin-ctc-cxx-api
88+
g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
89+
-I ./build/install/include \
90+
-L ./build/install/lib/ \
91+
-l sherpa-onnx-cxx-api \
92+
-l sherpa-onnx-c-api \
93+
-l onnxruntime
94+
95+
ls -lh $name
96+
97+
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
98+
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
99+
100+
if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
101+
ldd ./$name
102+
echo "----"
103+
readelf -d ./$name
104+
fi
105+
106+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
107+
tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
108+
rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
109+
110+
./$name
111+
112+
rm -rf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
113+
114+
rm $name
115+
84116
- name: Test VAD
85117
shell: bash
86118
run: |

c-api-examples/dolphin-ctc-c-api.c

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// c-api-examples/dolphin-ctc-c-api.c
2+
//
3+
// Copyright (c) 2025 Xiaomi Corporation
4+
5+
//
6+
// This file demonstrates how to use Dolphin CTC model with sherpa-onnx's C API.
7+
// clang-format off
8+
//
9+
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
10+
// tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
11+
// rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
12+
//
13+
// clang-format on
14+
15+
#include <stdio.h>
16+
#include <stdlib.h>
17+
#include <string.h>
18+
19+
#include "sherpa-onnx/c-api/c-api.h"
20+
21+
int32_t main() {
22+
// clang-format off
23+
const char *wav_filename = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav";
24+
const char *model_filename = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx";
25+
const char *tokens_filename = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt";
26+
// clang-format on
27+
28+
const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
29+
if (wave == NULL) {
30+
fprintf(stderr, "Failed to read %s\n", wav_filename);
31+
return -1;
32+
}
33+
34+
SherpaOnnxOfflineModelConfig offline_model_config;
35+
memset(&offline_model_config, 0, sizeof(offline_model_config));
36+
offline_model_config.debug = 1;
37+
offline_model_config.num_threads = 1;
38+
offline_model_config.provider = "cpu";
39+
offline_model_config.tokens = tokens_filename;
40+
offline_model_config.dolphin.model = model_filename;
41+
42+
// Recognizer config
43+
SherpaOnnxOfflineRecognizerConfig recognizer_config;
44+
memset(&recognizer_config, 0, sizeof(recognizer_config));
45+
recognizer_config.decoding_method = "greedy_search";
46+
recognizer_config.model_config = offline_model_config;
47+
48+
const SherpaOnnxOfflineRecognizer *recognizer =
49+
SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
50+
51+
if (recognizer == NULL) {
52+
fprintf(stderr, "Please check your config!\n");
53+
SherpaOnnxFreeWave(wave);
54+
return -1;
55+
}
56+
57+
const SherpaOnnxOfflineStream *stream =
58+
SherpaOnnxCreateOfflineStream(recognizer);
59+
60+
SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
61+
wave->num_samples);
62+
SherpaOnnxDecodeOfflineStream(recognizer, stream);
63+
const SherpaOnnxOfflineRecognizerResult *result =
64+
SherpaOnnxGetOfflineStreamResult(stream);
65+
66+
fprintf(stderr, "Decoded text: %s\n", result->text);
67+
68+
SherpaOnnxDestroyOfflineRecognizerResult(result);
69+
SherpaOnnxDestroyOfflineStream(stream);
70+
SherpaOnnxDestroyOfflineRecognizer(recognizer);
71+
SherpaOnnxFreeWave(wave);
72+
73+
return 0;
74+
}

cxx-api-examples/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api)
2424
add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc)
2525
target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api)
2626

27+
add_executable(dolphin-ctc-cxx-api ./dolphin-ctc-cxx-api.cc)
28+
target_link_libraries(dolphin-ctc-cxx-api sherpa-onnx-cxx-api)
29+
2730
add_executable(vad-cxx-api ./vad-cxx-api.cc)
2831
target_link_libraries(vad-cxx-api sherpa-onnx-cxx-api)
2932

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// cxx-api-examples/dolphin-ctc-cxx-api.cc
2+
// Copyright (c) 2025 Xiaomi Corporation
3+
4+
//
5+
// This file demonstrates how to use Dolphin CTC model with sherpa-onnx's C++
6+
// API.
7+
//
8+
// clang-format off
9+
//
10+
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
11+
// tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
12+
// rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
13+
//
14+
// clang-format on
15+
16+
#include <chrono> // NOLINT
17+
#include <iostream>
18+
#include <string>
19+
20+
#include "sherpa-onnx/c-api/cxx-api.h"
21+
22+
int32_t main() {
23+
using namespace sherpa_onnx::cxx; // NOLINT
24+
OfflineRecognizerConfig config;
25+
26+
// clang-format off
27+
config.model_config.dolphin.model = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx";
28+
config.model_config.tokens = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt";
29+
30+
std::string wave_filename = "./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav";
31+
// clang-format on
32+
33+
config.model_config.num_threads = 1;
34+
35+
std::cout << "Loading model\n";
36+
OfflineRecognizer recongizer = OfflineRecognizer::Create(config);
37+
if (!recongizer.Get()) {
38+
std::cerr << "Please check your config\n";
39+
return -1;
40+
}
41+
std::cout << "Loading model done\n";
42+
43+
Wave wave = ReadWave(wave_filename);
44+
if (wave.samples.empty()) {
45+
std::cerr << "Failed to read: '" << wave_filename << "'\n";
46+
return -1;
47+
}
48+
49+
std::cout << "Start recognition\n";
50+
const auto begin = std::chrono::steady_clock::now();
51+
52+
OfflineStream stream = recongizer.CreateStream();
53+
stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
54+
wave.samples.size());
55+
56+
recongizer.Decode(&stream);
57+
58+
OfflineRecognizerResult result = recongizer.GetResult(&stream);
59+
60+
const auto end = std::chrono::steady_clock::now();
61+
const float elapsed_seconds =
62+
std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
63+
.count() /
64+
1000.;
65+
float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
66+
float rtf = elapsed_seconds / duration;
67+
68+
std::cout << "text: " << result.text << "\n";
69+
printf("Number of threads: %d\n", config.model_config.num_threads);
70+
printf("Duration: %.3fs\n", duration);
71+
printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
72+
printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
73+
duration, rtf);
74+
75+
return 0;
76+
}

scripts/apk/generate-asr-2pass-apk-script.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,6 @@ def get_1st_models():
333333
rm -f bpe.model
334334
335335
rm -rf test_wavs
336-
rm README.md
337336
338337
ls -lh
339338
@@ -354,7 +353,6 @@ def get_1st_models():
354353
rm -f bpe.model
355354
356355
rm -rf test_wavs
357-
rm README.md
358356
359357
ls -lh
360358

scripts/apk/generate-asr-apk-script.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,6 @@ def get_models():
277277
rm -f bpe.model
278278
279279
rm -rf test_wavs
280-
rm README.md
281280
282281
ls -lh
283282
@@ -298,7 +297,6 @@ def get_models():
298297
rm -f bpe.model
299298
300299
rm -rf test_wavs
301-
rm README.md
302300
303301
ls -lh
304302

scripts/apk/generate-vad-asr-apk-script.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -448,7 +448,7 @@ def get_models():
448448
idx=25,
449449
lang="multi_lang",
450450
lang2="multi_lang",
451-
short_name="multi_lang",
451+
short_name="dolphin_base_ctc",
452452
cmd="""
453453
pushd $model_name
454454

sherpa-onnx/c-api/c-api.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,9 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig(
467467
recognizer_config.model_config.fire_red_asr.decoder =
468468
SHERPA_ONNX_OR(config->model_config.fire_red_asr.decoder, "");
469469

470+
recognizer_config.model_config.dolphin.model =
471+
SHERPA_ONNX_OR(config->model_config.dolphin.model, "");
472+
470473
recognizer_config.lm_config.model =
471474
SHERPA_ONNX_OR(config->lm_config.model, "");
472475
recognizer_config.lm_config.scale =

sherpa-onnx/c-api/c-api.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSenseVoiceModelConfig {
416416
int32_t use_itn;
417417
} SherpaOnnxOfflineSenseVoiceModelConfig;
418418

419+
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineDolphinModelConfig {
420+
const char *model;
421+
} SherpaOnnxOfflineDolphinModelConfig;
422+
419423
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig {
420424
SherpaOnnxOfflineTransducerModelConfig transducer;
421425
SherpaOnnxOfflineParaformerModelConfig paraformer;
@@ -438,6 +442,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig {
438442
SherpaOnnxOfflineSenseVoiceModelConfig sense_voice;
439443
SherpaOnnxOfflineMoonshineModelConfig moonshine;
440444
SherpaOnnxOfflineFireRedAsrModelConfig fire_red_asr;
445+
SherpaOnnxOfflineDolphinModelConfig dolphin;
441446
} SherpaOnnxOfflineModelConfig;
442447

443448
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig {

0 commit comments

Comments
 (0)