XiaYucca
diff --git a/.github/scripts/test-offline-ctc.sh b/.github/scripts/test-offline-ctc.sh
Lines changed: 24 additions & 1 deletion
diff --git a/.github/scripts/test-python.sh b/.github/scripts/test-python.sh
Lines changed: 12 additions & 0 deletions
diff --git a/.github/workflows/export-sense-voice-to-onnx.yaml b/.github/workflows/export-sense-voice-to-onnx.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/.gitignore b/.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
Lines changed: 4 additions & 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
Lines changed: 1 addition & 1 deletion
diff --git a/python-api-examples/offline-sense-voice-ctc-decode-files.py b/python-api-examples/offline-sense-voice-ctc-decode-files.py
Lines changed: 67 additions & 0 deletions
diff --git a/scripts/sense-voice/export-onnx.py b/scripts/sense-voice/export-onnx.py
Lines changed: 7 additions & 2 deletions
diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc
Lines changed: 10 additions & 0 deletions
diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h
Lines changed: 7 additions & 0 deletions
@@ -15,7 +15,30 @@ echo "PATH: $PATH"
 
 which $EXE
 
-if false; then
+log "------------------------------------------------------------"
+log "Run SenseVoice models"
+log "------------------------------------------------------------"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+repo=sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17
+
+for m in model.onnx model.int8.onnx; do
+  for w in zh en yue ja ko; do
+    for use_itn in 0 1; do
+      echo "$m $w $use_itn"
+      time $EXE \
+        --tokens=$repo/tokens.txt \
+        --sense-voice-model=$repo/$m \
+        --sense-voice-use-itn=$use_itn \
+        $repo/test_wavs/$w.wav
+    done
+  done
+done
+
+rm -rf $repo
+
+if true; then
   # It has problems with onnxruntime 1.18
   log "------------------------------------------------------------"
   log "Run Wenet models"
 
@@ -10,6 +10,18 @@ log() {
 
 export GIT_CLONE_PROTECTION_ACTIVE=false
 
+log "test offline SenseVoice CTC"
+url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+name=$(basename $url)
+repo=$(basename -s .tar.bz2 $name)
+
+curl -SL -O $url
+tar xvf $name
+rm $name
+ls -lh $repo
+python3 ./python-api-examples/offline-sense-voice-ctc-decode-files.py
+rm -rf $repo
+
 log "test offline TeleSpeech CTC"
 url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
 name=$(basename $url)
 
@@ -73,7 +73,7 @@ jobs:
             echo "pwd: $PWD"
             ls -lh ../scripts/sense-voice
 
-            rm -rf ./
+            rm -rf ./*
 
             cp -v ../scripts/sense-voice/*.onnx .
             cp -v ../scripts/sense-voice/tokens.txt .
 
@@ -111,3 +111,4 @@ sherpa-onnx-telespeech-ctc-*
 *.fst
 .ccache
 lib*.a
+sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17
@@ -1,3 +1,7 @@
+## 1.10.17
+
+* Support SenseVoice CTC models.
+
 ## 1.10.16
 
 * Support zh-en TTS model from MeloTTS.
 
@@ -11,7 +11,7 @@ project(sherpa-onnx)
 # ./nodejs-addon-examples
 # ./dart-api-examples/
 # ./CHANGELOG.md
-set(SHERPA_ONNX_VERSION "1.10.16")
+set(SHERPA_ONNX_VERSION "1.10.17")
 
 # Disable warning about
 #
 
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+
+"""
+This file shows how to use a non-streaming SenseVoice CTC model from
+https://github.com/FunAudioLLM/SenseVoice
+to decode files.
+
+Please download model files from
+https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+
+For instance,
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+"""
+
+from pathlib import Path
+
+import sherpa_onnx
+import soundfile as sf
+
+
+def create_recognizer():
+    model = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx"
+    tokens = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt"
+    test_wav = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav"
+    #  test_wav = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/en.wav"
+    #  test_wav = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/ja.wav"
+    #  test_wav = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/ko.wav"
+    #  test_wav = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/yue.wav"
+
+    if not Path(model).is_file() or not Path(test_wav).is_file():
+        raise ValueError(
+            """Please download model files from
+            https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+            """
+        )
+    return (
+        sherpa_onnx.OfflineRecognizer.from_sense_voice(
+            model=model,
+            tokens=tokens,
+            use_itn=True,
+            debug=True,
+        ),
+        test_wav,
+    )
+
+
+def main():
+    recognizer, wave_filename = create_recognizer()
+
+    audio, sample_rate = sf.read(wave_filename, dtype="float32", always_2d=True)
+    audio = audio[:, 0]  # only use the first channel
+
+    # audio is a 1-D float32 numpy array normalized to the range [-1, 1]
+    # sample_rate does not need to be 16000 Hz
+
+    stream = recognizer.create_stream()
+    stream.accept_waveform(sample_rate, audio)
+    recognizer.decode_stream(stream)
+    print(wave_filename)
+    print(stream.result)
+
+
+if __name__ == "__main__":
+    main()
@@ -162,7 +162,9 @@ def main():
         "neg_mean": neg_mean,
         "inv_stddev": inv_stddev,
         "model_type": "sense_voice_ctc",
-        "version": "1",
+        # version 1: Use QInt8
+        # version 2: Use QUInt8
+        "version": "2",
         "model_author": "iic",
         "maintainer": "k2-fsa",
         "vocab_size": vocab_size,
@@ -185,7 +187,10 @@ def main():
         model_input=filename,
         model_output=filename_int8,
         op_types_to_quantize=["MatMul"],
-        weight_type=QuantType.QInt8,
+        # Note that we have to use QUInt8 here.
+        #
+        # When QInt8 is used, C++ onnxruntime produces incorrect results
+        weight_type=QuantType.QUInt8,
     )
 
 
 
@@ -310,6 +310,7 @@ struct SherpaOnnxOfflineStream {
 
 static sherpa_onnx::OfflineRecognizerConfig convertConfig(
     const SherpaOnnxOfflineRecognizerConfig *config);
+
 SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
     const SherpaOnnxOfflineRecognizerConfig *config) {
   sherpa_onnx::OfflineRecognizerConfig recognizer_config =
@@ -391,6 +392,15 @@ sherpa_onnx::OfflineRecognizerConfig convertConfig(
   recognizer_config.model_config.telespeech_ctc =
       SHERPA_ONNX_OR(config->model_config.telespeech_ctc, "");
 
+  recognizer_config.model_config.sense_voice.model =
+      SHERPA_ONNX_OR(config->model_config.sense_voice.model, "");
+
+  recognizer_config.model_config.sense_voice.language =
+      SHERPA_ONNX_OR(config->model_config.sense_voice.language, "");
+
+  recognizer_config.model_config.sense_voice.use_itn =
+      config->model_config.sense_voice.use_itn;
+
   recognizer_config.lm_config.model =
       SHERPA_ONNX_OR(config->lm_config.model, "");
   recognizer_config.lm_config.scale =
 
@@ -379,6 +379,12 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineLMConfig {
   float scale;
 } SherpaOnnxOfflineLMConfig;
 
+// Settings for a SenseVoice model used by the offline recognizer.
+// convertConfig() in c-api.cc copies every field of this struct into
+// recognizer_config.model_config.sense_voice.
+SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSenseVoiceModelConfig {
+  // Model to load -- presumably the path of the ONNX model file.
+  // Read through SHERPA_ONNX_OR(..., ""), so NULL presumably becomes "".
+  const char *model;
+  // Language setting, also read through SHERPA_ONNX_OR(..., "").
+  // NOTE(review): the accepted values are not visible here -- confirm.
+  const char *language;
+  // Copied as-is by convertConfig(); the command-line tests pass 0 and 1.
+  // Presumably toggles inverse text normalization -- TODO confirm.
+  int32_t use_itn;
+} SherpaOnnxOfflineSenseVoiceModelConfig;
+
 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig {
   SherpaOnnxOfflineTransducerModelConfig transducer;
   SherpaOnnxOfflineParaformerModelConfig paraformer;
@@ -398,6 +404,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig {
   const char *modeling_unit;
   const char *bpe_vocab;
   const char *telespeech_ctc;
+  SherpaOnnxOfflineSenseVoiceModelConfig sense_voice;
 } SherpaOnnxOfflineModelConfig;
 
 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig {
Original file line number	Diff line number	Diff line change
 *.fst
 .ccache
 lib*.a
 +sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17
Original file line number	Diff line number	Diff line change
`@@ -11,7 +11,7 @@ project(sherpa-onnx)`
`11`	`11`	`# ./nodejs-addon-examples`
`12`	`12`	`# ./dart-api-examples/`
`13`	`13`	`# ./CHANGELOG.md`
`14`		`-set(SHERPA_ONNX_VERSION "1.10.16")`
	`14`	`+set(SHERPA_ONNX_VERSION "1.10.17")`
`15`	`15`
`16`	`16`	`# Disable warning about`
`17`	`17`	`#`