Skip to content

Commit f5dfcf8

Browse files
authored
Add Kotlin and Java API for online punctuation models (#1936)
1 parent 815ebac commit f5dfcf8

16 files changed

+474
-13
lines changed

.github/workflows/run-java-test.yaml

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,22 @@ jobs:
105105
make -j4
106106
ls -lh lib
107107
108+
- name: Run java test (Online add punctuations)
109+
shell: bash
110+
run: |
111+
cd ./java-api-examples
112+
./run-online-add-punctuation-zh-en.sh
113+
# Delete model files to save space
114+
rm -rf sherpa-onnx-online-*
115+
116+
- name: Run java test (Offline add punctuations)
117+
shell: bash
118+
run: |
119+
cd ./java-api-examples
120+
./run-offline-add-punctuation-zh-en.sh
121+
# Delete model files to save space
122+
rm -rf sherpa-onnx-punct-*
123+
108124
- name: Run java test (Non-Streaming ASR)
109125
shell: bash
110126
run: |
@@ -196,13 +212,6 @@ jobs:
196212
./run-audio-tagging-ced-from-file.sh
197213
rm -rf sherpa-onnx-ced-*
198214
199-
- name: Run java test (add punctuations)
200-
shell: bash
201-
run: |
202-
cd ./java-api-examples
203-
./run-add-punctuation-zh-en.sh
204-
# Delete model files to save space
205-
rm -rf sherpa-onnx-punct-*
206215
207216
- name: Run java test (Spoken language identification)
208217
shell: bash

java-api-examples/AddPunctuation.java renamed to java-api-examples/OfflineAddPunctuation.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
// The model supports both English and Chinese.
66
import com.k2fsa.sherpa.onnx.*;
77

8-
public class AddPunctuation {
8+
public class OfflineAddPunctuation {
99
public static void main(String[] args) {
1010
// please download the model from
1111
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
java-api-examples/OnlineAddPunctuation.java

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// Copyright 2025 Xiaomi Corporation
2+
3+
// This file shows how to use an online punctuation model to add punctuation to text.
4+
//
5+
// The model supports ONLY English.
6+
import com.k2fsa.sherpa.onnx.*;
7+
8+
public class OnlineAddPunctuation {
9+
public static void main(String[] args) {
10+
// please download the model from
11+
// https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
12+
String model = "./sherpa-onnx-online-punct-en-2024-08-06/model.int8.onnx";
13+
String bpeVocab = "./sherpa-onnx-online-punct-en-2024-08-06/bpe.vocab";
14+
OnlinePunctuationModelConfig modelConfig =
15+
OnlinePunctuationModelConfig.builder()
16+
.setCnnBilstm(model)
17+
.setBpeVocab(bpeVocab)
18+
.setNumThreads(1)
19+
.setDebug(true)
20+
.build();
21+
OnlinePunctuationConfig config =
22+
OnlinePunctuationConfig.builder().setModel(modelConfig).build();
23+
24+
OnlinePunctuation punct = new OnlinePunctuation(config);
25+
26+
String[] sentences =
27+
new String[] {
28+
"how are you doing fantastic thank you how about you",
29+
"The African blogosphere is rapidly expanding bringing more voices online in the form of"
30+
+ " commentaries opinions analyses rants and poetry",
31+
};
32+
33+
System.out.println("---");
34+
for (String text : sentences) {
35+
String out = punct.addPunctuation(text);
36+
System.out.printf("Input: %s\n", text);
37+
System.out.printf("Output: %s\n", out);
38+
System.out.println("---");
39+
}
40+
}
41+
}

java-api-examples/run-add-punctuation-zh-en.sh renamed to java-api-examples/run-offline-add-punctuation-zh-en.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,4 @@ fi
3434
java \
3535
-Djava.library.path=$PWD/../build/lib \
3636
-cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
37-
./AddPunctuation.java
37+
./OfflineAddPunctuation.java
java-api-examples/run-online-add-punctuation-zh-en.sh

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#!/usr/bin/env bash
2+
3+
set -ex
4+
5+
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
6+
mkdir -p ../build
7+
pushd ../build
8+
cmake \
9+
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
10+
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
11+
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
12+
-DBUILD_SHARED_LIBS=ON \
13+
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
14+
-DSHERPA_ONNX_ENABLE_JNI=ON \
15+
..
16+
17+
make -j4
18+
ls -lh lib
19+
popd
20+
fi
21+
22+
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
23+
pushd ../sherpa-onnx/java-api
24+
make
25+
popd
26+
fi
27+
28+
if [ ! -f ./sherpa-onnx-online-punct-en-2024-08-06/model.int8.onnx ]; then
29+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
30+
tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
31+
rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
32+
fi
33+
34+
java \
35+
-Djava.library.path=$PWD/../build/lib \
36+
-cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
37+
./OnlineAddPunctuation.java
kotlin-api-examples/OnlinePunctuation.kt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../sherpa-onnx/kotlin-api/OnlinePunctuation.kt

kotlin-api-examples/run.sh

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -302,16 +302,16 @@ function testInverseTextNormalizationOnlineAsr() {
302302
java -Djava.library.path=../build/lib -jar $out_filename
303303
}
304304

305-
function testPunctuation() {
305+
function testOfflinePunctuation() {
306306
if [ ! -f ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx ]; then
307307
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
308308
tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
309309
rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
310310
fi
311311

312-
out_filename=test_punctuation.jar
312+
out_filename=test_offline_punctuation.jar
313313
kotlinc-jvm -include-runtime -d $out_filename \
314-
./test_punctuation.kt \
314+
./test_offline_punctuation.kt \
315315
./OfflinePunctuation.kt \
316316
faked-asset-manager.kt \
317317
faked-log.kt
@@ -321,6 +321,25 @@ function testPunctuation() {
321321
java -Djava.library.path=../build/lib -jar $out_filename
322322
}
323323

324+
function testOnlinePunctuation() {
325+
if [ ! -f ./sherpa-onnx-online-punct-en-2024-08-06/model.int8.onnx ]; then
326+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
327+
tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
328+
rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
329+
fi
330+
331+
out_filename=test_online_punctuation.jar
332+
kotlinc-jvm -include-runtime -d $out_filename \
333+
./test_online_punctuation.kt \
334+
./OnlinePunctuation.kt \
335+
faked-asset-manager.kt \
336+
faked-log.kt
337+
338+
ls -lh $out_filename
339+
340+
java -Djava.library.path=../build/lib -jar $out_filename
341+
}
342+
324343
function testOfflineSpeakerDiarization() {
325344
if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
326345
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
@@ -359,6 +378,7 @@ testTts
359378
testAudioTagging
360379
testSpokenLanguageIdentification
361380
testOfflineAsr
362-
testPunctuation
381+
testOfflinePunctuation
382+
testOnlinePunctuation
363383
testInverseTextNormalizationOfflineAsr
364384
testInverseTextNormalizationOnlineAsr
kotlin-api-examples/test_online_punctuation.kt

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
package com.k2fsa.sherpa.onnx
2+
3+
fun main() {
4+
testPunctuation()
5+
}
6+
7+
// https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
8+
fun testPunctuation() {
9+
val config = OnlinePunctuationConfig(
10+
model=OnlinePunctuationModelConfig(
11+
cnnBilstm="./sherpa-onnx-online-punct-en-2024-08-06/model.int8.onnx",
12+
bpeVocab="./sherpa-onnx-online-punct-en-2024-08-06/bpe.vocab",
13+
numThreads=1,
14+
debug=true,
15+
provider="cpu",
16+
)
17+
)
18+
val punct = OnlinePunctuation(config = config)
19+
val sentences = arrayOf(
20+
"how are you doing fantastic thank you what is about you",
21+
"The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
22+
)
23+
println("---")
24+
for (text in sentences) {
25+
val out = punct.addPunctuation(text)
26+
println("Input: $text")
27+
println("Output: $out")
28+
println("---")
29+
}
30+
}

sherpa-onnx/java-api/Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,10 @@ java_files += OfflinePunctuationModelConfig.java
5353
java_files += OfflinePunctuationConfig.java
5454
java_files += OfflinePunctuation.java
5555

56+
java_files += OnlinePunctuationModelConfig.java
57+
java_files += OnlinePunctuationConfig.java
58+
java_files += OnlinePunctuation.java
59+
5660
java_files += OfflineZipformerAudioTaggingModelConfig.java
5761
java_files += AudioTaggingModelConfig.java
5862
java_files += AudioTaggingConfig.java

0 commit comments

Comments
 (0)