Skip to content

Commit ece3183

Browse files
authored
Export SenseVoice ASR models to Ascend NPU 910B (#2707)
1 parent 194795e commit ece3183

File tree

4 files changed

+484
-0
lines changed

4 files changed

+484
-0
lines changed
Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
# Export SenseVoice checkpoints to ONNX, convert the ONNX model with `atc`
# for the Ascend910B SoC, pack the result into a .tar.bz2 archive and upload
# it to the `asr-models` release tag.
name: export-sense-voice-to-ascend-npu

on:
  push:
    branches:
      - export-sense-voice-ascend
  workflow_dispatch:

concurrency:
  group: export-sense-voice-to-ascend-npu-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-sense-voice-to-ascend-npu:
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: ${{ matrix.framework }}
    runs-on: ${{ matrix.os }}
    container:
      # image: ascendai/cann:latest
      # image: ascendai/cann:8.1.rc1-910b-ubuntu22.04-py3.10
      # see https://hub.docker.com/r/gpustack/ascendai-cann/tags?name=8.0
      image: gpustack/ascendai-cann:8.0.RC3-910b-ubuntu20.04-py3.9
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8"]
        # One job per checkpoint source; each value enables exactly one of
        # the two "Run SenseVoice from ..." steps below.
        framework: ["FunASR", "WSYue-ASR"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Show Python
        shell: bash
        run: |
          python3 --version

      # curl is used for every download below; bzip2 is needed by `tar cj`.
      - name: Install curl
        shell: bash
        run: apt-get update && apt-get install -y curl bzip2

      - name: Verify environment
        shell: bash
        run: |
          ls -lh /usr/local/Ascend/ascend-toolkit/set_env.sh

          find /usr/local/Ascend -name "libascend*.so" 2>/dev/null


          source /usr/local/Ascend/ascend-toolkit/set_env.sh
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/linux/x86_64:$LD_LIBRARY_PATH

          echo "CANN environment:"
          which atc || echo "atc not found"
          atc --help

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install "numpy<2" \
            onnx==1.17.0 \
            torch==2.0.0+cpu -f https://download.pytorch.org/whl/torch \
            attrs psutil scipy decorator cloudpickle ml-dtypes tornado \
            sentencepiece \
            pyyaml

      - name: Run SenseVoice from FunAsr
        if: matrix.framework == 'FunASR'
        shell: bash
        run: |
          cd scripts/sense-voice/ascend-npu

          # -f: exit non-zero on an HTTP error instead of saving the server's
          # error page under the requested file name.
          curl -fSL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/am.mvn
          curl -fSL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/model.pt
          curl -fSL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/chn_jpn_yue_eng_ko_spectok.bpe.model

          curl -fSL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/en.wav
          curl -fSL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/ja.wav
          curl -fSL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/ko.wav
          curl -fSL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/yue.wav
          curl -fSL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/zh.wav

          # Remove any README.md already in this directory so the downloaded
          # one is what gets packaged.
          rm -f README.md || true

          curl -fSL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/README.md
          curl -fSL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/LICENSE

          echo "export to onnx"

          python3 ./export_onnx.py

          ls -lh *.onnx

          source /usr/local/Ascend/ascend-toolkit/set_env.sh
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/linux/x86_64:$LD_LIBRARY_PATH

          # The converted file is picked up below as model_linux_aarch64.om.
          atc --model=./model.onnx \
            --framework=5 \
            --host_env_os=linux \
            --host_env_cpu=aarch64 \
            --output=model \
            --input_format=ND \
            --input_shape="x:1,-1,560;prompt:4" \
            --soc_version="Ascend910B"

          ls -lh *.om

          echo "collect results"
          d=sherpa-onnx-ascend-910B-sense-voice-zh-en-ja-ko-yue-2024-07-17

          mkdir -p $d
          mkdir -p $d/test_wavs

          cp -v README.md $d
          cp -v LICENSE $d
          cp -v model_linux_aarch64.om $d/model.om
          cp -v tokens.txt $d
          cp -v test_om.py $d
          cp -v *.wav $d/test_wavs
          ls -lh $d
          tar cjfv $d.tar.bz2 $d
          ls -lh *.tar.bz2
          rm -rf $d

          echo "----show---"
          ls -lh *.tar.bz2

          # Move the archive to the repository root, where the Release steps
          # look for ./*.tar.bz2.
          mv *.tar.bz2 ../../..

      - name: Run SenseVoice from WSYue-ASR
        if: matrix.framework == 'WSYue-ASR'
        shell: bash
        run: |
          cd scripts/sense-voice/ascend-npu

          # -f: exit non-zero on an HTTP error instead of saving the server's
          # error page under the requested file name.
          curl -fSL -O https://huggingface.co/ASLP-lab/WSYue-ASR/resolve/main/sensevoice_small_yue/model.pt

          curl -fSL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/am.mvn
          curl -fSL -O https://hf-mirror.com/FunAudioLLM/SenseVoiceSmall/resolve/main/chn_jpn_yue_eng_ko_spectok.bpe.model

          curl -fSL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/en.wav
          curl -fSL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/yue.wav
          curl -fSL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/resolve/main/test_wavs/zh.wav

          for i in $(seq 0 17); do
            curl -fSL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09/resolve/main/test_wavs/yue-$i.wav
          done

          # Remove any README.md already in this directory so the downloaded
          # one is what gets packaged.
          rm -f README.md || true

          curl -fSL -O https://huggingface.co/csukuangfj/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09/resolve/main/README.md

          echo "export to onnx"
          python3 ./export_onnx.py

          ls -lh *.onnx

          source /usr/local/Ascend/ascend-toolkit/set_env.sh
          export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib/linux/x86_64:$LD_LIBRARY_PATH

          # The converted file is picked up below as model_linux_aarch64.om.
          atc --model=./model.onnx \
            --framework=5 \
            --host_env_os=linux \
            --host_env_cpu=aarch64 \
            --output=model \
            --input_format=ND \
            --input_shape="x:1,-1,560;prompt:4" \
            --soc_version="Ascend910B"

          ls -lh *.om

          echo "collect results"
          d=sherpa-onnx-ascend-910B-sense-voice-zh-en-ja-ko-yue-2025-09-09

          mkdir -p $d
          mkdir -p $d/test_wavs

          cp -v README.md $d
          cp -v model_linux_aarch64.om $d/model.om
          cp -v tokens.txt $d
          cp -v test_om.py $d
          cp -v *.wav $d/test_wavs
          ls -lh $d
          tar cjfv $d.tar.bz2 $d
          ls -lh *.tar.bz2
          rm -rf $d

          echo "----show---"
          ls -lh *.tar.bz2

          # Move the archive to the repository root, where the Release steps
          # look for ./*.tar.bz2.
          mv *.tar.bz2 ../../..

      # When run from the csukuangfj fork, upload to k2-fsa/sherpa-onnx using
      # a dedicated token.
      - name: Release
        if: github.repository_owner == 'csukuangfj'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models

      # When run from k2-fsa, no repo_name/repo_token overrides are given.
      - name: Release
        if: github.repository_owner == 'k2-fsa'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          tag: asr-models
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
#!/usr/bin/env python3
2+
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
3+
4+
from typing import List, Tuple
5+
6+
import sentencepiece as spm
7+
import torch
8+
9+
from torch_model import SenseVoiceSmall
10+
11+
12+
def load_cmvn(filename) -> Tuple[List[float], List[float]]:
    """Read the two float vectors stored in a CMVN text file.

    Only lines starting with ``<LearnRateCoef>`` are inspected. Each such
    line is split on whitespace; the first three fields and the last field
    are discarded and the remaining fields are parsed as floats.

    Args:
      filename:
        Path to the CMVN file (``main`` passes ``./am.mvn``).

    Returns:
      A tuple ``(neg_mean, inv_stddev)``. ``neg_mean`` comes from the first
      matching line and ``inv_stddev`` from the last matching line after it.

    Raises:
      ValueError: If the file contains fewer than two ``<LearnRateCoef>``
        lines, so at least one of the vectors is missing.
    """
    neg_mean = None
    inv_stddev = None

    with open(filename) as f:
        for line in f:
            if not line.startswith("<LearnRateCoef>"):
                continue

            values = [float(x) for x in line.split()[3:-1]]

            if neg_mean is None:
                neg_mean = values
            else:
                inv_stddev = values

    # Fail here with a clear message instead of returning None and letting
    # the caller crash later with an unrelated-looking error.
    if neg_mean is None or inv_stddev is None:
        raise ValueError(
            f"Expected two lines starting with <LearnRateCoef> in {filename}"
        )

    return neg_mean, inv_stddev
28+
29+
30+
def generate_tokens(sp, filename="tokens.txt"):
    """Write the vocabulary of ``sp`` to a text file, one token per line.

    Each line has the form ``<piece> <id>``, with ids running from 0 to
    ``sp.vocab_size() - 1``.

    Args:
      sp:
        An object providing ``vocab_size()`` and ``id_to_piece(i)``
        (``main`` passes a ``sentencepiece.SentencePieceProcessor``).
      filename:
        Output path. Defaults to ``tokens.txt`` in the current directory.
    """
    with open(filename, "w", encoding="utf-8") as f:
        for i in range(sp.vocab_size()):
            f.write(f"{sp.id_to_piece(i)} {i}\n")
    print(f"saved to {filename}")
35+
36+
37+
class ModelWrapper(torch.nn.Module):
    """Wrap a model so that its output is returned as two separate tensors.

    The wrapped module is called with a leading axis of size 1 added to
    ``x``; the first entry of its result is split into the first 4 rows and
    the remaining rows.
    """

    def __init__(self, m, vocab_size=25055):
        """
        Args:
          m:
            The module to wrap. It is called as ``m(x[None], prompt)``.
          vocab_size:
            Size of the last axis of both returned tensors. The default,
            25055, is the value that used to be hard-coded in ``forward``.
        """
        super().__init__()
        self.m = m
        self.vocab_size = vocab_size

    def forward(self, x, prompt):
        """
        Args:
          x:
            Input features without a leading batch axis; ``x.size(0)`` must
            equal the number of output rows that follow the first 4.
            NOTE(review): presumably of shape (T, 560) - confirm against
            the wrapped model.
          prompt:
            Forwarded unchanged to the wrapped module.

        Returns:
          A tuple ``(part1, part2)`` of shapes ``(4, vocab_size)`` and
          ``(x.size(0), vocab_size)``.
        """
        logits = self.m(x[None], prompt)[0]

        # The reshapes pin the output shapes explicitly; they fail if the
        # wrapped module returns a different number of rows or columns.
        part1 = logits[:4].reshape(4, self.vocab_size)
        part2 = logits[4:].reshape(x.size(0), self.vocab_size)
        return part1, part2
49+
50+
51+
@torch.no_grad()
def main():
    """Export the SenseVoice checkpoint in the current directory to ONNX.

    Reads ``./chn_jpn_yue_eng_ko_spectok.bpe.model``, ``./model.pt`` and
    ``./am.mvn``; writes ``tokens.txt`` (via ``generate_tokens``) and
    ``model.onnx``.
    """
    sp = spm.SentencePieceProcessor()
    sp.load("./chn_jpn_yue_eng_ko_spectok.bpe.model")
    generate_tokens(sp)

    print("loading model")

    # NOTE(review): torch.load unpickles the file, and model.pt is downloaded
    # from the network by the CI workflow. Consider weights_only=True if the
    # checkpoint contains only tensors - confirm before changing.
    state_dict = torch.load("./model.pt", map_location="cpu")
    # Some checkpoints nest the weights under a "state_dict" key.
    if "state_dict" in state_dict:
        state_dict = state_dict["state_dict"]

    neg_mean, inv_stddev = load_cmvn("./am.mvn")

    neg_mean = torch.tensor(neg_mean, dtype=torch.float32)
    inv_stddev = torch.tensor(inv_stddev, dtype=torch.float32)

    model = SenseVoiceSmall(neg_mean=neg_mean, inv_stddev=inv_stddev)
    model.load_state_dict(state_dict)
    model.eval()
    del state_dict

    # The model is exported directly. It used to be wrapped in ModelWrapper
    # and then unwrapped again through ``.m`` for the export, which traced
    # exactly this object; the wrapper never took part in the export.

    # Example inputs used for tracing; axes 0 and 1 of x are declared
    # dynamic below.
    x = torch.randn(1, 93, 560, dtype=torch.float32)

    language = 3
    text_norm = 15
    prompt = torch.tensor([language, 1, 2, text_norm], dtype=torch.int32)

    opset_version = 14
    filename = "model.onnx"
    torch.onnx.export(
        model,
        (x, prompt),
        filename,
        opset_version=opset_version,
        input_names=["x", "prompt"],
        output_names=["logits"],
        dynamic_axes={
            "x": {0: "N", 1: "T"},
            "logits": {0: "N", 1: "T_4"},
        },
    )
    print(f"saved to {filename}")


if __name__ == "__main__":
    # Fix the seed so that the random example input is reproducible.
    torch.manual_seed(20251018)
    main()

0 commit comments

Comments
 (0)