Skip to content

Commit 3f798f8

Browse files
committed
* add sensevoice doc
1 parent a20814a commit 3f798f8

File tree

3 files changed

+186
-0
lines changed

3 files changed

+186
-0
lines changed

docs/doc/en/audio/recognize.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,27 @@ By default, it recognizes Chinese. To recognize English, pass the language param
6868
whisper = nn.Whisper(model="/root/models/whisper-base/whisper-base.mud", language="en")
6969
```
7070

71+
## Using SenseVoice for Speech-to-Text
72+
73+
Currently, only `MaixCAM2` supports `SenseVoice`, and all SenseVoice-related code is implemented in `Python`, so only Python-side examples are provided.
74+
By default, the system does not include the `SenseVoice` model. Please download it from [here](https://huggingface.co/sipeed/sensevoice-maixcam2) and place it in the `/root/models/` directory.
75+
76+
Before using it, you need to start the `sensevoice.service` service. The command is as follows:
77+
> Note that `sensevoice.service` starts from the `/root/models/sensevoice-maixcam2` directory by default, so make sure the model is placed under `/root/models/`.
78+
```shell
79+
systemctl start sensevoice.service
80+
```
81+
82+
You can also start it manually:
83+
84+
```shell
85+
cd /root/models/sensevoice-maixcam2
86+
python server.py
87+
```
88+
89+
After the service is started, you can perform speech recognition via HTTP interaction.
90+
For usage, please refer to the example: [asr_sensevoice.py](https://github.com/sipeed/MaixPy/tree/main/examples/audio/asr/sensevoice/asr_sensevoice.py)
91+
7192
## Maix-Speech
7293

7394
[`Maix-Speech`](https://github.com/sipeed/Maix-Speech) is an offline speech recognition library specifically designed for embedded environments. It has been deeply optimized for speech recognition algorithms, significantly reducing memory usage while maintaining excellent recognition accuracy. For detailed information, please refer to the [Maix-Speech Documentation](https://github.com/sipeed/Maix-Speech/blob/master/usage_zh.md).

docs/doc/zh/audio/recognize.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ update:
2323
| ------- | ------- | ----------- | -------- |
2424
| Whisper ||||
2525
| Speech ||||
26+
| Sensevoice ||||
2627

2728
## 使用Whisper做语音转文字
2829

@@ -68,6 +69,24 @@ whisper: 开始愉快的探索吧
6869
whisper = nn.Whisper(model="/root/models/whisper-base/whisper-base.mud", language="en")
6970
```
7071

72+
## 使用Sensevoice做语音转文字
73+
74+
目前只有`MaixCAM2`支持`Sensevoice`, 并且`Sensevoice`的相关代码都是`python`实现的, 因此只提供了python端的示例
75+
系统默认没有`Sensevoice`模型,请从[这里](https://huggingface.co/sipeed/sensevoice-maixcam2)自行下载并放置在`/root/models/`目录下
76+
77+
使用前需要启动`sensevoice.service`服务, 启动的命令如下:
78+
> 需要注意`sensevoice.service`默认是从`/root/models/sensevoice-maixcam2`目录下启动,因此请一定将模型放置在`/root/models/`目录下
79+
```shell
80+
systemctl start sensevoice.service
81+
```
82+
你也可以手动启动
83+
```shell
84+
cd /root/models/sensevoice-maixcam2
85+
python server.py
86+
```
87+
88+
启动服务后可以通过http交互的方法完成语音识别, 使用方法请查看示例:[asr_sensevoice.py](https://github.com/sipeed/MaixPy/tree/main/examples/audio/asr/sensevoice/asr_sensevoice.py)
89+
7190
## Maix-Speech
7291

7392
[`Maix-Speech`](https://github.com/sipeed/Maix-Speech) 是一款专为嵌入式环境设计的离线语音识别库,针对语音识别算法进行了深度优化,显著降低内存占用,同时在识别准确率方面表现优异。详细说明请参考 [Maix-Speech 使用文档](https://github.com/sipeed/Maix-Speech/blob/master/usage_zh.md)
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
import requests, json, os
2+
import librosa
3+
4+
class SensevoiceClient:
    """Small HTTP client for a SenseVoice speech-recognition service.

    The client talks to the service at ``url`` through the ``/status``,
    ``/start_model``, ``/_stop_model``, ``/asr`` and ``/asr_stream``
    endpoints, and starts/stops the ``sensevoice.service`` systemd unit when
    asked to.

    Args:
        model: Model path sent to the service as ``model_path``.
        url: Base URL of the service.
        lauguage: Language value sent to the service. The parameter name is
            misspelled but kept so existing keyword callers keep working.
        stream: ``True`` to use :meth:`refer_stream`, ``False`` to use
            :meth:`refer`.
    """

    # Sample rate (Hz) used both when loading audio and in request payloads.
    SAMPLE_RATE = 16000
    # Seconds to wait for a ``/status`` probe so an unresponsive service
    # cannot block the caller indefinitely.
    STATUS_TIMEOUT = 5

    def __init__(self, model = "", url="http://0.0.0.0:12347", lauguage="auto", stream=False):
        self.model = model
        self.url = url
        self.stream = stream
        # Attribute name is misspelled; kept for backward compatibility.
        self.launguage = lauguage

    def _check_service(self):
        """Return ``True`` only if ``GET /status`` answers with HTTP 200."""
        try:
            response = requests.get(self.url + '/status', timeout=self.STATUS_TIMEOUT)
        except Exception:
            # Best-effort probe: any failure means "not reachable". Catching
            # Exception (not a bare except) lets Ctrl-C interrupt the caller.
            return False
        return response.status_code == 200

    def _start_service(self, timeout=None):
        """Make sure the service answers, starting the systemd unit if needed.

        Args:
            timeout: Maximum number of seconds to wait for the service to
                answer. ``None`` (default) waits indefinitely.

        Returns:
            ``True`` once the service answers; ``False`` if ``systemctl``
            reports a failure or ``timeout`` expires first.
        """
        import time

        if self._check_service():
            return True

        if os.system("systemctl start sensevoice.service") != 0:
            # The unit could not be started, so polling would never succeed.
            return False

        deadline = None if timeout is None else time.monotonic() + timeout
        while not self._check_service():
            if deadline is not None and time.monotonic() >= deadline:
                return False
            print("Waiting for service to start...")
            time.sleep(1)

        return True

    def _stop_service(self):
        """Stop the ``sensevoice.service`` systemd unit."""
        os.system("systemctl stop sensevoice.service")

    def _get_status(self):
        """Return the ``status`` field reported by ``GET /status``.

        Falls back to ``"not loaded"`` when the request fails, the reply is
        not HTTP 200, or the body cannot be parsed.
        """
        try:
            response = requests.get(self.url + '/status', timeout=self.STATUS_TIMEOUT)
            if response.status_code == 200:
                return json.loads(response.text)["status"]
        except Exception:
            pass
        return "not loaded"

    def _start_model(self):
        """Ask the service to load the model; return ``True`` if it reports ``loaded``."""
        data = {
            "model_path": self.model,
            "sample_rate": self.SAMPLE_RATE,
            "language": self.launguage,
            "stream": self.stream,
        }
        try:
            response = requests.post(self.url + '/start_model', json=data)
            if response.status_code == 200:
                return json.loads(response.text)["status"] == 'loaded'
        except Exception:
            pass
        return False

    def _stop_model(self):
        """Ask the service to unload the model; return ``True`` if it reports ``not loaded``."""
        try:
            # NOTE(review): this path is '/_stop_model' while the start
            # endpoint is '/start_model'; confirm against the server that the
            # leading underscore is intended.
            response = requests.post(self.url + '/_stop_model')
            if response.status_code == 200:
                return json.loads(response.text)["status"] == 'not loaded'
        except Exception:
            pass
        return False

    def start(self, timeout=None):
        """Start the service and then the model.

        Args:
            timeout: Forwarded to the service start-up wait; ``None``
                (default) waits indefinitely.

        Returns:
            ``True`` when both steps succeed, ``False`` otherwise.
        """
        if not self._start_service(timeout):
            print("Failed to start service.")
            return False
        print("Service started successfully.")

        if not self._start_model():
            print("Failed to start model.")
            return False
        print("Model started successfully.")
        return True

    def stop_model(self):
        """Unload the model but leave the service running."""
        self._stop_model()

    def stop(self):
        """Unload the model and stop the systemd unit."""
        self._stop_model()
        self._stop_service()

    def get_wave_form(self, path):
        """Load the audio file at ``path`` with librosa at ``SAMPLE_RATE`` Hz."""
        waveform, _ = librosa.load(path, sr=self.SAMPLE_RATE)
        return waveform

    def _asr_payload(self, filepath, **extra):
        """Build the JSON payload shared by ``/asr`` and ``/asr_stream``."""
        waveform = self.get_wave_form(filepath)
        payload = {
            "audio_data": waveform.tolist(),
            "sample_rate": self.SAMPLE_RATE,
            # NOTE(review): the key spelling "launguage" is kept as-is because
            # the server's expected key is not visible here. The value now
            # follows the configured language instead of a hard-coded "auto".
            "launguage": self.launguage,
        }
        payload.update(extra)
        return payload

    def refer(self, filepath):
        """Transcribe ``filepath`` in one request (non-streaming mode only).

        Returns:
            The ``text`` field of the reply, or ``""`` when the client is in
            streaming mode or the request fails.
        """
        if self.stream:
            print("Streaming mode, use refer_stream() instead.")
            return ""
        data = self._asr_payload(filepath)
        try:
            response = requests.post(self.url + '/asr', json=data)
            if response.status_code == 200:
                return json.loads(response.text).get("text", "")
            print(f"Requests failed: {response.status_code}")
        except Exception as e:
            print("Requests failed:", e)
        return ""

    def refer_stream(self, filepath):
        """Transcribe ``filepath`` incrementally (streaming mode only).

        Yields:
            The ``text`` field of every non-empty JSON line sent by the
            service. Nothing is yielded when the client is not in streaming
            mode or the request fails.
        """
        if not self.stream:
            print("Non-streaming mode, use refer() instead.")
            return
        # "step" is forwarded unchanged; its unit is defined by the server
        # (presumably seconds per chunk -- TODO confirm).
        data = self._asr_payload(filepath, step=0.1)
        print('start post')
        try:
            # The context manager releases the connection even if the
            # consumer stops iterating early.
            with requests.post(self.url + '/asr_stream', json=data, stream=True) as response:
                if response.status_code != 200:
                    print(f"Requests failed: {response.status_code}")
                    return
                for line in response.iter_lines():
                    if line:
                        yield json.loads(line).get("text", "")
        except Exception as e:
            print("Requests failed:", e)
133+
134+
# Demo: start the service and model, then transcribe "example/zh.mp3" either
# incrementally (stream=True) or with a single request (stream=False).
stream = True
client = SensevoiceClient(
    model="/root/models/sensevoice-maixcam2/model.mud",
    stream=stream,
)

started = client.start()
if started is False:
    print("Failed to start service or model.")
    exit()

if stream:
    # Streaming mode: print each partial result as it arrives.
    print('start refer stream')
    for text in client.refer_stream("example/zh.mp3"):
        print(text)
else:
    # One-shot mode: a single request returns the whole transcript.
    print('start refer')
    text = client.refer("example/zh.mp3")
    print(text)

0 commit comments

Comments
 (0)