Skip to content

Commit ed8d347

Browse files
committed
* add ai_chat apps
1 parent cd09875 commit ed8d347

File tree

8 files changed

+282
-1
lines changed

8 files changed

+282
-1
lines changed

projects/app_chat/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
data
2+
__pycache__
3+
dist

projects/app_chat/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
AI Chat
2+
3+
Offline AI Chat
4+
5+
6+
7+

projects/app_chat/app.png

5.76 KB
Loading

projects/app_chat/app.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
id: ai_chat
2+
name: AI Chat
3+
name[zh]: AI聊天
4+
version: 1.0.3
5+
icon: app.png
6+
author: Sipeed Ltd
7+
desc: AI Chat
8+
desc[zh]: AI 聊天
9+
exclude:
10+
- dist
11+
- build
12+
- .gitignore
13+
files:
14+
- assets/exit.jpg
15+
- assets/icon.png
16+
- app.yaml
17+
- main.py
18+
- README.md

projects/app_chat/assets/exit.jpg

785 Bytes
Loading

projects/app_chat/assets/icon.png

1.87 KB
Loading

projects/app_chat/main.py

Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
2+
from maix import nn, audio, time, display, app, image, touchscreen
3+
import threading
4+
from queue import Queue, Empty
5+
import re
6+
7+
class App:
8+
def __init__(self):
    """Bring up the display, touchscreen, audio devices and the three models.

    A progress message is drawn before each slow step. Two daemon worker
    threads are started: one plays PCM buffers taken from ``player_queue``,
    the other synthesizes speech for text chunks taken from ``tts_queue``.

    If the ``npu``/``ai_isp`` system setting is enabled, an error screen is
    shown until the user touches the screen (or exit is requested) and the
    constructor returns without loading any model.
    """
    image.load_font("sourcehansans", "/maixapp/share/font/SourceHanSansCN-Regular.otf", size = 20)
    image.set_default_font("sourcehansans")
    self.disp = display.Display()
    self.disp_w = 320
    self.disp_h = 240
    self.__show_load_info('loading touchscreen..')
    self.ts = touchscreen.TouchScreen()

    self.exit_img = image.load('./assets/exit.jpg')

    self.__show_load_info('loading recorder..')
    self.default_wav_path = "/root/audio.wav"
    self.default_record_samplerate = 16000
    self.default_record_volume = 70
    self.recorder = audio.Recorder(sample_rate=self.default_record_samplerate)
    self.recorder.volume(self.default_record_volume)

    self.__show_load_info("loading webrtcvad..")
    # Length of each audio slice handed to the VAD. Set unconditionally so
    # the attribute exists even when webrtcvad is unavailable.
    self.vad_duration_ms = 30
    try:
        import webrtcvad
        self.vad = webrtcvad.Vad()
        self.vad.set_mode(3)
    except Exception as e:
        # VAD is optional: without it a touch starts recording directly.
        # Report the reason instead of hiding it.
        print('webrtcvad unavailable, VAD disabled:', e)
        self.vad = None

    self.__show_load_info('loading player..')
    self.player = audio.Player(sample_rate=44100)
    self.player.volume(50)
    self.player_queue = Queue(100)
    self.player_thread = threading.Thread(target=self.player_thread_handle, daemon=True)
    self.player_thread.start()

    self.__show_load_info('loading whisper..')
    ai_isp_on = bool(int(app.get_sys_config_kv("npu", "ai_isp", "1")))
    if ai_isp_on:
        img = image.Image(320, 240, bg=image.COLOR_BLACK)
        err_msg = "You need edit /boot/configs to set ai_isp_on to 0"
        err_msg_size = image.string_size(err_msg)
        img.draw_string((img.width() - err_msg_size.width()) // 2, (img.height() - err_msg_size.height()) // 2, err_msg, image.COLOR_RED)
        self.disp.show(img)
        while not app.need_exit():
            ts_data = self.ts.read()
            if ts_data[2]:
                app.set_exit_flag(True)
            time.sleep_ms(100)
        # The loop above only ends once exit has been requested, so there
        # is no point in loading the models; run() will return immediately.
        return
    self.whisper = nn.Whisper(model="/root/models/whisper-base/whisper-base.mud", language="en")

    self.__show_load_info('loading llm..')
    # Alternative model path: /root/models/Qwen2.5-0.5B-Instruct/model.mud
    self.llm = nn.Qwen("/root/models/Qwen2.5-1.5B-Instruct/model.mud")
    self.llm.set_system_prompt("You are Qwen, created by Alibaba Cloud. You are a helpful assistant.")
    self.llm.set_reply_callback(self.__llm_on_reply)
    # Reply text received from the LLM that has not been queued for TTS yet.
    self.llm_last_msg = ""

    self.__show_load_info('loading melotts..')
    self.tts = nn.MeloTTS(model="/root/models/melotts/melotts-zh.mud", speed = 0.8, language='en')

    self.tts_queue = Queue(100)
    self.tts_thread = threading.Thread(target=self.tts_thread_handle, daemon=True)
    self.tts_thread.start()
74+
75+
def player_thread_handle(self):
    """Worker loop: play every PCM buffer taken from ``player_queue``.

    Runs until ``app.need_exit()`` reports true.
    """
    while not app.need_exit():
        try:
            # queue.Queue timeouts are expressed in seconds. A short wait
            # keeps the exit flag polled regularly while the queue is idle.
            pcm = self.player_queue.get(timeout=0.5)
        except Empty:
            continue
        print('play start')
        t = time.ticks_ms()
        self.player.play(pcm)
        print('player cost', time.ticks_ms() - t)
        print('play finish')
86+
87+
def tts_thread_handle(self):
    """Worker loop: synthesize each text chunk from ``tts_queue`` and queue it for playback.

    Each chunk is passed to ``self.tts.infer(..., output_pcm=True)`` and the
    result is put on ``player_queue``. Runs until ``app.need_exit()``
    reports true.
    """
    while not app.need_exit():
        try:
            # queue.Queue timeouts are expressed in seconds. A short wait
            # keeps the exit flag polled regularly while the queue is idle.
            msg = self.tts_queue.get(timeout=0.5)
        except Empty:
            continue
        print('tts queue get:', msg)
        t = time.ticks_ms()
        pcm = self.tts.infer(msg, output_pcm=True)
        print('tts infer cost', time.ticks_ms() - t)
        self.player_queue.put(pcm)
98+
99+
def __llm_on_reply(self, obj, resp):
    """LLM reply callback: show the reply and queue finished clauses for TTS.

    Args:
        obj: First callback argument; not used here.
        resp: Reply object. ``resp.msg_new`` is printed and appended to the
            pending text buffer; ``resp.msg`` is drawn on the screen.

    Every clause that ends in one of ``, . ! ?`` is put on ``tts_queue``
    together with the punctuation mark that actually ended it. Text after
    the last mark stays in ``self.llm_last_msg`` until more text arrives.
    Clauses that contain only whitespace are skipped.
    """
    print(resp.msg_new, end="")
    img = image.Image(320, 240, bg=image.COLOR_BLACK)
    self.__draw_string_upper_center(img, text="Run LLM..", color=image.COLOR_GREEN)
    img.draw_string(0, 30, resp.msg, image.COLOR_WHITE)
    self.disp.show(img)

    # With a capturing group re.split keeps the delimiters, giving
    # [text, mark, text, mark, ..., tail] (always an odd number of items).
    pieces = re.split(r"([,.!?])", self.llm_last_msg + resp.msg_new)
    self.llm_last_msg = pieces.pop()
    for text, mark in zip(pieces[0::2], pieces[1::2]):
        if text.strip():
            self.tts_queue.put(text + mark)
124+
125+
def __show_load_info(self, text: str, x:int = 0, y:int = 0, color:image.Color=image.COLOR_WHITE):
    """Show ``text`` on an otherwise black screen.

    Args:
        text: Message to draw.
        x: Left position of the text; ``0`` means center horizontally.
        y: Top position of the text; ``0`` means center vertically.
        color: Text color.

    Does nothing when ``self.disp`` is falsy.
    """
    if not self.disp:
        return
    str_size = image.string_size(text)
    img = image.Image(self.disp_w, self.disp_h, bg=image.COLOR_BLACK)
    if x == 0:
        x = (img.width() - str_size.width()) // 2
    if y == 0:
        y = (img.height() - str_size.height()) // 2
    # Honor the caller's color (it used to be ignored in favor of white).
    img.draw_string(x, y, text, color)
    self.disp.show(img)
135+
136+
def __draw_string_upper_center(self, img, y:int=8, text:str="", color:image.Color=image.COLOR_WHITE):
    """Draw ``text`` on ``img``, horizontally centered, with its top at ``y``.

    Args:
        img: Image to draw on.
        y: Vertical position passed to ``draw_string``.
        text: String to draw.
        color: Text color.
    """
    measured = image.string_size(text)
    img.draw_string((img.width() - measured.width()) // 2, y, text, color)
141+
142+
def __reset_recorder(self, save_file: bool):
    """Replace ``self.recorder`` with a freshly created recorder.

    Args:
        save_file: When true the recorder is created with
            ``default_wav_path``; otherwise it is created without a path.

    The new recorder gets the default sample rate and volume.
    """
    # Drop the previous recorder before creating its replacement.
    if self.recorder:
        del self.recorder
    self.recorder = (
        audio.Recorder(self.default_wav_path, self.default_record_samplerate)
        if save_file
        else audio.Recorder(sample_rate=self.default_record_samplerate)
    )
    self.recorder.volume(self.default_record_volume)
150+
151+
def run(self):
    """Run the touch-driven voice chat loop until exit is requested.

    Every pass redraws the screen, checks the exit button and advances a
    small state machine:

        IDLE -> (VAD ->) SPEAKING -> TRANSCRIBE -> LLM -> TTS -> IDLE

    The VAD step is only used when ``self.vad`` is available.
    """
    class Status:
        IDLE=0
        SPEAKING=1
        TRANSCRIBE=2
        TTS=3
        LLM=4
        VAD=5

    # Header text per state; any state not listed shows the idle prompt.
    titles = {
        Status.VAD: "VAD..",
        Status.SPEAKING: "Speaking..",
        Status.TRANSCRIBE: "Transcribing..",
        Status.LLM: "Run LLM..",
        Status.TTS: "Run MelloTTS..",
    }
    idle_title = "Waiting press touchscreen.."
    # States whose screen shows the header only (no transcript/reply text).
    header_only = (Status.VAD, Status.SPEAKING, Status.TRANSCRIBE)

    status = Status.IDLE
    record_ms = 4000
    asr_result = None
    llm_result = None

    while not app.need_exit():
        img = image.Image(320, 240, bg=image.COLOR_BLACK)
        ts_data = self.ts.read()

        self.__draw_string_upper_center(img, text=titles.get(status, idle_title), color=image.COLOR_GREEN)
        if status not in header_only:
            # Prefer the pending transcript, fall back to the last reply.
            shown = asr_result or llm_result
            if shown:
                img.draw_string(0, 30, shown, image.COLOR_WHITE)

        exit_img_x = 0
        exit_img_y = 0
        img.draw_image(exit_img_x, exit_img_y, self.exit_img)
        exit_pressed = (
            ts_data[2]
            and 0 <= ts_data[0] <= self.exit_img.width() + exit_img_x*2
            and 0 <= ts_data[1] <= self.exit_img.height() + exit_img_y*2
        )
        if exit_pressed:
            print('exit')
            app.set_exit_flag(True)
        self.disp.show(img)
        if exit_pressed:
            # Leave right away instead of running another pipeline stage.
            break

        if status == Status.IDLE:
            if ts_data[2]:
                status = Status.VAD if self.vad else Status.SPEAKING
        elif status == Status.VAD:
            if self.vad:
                # NOTE(review): webrtcvad expects a complete 10/20/30 ms
                # frame of 16-bit mono PCM; confirm recorder.record()
                # returns exactly that for vad_duration_ms.
                pcm = self.recorder.record(self.vad_duration_ms)
                if pcm and len(pcm) > 0:
                    if self.vad.is_speech(pcm, self.default_record_samplerate):
                        status = Status.SPEAKING
            else:
                status = Status.SPEAKING
        elif status == Status.SPEAKING:
            # Record a fixed-length clip to the default wav file, then go
            # back to a recorder that does not write a file.
            self.__reset_recorder(True)
            self.recorder.record(record_ms)
            self.recorder.finish()
            self.__reset_recorder(False)
            status = Status.TRANSCRIBE
        elif status == Status.TRANSCRIBE:
            asr_result = self.whisper.transcribe(self.default_wav_path)
            print(asr_result)
            status = Status.LLM
        elif status == Status.LLM:
            if asr_result:
                llm_result0 = self.llm.send(asr_result)
                llm_result = llm_result0.msg
                self.llm.clear_context()
                print(llm_result)
                # The reply callback only queues text up to the last
                # punctuation mark. Speak whatever is left and reset the
                # buffer so it cannot leak into the next reply.
                tail = self.llm_last_msg.strip()
                if tail:
                    self.tts_queue.put(tail)
                self.llm_last_msg = ""
            # Always move on, even when nothing was transcribed, so the
            # loop can never get stuck in this state.
            status = Status.TTS
            asr_result = None
        elif status == Status.TTS:
            # Wait until nothing is pending for synthesis or playback, so a
            # new recording does not start while audio is still queued.
            # Best effort: the chunk being processed right now is not
            # visible in either queue.
            if self.tts_queue.empty() and self.player_queue.empty():
                status = Status.IDLE
        else:
            status = Status.IDLE
        time.sleep_ms(5)
250+
251+
if __name__ == '__main__':
    # Script entry point: build the app, then hand control to its main loop.
    chat_app = App()
    chat_app.run()

projects/build_all.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ set -e
77
# 定义不同平台的黑名单
88

99
blacklist_linux=()
10-
blacklist_maixcam=("app_yoloworld" "app_vlm" "app_mono_depth_estimation")
10+
# Bash array elements are separated by whitespace only; a comma would become part of the preceding element.
blacklist_maixcam=("app_yoloworld" "app_vlm" "app_mono_depth_estimation" "app_chat")
1111
blacklist_maixcam2=("app_mouse_simulator")
1212

1313
#############################################

0 commit comments

Comments
 (0)