Skip to content

Commit 73bb883

Browse files
committed
* update vlm
1 parent e35c1f1 commit 73bb883

File tree

1 file changed

+33
-25
lines changed

1 file changed

+33
-25
lines changed

projects/app_vlm/main.py

Lines changed: 33 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,10 @@ def __init__(self):
102102
self.exit_img = image.load('./assets/exit.jpg')
103103
self.ai_isp = bool(int(app.get_sys_config_kv("npu", "ai_isp", "1")))
104104
if self.ai_isp is True:
105-
app.set_sys_config_kv("npu", "ai_isp", False)
105+
app.set_sys_config_kv("npu", "ai_isp", "0")
106106

107107
vlm_model = self.get_vl_model()
108+
self.support_zh = True
108109
self.__show_load_info(f'loading {vlm_model}..')
109110
if vlm_model == "qwen3-vl":
110111
try:
@@ -130,6 +131,10 @@ def __init__(self):
130131
app.set_exit_flag(True)
131132
time.sleep_ms(100)
132133
exit(0)
134+
elif vlm_model == 'smolvlm':
135+
self.vlm = nn.SmolVLM('/root/models/smolvlm-256m-instruct-maixcam2/model.mud')
136+
self.vlm.set_system_prompt("You are a helpful vision-to-text assistant.")
137+
self.support_zh = False
133138
elif vlm_model == 'internvl':
134139
self.vlm = nn.InternVL('/root/models/InternVL2.5-1B/model.mud')
135140
self.vlm.set_system_prompt("你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型, 英文名叫InternVL, 是一个有用无害的人工智能助手。")
@@ -174,7 +179,7 @@ def check_memory(self):
174179
exit(0)
175180

176181
def get_vl_model(self):
177-
model_list = ["internvl", "qwen3-vl"]
182+
model_list = ["smolvlm", "internvl", "qwen3-vl"]
178183
model_list_num = len(model_list)
179184
ui_box = []
180185
rect_box = [0, 0, self.disp_w//2, self.disp_h//2]
@@ -210,10 +215,7 @@ def get_vl_model(self):
210215
exit_img_x = 0
211216
exit_img_y = 0
212217
img.draw_image(exit_img_x, exit_img_y, self.exit_img)
213-
214-
if ts_data[2] and 0<=ts_data[0]<=self.exit_img.width()*4 + exit_img_x and 0 <=ts_data[1]<=self.exit_img.height()*4 + exit_img_y:
215-
print('exit')
216-
app.set_exit_flag(True)
218+
self.check_exit()
217219

218220
self.disp.show(img)
219221
time.sleep_ms(50)
@@ -252,6 +254,16 @@ def run_vlm(self, img: image.Image, msg: str):
252254
t.start()
253255
# t.run()
254256

257+
def check_exit(self):
    """Poll the touchscreen once and request app exit if the exit icon is hit.

    Takes one sample from ``self.ts.read()`` and uses index 0 as the x
    coordinate, index 1 as the y coordinate and index 2 as the "pressed"
    flag. When the sample is a press inside the exit hot zone, ``'exit'``
    is printed and ``app.set_exit_flag(True)`` is called.

    The hot zone starts at the icon origin and spans 8x the icon's width
    and height.
    NOTE(review): the oversized zone is presumably there to make the small
    icon easier to tap -- confirm the intended factor.

    Returns:
        bool: True if exit was requested by this call, False otherwise.
        Existing callers ignore the return value.
    """
    # Multiplier applied to the icon size to obtain the touch target size.
    hit_scale = 8

    # Origin of the exit hot zone.
    exit_img_x = 0
    exit_img_y = 0
    exit_img_w = self.exit_img.width() * hit_scale
    exit_img_h = self.exit_img.height() * hit_scale

    ts_data = self.ts.read()
    touch_x, touch_y, pressed = ts_data[0], ts_data[1], ts_data[2]
    if not pressed:
        return False

    # Bound the zone by the icon origin on both sides so the test stays
    # correct if the icon is ever moved away from (0, 0).
    inside_x = exit_img_x <= touch_x <= exit_img_x + exit_img_w
    inside_y = exit_img_y <= touch_y <= exit_img_y + exit_img_h
    if not (inside_x and inside_y):
        return False

    print('exit')
    app.set_exit_flag(True)
    return True
266+
255267
def show_ui(self):
256268
img = image.Image(self.disp_w, self.disp_h, bg=image.COLOR_BLACK)
257269
ts_data = self.ts.read()
@@ -276,22 +288,23 @@ def show_ui(self):
276288
exit_img_x = 0
277289
exit_img_y = 0
278290
img.draw_image(exit_img_x, exit_img_y, self.exit_img)
279-
280-
if ts_data[2] and 0<=ts_data[0]<=self.exit_img.width()*4 + exit_img_x and 0 <=ts_data[1]<=self.exit_img.height()*4 + exit_img_y:
281-
print('exit')
282-
app.set_exit_flag(True)
291+
self.check_exit()
283292

284293
# en/zh
285294
size = image.string_size("ZH", scale=2)
286295
if self.language == 'zh':
287296
img.draw_string(self.disp_w - size.width(), 0, "ZH", image.COLOR_WHITE, scale=2)
288297
else:
289298
img.draw_string(self.disp_w - size.width(), 0, "EN", image.COLOR_WHITE, scale=2)
290-
if ts_data[2] and self.disp_w - size.width()*2<=ts_data[0]<=self.disp_w and 0 <=ts_data[1]<=size.height() * 2:
291-
if self.language == 'zh':
292-
self.language = 'en'
293-
else:
294-
self.language = 'zh'
299+
300+
if self.support_zh:
301+
if ts_data[2] and self.disp_w - size.width()*2<=ts_data[0]<=self.disp_w and 0 <=ts_data[1]<=size.height() * 2:
302+
if self.language == 'zh':
303+
self.language = 'en'
304+
else:
305+
self.language = 'zh'
306+
else:
307+
self.language = 'en'
295308
self.disp.show(img)
296309

297310

@@ -300,6 +313,7 @@ def __vlm_on_reply(self, obj, resp):
300313
if self.vlm_img:
301314
self.page_text.add_text(resp.msg_new)
302315
# self.show_ui()
316+
self.check_exit()
303317

304318
def __show_load_info(self, text: str, x:int = 0, y:int = 0, color:image.Color=image.COLOR_WHITE):
305319
if self.disp:
@@ -312,25 +326,19 @@ def __show_load_info(self, text: str, x:int = 0, y:int = 0, color:image.Color=im
312326
img.draw_string(x, y, text, image.COLOR_WHITE)
313327
self.disp.show(img)
314328

315-
def __draw_string_upper_center(self, img, y:int=8, text:str="", color:image.Color=image.COLOR_WHITE):
316-
x = 0
317-
text_size = image.string_size(text)
318-
x = (img.width() - text_size.width()) // 2
319-
img.draw_string(x, y, text, color)
320-
321329
def run(self):
322330
while not app.need_exit():
323331
with self.vlm_thread_lock:
324332
sta = self.sta
325333

326334
if sta == self.Status.IDLE:
327-
print('IDLE')
335+
# print('IDLE')
328336
self.vlm_img = self.cam.read()
329337
if self.vlm_img:
330338
with self.vlm_thread_lock:
331339
self.sta = self.Status.VLM_START
332340
elif sta == self.Status.VLM_START:
333-
print('VLM_START')
341+
# print('VLM_START')
334342
if self.vlm_img:
335343
if self.language == 'zh':
336344
msg = '简单描述这张图片'
@@ -340,10 +348,10 @@ def run(self):
340348
with self.vlm_thread_lock:
341349
self.sta = self.Status.VLM_RUNNING
342350
elif sta == self.Status.VLM_RUNNING:
343-
print('VLM_RUNNING')
351+
# print('VLM_RUNNING')
344352
self.vlm_img = self.cam.read()
345353
elif sta == self.Status.VLM_STOP:
346-
print('VLM_STOP')
354+
# print('VLM_STOP')
347355
with self.vlm_thread_lock:
348356
self.sta = self.Status.IDLE
349357

0 commit comments

Comments
 (0)