@@ -102,9 +102,10 @@ def __init__(self):
102102 self .exit_img = image .load ('./assets/exit.jpg' )
103103 self .ai_isp = bool (int (app .get_sys_config_kv ("npu" , "ai_isp" , "1" )))
104104 if self .ai_isp is True :
105- app .set_sys_config_kv ("npu" , "ai_isp" , False )
105+ app .set_sys_config_kv ("npu" , "ai_isp" , "0" )
106106
107107 vlm_model = self .get_vl_model ()
108+ self .support_zh = True
108109 self .__show_load_info (f'loading { vlm_model } ..' )
109110 if vlm_model == "qwen3-vl" :
110111 try :
@@ -130,6 +131,10 @@ def __init__(self):
130131 app .set_exit_flag (True )
131132 time .sleep_ms (100 )
132133 exit (0 )
134+ elif vlm_model == 'smolvlm' :
135+ self .vlm = nn .SmolVLM ('/root/models/smolvlm-256m-instruct-maixcam2/model.mud' )
136+ self .vlm .set_system_prompt ("You are a helpful vision-to-text assistant." )
137+ self .support_zh = False
133138 elif vlm_model == 'internvl' :
134139 self .vlm = nn .InternVL ('/root/models/InternVL2.5-1B/model.mud' )
135140 self .vlm .set_system_prompt ("你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型, 英文名叫InternVL, 是一个有用无害的人工智能助手。" )
@@ -174,7 +179,7 @@ def check_memory(self):
174179 exit (0 )
175180
176181 def get_vl_model (self ):
177- model_list = ["internvl" , "qwen3-vl" ]
182+ model_list = ["smolvlm" , " internvl" , "qwen3-vl" ]
178183 model_list_num = len (model_list )
179184 ui_box = []
180185 rect_box = [0 , 0 , self .disp_w // 2 , self .disp_h // 2 ]
@@ -210,10 +215,7 @@ def get_vl_model(self):
210215 exit_img_x = 0
211216 exit_img_y = 0
212217 img .draw_image (exit_img_x , exit_img_y , self .exit_img )
213-
214- if ts_data [2 ] and 0 <= ts_data [0 ]<= self .exit_img .width ()* 4 + exit_img_x and 0 <= ts_data [1 ]<= self .exit_img .height ()* 4 + exit_img_y :
215- print ('exit' )
216- app .set_exit_flag (True )
218+ self .check_exit ()
217219
218220 self .disp .show (img )
219221 time .sleep_ms (50 )
@@ -252,6 +254,16 @@ def run_vlm(self, img: image.Image, msg: str):
252254 t .start ()
253255 # t.run()
254256
def check_exit(self):
    """Poll the touchscreen once and request application exit on a hit.

    One sample is read from ``self.ts``. When the sample's third element is
    truthy and its first two elements lie inside the exit hit box, ``'exit'``
    is printed and ``app.set_exit_flag(True)`` is called. The hit box spans
    from 0 up to eight times the width/height of ``self.exit_img`` (plus a
    zero offset), on each axis.

    Returns:
        None. The only effects are the console print and the exit flag.
    """
    # presumably the sample is [x, y, pressed] — confirm against the
    # touchscreen API used by self.ts
    touch = self.ts.read()

    # Offset of the exit icon; the hit box is 8x the icon's size.
    offset_x, offset_y = 0, 0
    hit_right = self.exit_img.width() * 8 + offset_x
    hit_bottom = self.exit_img.height() * 8 + offset_y

    # Guard clauses: bail out as soon as any condition for a hit fails.
    if not touch[2]:
        return
    if not (0 <= touch[0] <= hit_right):
        return
    if not (0 <= touch[1] <= hit_bottom):
        return

    print('exit')
    app.set_exit_flag(True)
266+
255267 def show_ui (self ):
256268 img = image .Image (self .disp_w , self .disp_h , bg = image .COLOR_BLACK )
257269 ts_data = self .ts .read ()
@@ -276,22 +288,23 @@ def show_ui(self):
276288 exit_img_x = 0
277289 exit_img_y = 0
278290 img .draw_image (exit_img_x , exit_img_y , self .exit_img )
279-
280- if ts_data [2 ] and 0 <= ts_data [0 ]<= self .exit_img .width ()* 4 + exit_img_x and 0 <= ts_data [1 ]<= self .exit_img .height ()* 4 + exit_img_y :
281- print ('exit' )
282- app .set_exit_flag (True )
291+ self .check_exit ()
283292
284293 # en/zh
285294 size = image .string_size ("ZH" , scale = 2 )
286295 if self .language == 'zh' :
287296 img .draw_string (self .disp_w - size .width (), 0 , "ZH" , image .COLOR_WHITE , scale = 2 )
288297 else :
289298 img .draw_string (self .disp_w - size .width (), 0 , "EN" , image .COLOR_WHITE , scale = 2 )
290- if ts_data [2 ] and self .disp_w - size .width ()* 2 <= ts_data [0 ]<= self .disp_w and 0 <= ts_data [1 ]<= size .height () * 2 :
291- if self .language == 'zh' :
292- self .language = 'en'
293- else :
294- self .language = 'zh'
299+
300+ if self .support_zh :
301+ if ts_data [2 ] and self .disp_w - size .width ()* 2 <= ts_data [0 ]<= self .disp_w and 0 <= ts_data [1 ]<= size .height () * 2 :
302+ if self .language == 'zh' :
303+ self .language = 'en'
304+ else :
305+ self .language = 'zh'
306+ else :
307+ self .language = 'en'
295308 self .disp .show (img )
296309
297310
@@ -300,6 +313,7 @@ def __vlm_on_reply(self, obj, resp):
300313 if self .vlm_img :
301314 self .page_text .add_text (resp .msg_new )
302315 # self.show_ui()
316+ self .check_exit ()
303317
304318 def __show_load_info (self , text : str , x :int = 0 , y :int = 0 , color :image .Color = image .COLOR_WHITE ):
305319 if self .disp :
@@ -312,25 +326,19 @@ def __show_load_info(self, text: str, x:int = 0, y:int = 0, color:image.Color=im
312326 img .draw_string (x , y , text , image .COLOR_WHITE )
313327 self .disp .show (img )
314328
315- def __draw_string_upper_center (self , img , y :int = 8 , text :str = "" , color :image .Color = image .COLOR_WHITE ):
316- x = 0
317- text_size = image .string_size (text )
318- x = (img .width () - text_size .width ()) // 2
319- img .draw_string (x , y , text , color )
320-
321329 def run (self ):
322330 while not app .need_exit ():
323331 with self .vlm_thread_lock :
324332 sta = self .sta
325333
326334 if sta == self .Status .IDLE :
327- print ('IDLE' )
335+ # print('IDLE')
328336 self .vlm_img = self .cam .read ()
329337 if self .vlm_img :
330338 with self .vlm_thread_lock :
331339 self .sta = self .Status .VLM_START
332340 elif sta == self .Status .VLM_START :
333- print ('VLM_START' )
341+ # print('VLM_START')
334342 if self .vlm_img :
335343 if self .language == 'zh' :
336344 msg = '简单描述这张图片'
@@ -340,10 +348,10 @@ def run(self):
340348 with self .vlm_thread_lock :
341349 self .sta = self .Status .VLM_RUNNING
342350 elif sta == self .Status .VLM_RUNNING :
343- print ('VLM_RUNNING' )
351+ # print('VLM_RUNNING')
344352 self .vlm_img = self .cam .read ()
345353 elif sta == self .Status .VLM_STOP :
346- print ('VLM_STOP' )
354+ # print('VLM_STOP')
347355 with self .vlm_thread_lock :
348356 self .sta = self .Status .IDLE
349357
0 commit comments