Skip to content

Commit 05048f5

Browse files
committed
* add app_vlm
1 parent 75abff7 commit 05048f5

File tree

6 files changed

+249
-0
lines changed

6 files changed

+249
-0
lines changed

projects/app_vlm/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
data
2+
__pycache__
3+
dist

projects/app_vlm/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
AI VLM
2+
3+
Offline AI Vision Language Model
4+
5+
6+
7+

projects/app_vlm/app.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
id: ai_vlm
2+
name: AI VLM
3+
name[zh]: AI 视觉大模型
4+
version: 1.0.3
5+
icon: assets/icon.png
6+
author: Sipeed Ltd
7+
desc: AI Vision Language Model
8+
desc[zh]: AI 视觉大模型
9+
exclude:
10+
- dist
11+
- build
12+
- .gitignore
13+
files:
14+
- assets
15+
- app.yaml
16+
- main.py
17+
- README.md

projects/app_vlm/assets/exit.jpg

785 Bytes
Loading

projects/app_vlm/assets/icon.png

2.67 KB
Loading

projects/app_vlm/main.py

Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
from maix import nn, camera, time, display, app, image, touchscreen
2+
import threading
3+
4+
class PagedText:
    """Accumulate streamed text into pages of width-limited lines.

    ``pages`` is a list of pages; each page is a list of lines and each
    line is a ``(text, width, height)`` tuple whose metrics come from
    ``image.string_size`` (i.e. they depend on the current default font).
    """

    def __init__(self, page_width = -1, page_height = -1):
        """
        page_width: maximum width of one line, compared against the width
            reported by image.string_size
        page_height: maximum summed line height of one page, compared
            against the heights reported by image.string_size
        """
        self.page_width = page_width
        self.page_height = page_height
        self.pages = [[]]

    def reset(self, page_width, page_height):
        """Set new page dimensions and drop all stored text."""
        self.page_width = page_width
        self.page_height = page_height
        self.pages = [[]]

    def add_text(self, text):
        """Append ``text`` character by character, wrapping lines and pages.

        A character is appended to the last line while the measured width
        of that line stays within ``page_width``. Otherwise it starts a
        new line, or a new page when the current page has no room left
        for another line as tall as the current one.
        """
        current_page = self.pages[-1]
        if not current_page:
            current_page.append(("", 0, 0))  # start the first line

        for ch in text:
            line_text, _, line_h = current_page[-1]
            candidate = line_text + ch
            size = image.string_size(candidate)

            if size[0] <= self.page_width:
                # Still fits: replace the last line with the extended one.
                current_page[-1] = (candidate, size[0], max(line_h, size[1]))
                continue

            # The character opens a new line, so record the metrics of the
            # character itself rather than those of the overflowing line.
            ch_size = image.string_size(ch)
            new_line = (ch, ch_size[0], ch_size[1])

            page_height_used = sum(line[2] for line in current_page)
            if page_height_used + line_h <= self.page_height:
                current_page.append(new_line)
            else:
                # No room for another line: continue on a fresh page.
                self.pages.append([new_line])
                current_page = self.pages[-1]

    def clear(self):
        """Drop all stored text, keeping the page dimensions."""
        self.pages = [[]]

    def print(self):
        """Dump every page and line with its metrics to stdout (debug aid)."""
        for i, page in enumerate(self.pages):
            print(f"Page {i+1}:")
            page_height = sum(line[2] for line in page)
            for line_text, line_width, line_height in page:
                print(f"  '{line_text}' (w={line_width}, h={line_height})")
            print(f"  -> total height used = {page_height}")
            print()

    def draw_last_page_on(self, img:image.Image, color: image.Color = image.COLOR_WHITE):
        """Draw the lines of the most recent page onto ``img``, top to bottom.

        Nothing is drawn when ``img`` does not have exactly the configured
        page size, or when the last page is empty.
        """
        if img.width() != self.page_width or img.height() != self.page_height:
            return

        current_page = self.pages[-1]
        if not current_page:
            return

        y = 0
        for line_text, _, line_height in current_page:
            img.draw_string(0, y, line_text, color, wrap_space=0)
            y += line_height
79+
80+
class App:
    """Camera + InternVL demo application.

    The main loop (``run``) grabs a camera frame, hands it to the model on
    a worker thread, and keeps refreshing the screen with the live camera
    image and the reply text received so far.
    """

    class Status:
        """States of the main loop. Plain ints (no trailing commas, which
        would silently turn the members into 1-tuples)."""
        IDLE = 0
        VLM_START = 1
        VLM_RUNNING = 2
        VLM_STOP = 3

    def __init__(self):
        """Set up display, touchscreen, camera and the model.

        When the ``npu/ai_isp`` system setting is enabled, a warning is
        shown until the screen is touched (or exit is requested), the exit
        flag is set, and the model is NOT loaded.
        """
        image.load_font("sourcehansans", "/maixapp/share/font/SourceHanSansCN-Regular.otf", size = 20)
        image.set_default_font("sourcehansans")
        self.disp = display.Display()
        self.disp_w = self.disp.width()
        self.disp_h = self.disp.height()
        self.__show_load_info('loading touchscreen..')
        self.ts = touchscreen.TouchScreen()

        self.cam = camera.Camera(640, 360)

        self.exit_img = image.load('./assets/exit.jpg')

        # State shared between the main loop and the worker thread. It is
        # created before any early return so the object is always complete.
        self.vlm = None
        self.vlm_img: image.Image | None = None
        self.vlm_thread_lock = threading.Lock()
        self.vlm_result:str = ''
        # The text panel occupies the part of the screen below the camera image.
        self.page_text = PagedText(self.disp_w, self.disp_h - self.cam.height())
        self.sta = self.Status.IDLE

        ai_isp_on = bool(int(app.get_sys_config_kv("npu", "ai_isp", "1")))
        if ai_isp_on:
            img = image.Image(self.disp_w, self.disp_h, bg=image.COLOR_BLACK)
            err_msg = "Please turn off AI ISP first via the Settings app(Settings->AI ISP)"
            img.draw_string(0, 0, err_msg, image.COLOR_RED)
            self.disp.show(img)
            while not app.need_exit():
                ts_data = self.ts.read()
                if ts_data[2]:
                    app.set_exit_flag(True)
                time.sleep_ms(100)
            # Exit has been requested: do not go on to load the model.
            # run() returns immediately because app.need_exit() is now true.
            return

        self.__show_load_info('loading vlm..')
        self.vlm = nn.InternVL('/root/models/InternVL2.5-1B/model.mud')
        self.vlm_in_w = self.vlm.input_width()
        self.vlm_in_h = self.vlm.input_height()
        self.vlm_in_fmt = self.vlm.input_format()
        self.vlm.set_system_prompt("你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型, 英文名叫InternVL, 是一个有用无害的人工智能助手。")
        self.vlm.set_reply_callback(self.__vlm_on_reply)

    def __vlm_thread(self, vlm, img:image.Image, msg: str):
        """Worker body: send ``img`` and ``msg`` to the model, then flag VLM_STOP."""
        vlm.set_image(img, image.Fit.FIT_CONTAIN)
        resp = vlm.send(msg)
        print(resp)
        with self.vlm_thread_lock:
            self.sta = self.Status.VLM_STOP

    def run_vlm(self, img: image.Image, msg: str):
        """Clear the reply text and start one model request on a daemon thread."""
        self.page_text.clear()
        t = threading.Thread(target=self.__vlm_thread, args=[self.vlm, img, msg], daemon=True)
        t.start()

    def show_ui(self):
        """Compose and show one frame; request exit when the exit icon is touched.

        Layout: camera image at the top-left, text panel below it, exit
        icon at the top-left corner on top of the camera image.
        """
        img = image.Image(self.disp_w, self.disp_h, bg=image.COLOR_BLACK)
        ts_data = self.ts.read()

        # Text panel: everything below the camera image.
        text_img_x = 0
        text_img_y = self.cam.height()
        text_img = image.Image(self.disp_w, self.disp_h - text_img_y, bg=image.COLOR_BLACK)

        frame = self.vlm_img
        if frame:
            img.draw_image(0, 0, frame)
            self.page_text.draw_last_page_on(text_img, image.COLOR_WHITE)
        else:
            text_img.draw_string(0, 0, "running..", image.COLOR_WHITE)
        img.draw_image(text_img_x, text_img_y, text_img)

        # Exit icon.
        exit_img_x = 0
        exit_img_y = 0
        img.draw_image(exit_img_x, exit_img_y, self.exit_img)

        # ts_data[2] is used as the "pressed" flag, ts_data[0]/[1] as x/y.
        touched_exit = (
            ts_data[2]
            and 0 <= ts_data[0] <= self.exit_img.width() + exit_img_x*2
            and 0 <= ts_data[1] <= self.exit_img.height() + exit_img_y*2
        )
        if touched_exit:
            print('exit')
            app.set_exit_flag(True)

        self.disp.show(img)

    def __vlm_on_reply(self, obj, resp):
        """Reply callback registered on the model: append ``resp.msg_new`` to the text panel."""
        print(resp.msg_new)
        if self.vlm_img:
            self.page_text.add_text(resp.msg_new)

    def __show_load_info(self, text: str, x:int = 0, y:int = 0, color:image.Color=image.COLOR_WHITE):
        """Show ``text`` on an otherwise black screen.

        A coordinate left at 0 means "center along that axis".
        """
        if self.disp:
            str_size = image.string_size(text)
            img = image.Image(self.disp_w, self.disp_h, bg=image.COLOR_BLACK)
            if x == 0:
                x = (img.width() - str_size.width()) // 2
            if y == 0:
                y = (img.height() - str_size.height()) // 2
            # Honor the caller's color instead of always drawing white.
            img.draw_string(x, y, text, color)
            self.disp.show(img)

    def __draw_string_upper_center(self, img, y:int=8, text:str="", color:image.Color=image.COLOR_WHITE):
        """Draw ``text`` horizontally centered on ``img`` at vertical offset ``y``."""
        text_size = image.string_size(text)
        x = (img.width() - text_size.width()) // 2
        img.draw_string(x, y, text, color)

    def run(self):
        """Main loop: IDLE -> VLM_START -> VLM_RUNNING -> VLM_STOP -> IDLE.

        Runs until ``app.need_exit()`` is true, refreshing the screen on
        every iteration.
        """
        while not app.need_exit():
            with self.vlm_thread_lock:
                sta = self.sta

            if sta == self.Status.IDLE:
                print('IDLE')
                self.vlm_img = self.cam.read()
                if self.vlm_img:
                    with self.vlm_thread_lock:
                        self.sta = self.Status.VLM_START
            elif sta == self.Status.VLM_START:
                print('VLM_START')
                if self.vlm_img:
                    # Publish VLM_RUNNING before the worker starts. Setting
                    # it afterwards could overwrite a VLM_STOP written by a
                    # worker that finished first, leaving the loop stuck.
                    with self.vlm_thread_lock:
                        self.sta = self.Status.VLM_RUNNING
                    self.run_vlm(self.vlm_img, 'Describe the picture')
            elif sta == self.Status.VLM_RUNNING:
                print('VLM_RUNNING')
                # Keep the preview live while the model is working.
                self.vlm_img = self.cam.read()
            elif sta == self.Status.VLM_STOP:
                print('VLM_STOP')
                with self.vlm_thread_lock:
                    self.sta = self.Status.IDLE

            self.show_ui()
219+
220+
if __name__ == '__main__':
    # Script entry point: build the application, then hand control to its main loop.
    application = App()
    application.run()

0 commit comments

Comments
 (0)