Skip to content

Commit 628b37c

Browse files
committed
feat(multimodal): 完整实现多模态图片支持功能
1. 后端: - 添加图片上传接口 /api/chat/image/upload - 在消息处理中支持图片内容 - 更新数据库模型和迁移脚本 - 实现图片处理工具类 2. 前端: - 添加图片上传和预览组件 - 支持多模态消息发送 - 更新消息显示以支持图片 - 集成图片上传API 3. 文档: - 更新agents-config.md添加多模态图片支持说明 - 更新roadmap.md添加多模态支持计划 主要功能包括: - 支持JPEG、PNG、WebP、GIF、BMP等格式 - 图片大小限制为10MB - 自动处理图片格式转换和压缩 - 支持单图片上传 - 图片以base64编码存储在数据库
1 parent 67ddbe6 commit 628b37c

File tree

14 files changed

+698
-32
lines changed

14 files changed

+698
-32
lines changed

docs/latest/advanced/agents-config.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,3 +123,29 @@ MYSQL_CHARSET=utf8mb4
123123
```
124124

125125
所有查询限定在只读范围(SELECT、SHOW、DESCRIBE、EXPLAIN),请求会经过表名校验与超时控制,默认限制 60 秒与 100 行输出,并可通过配置调整上限。连接信息会反馈给 LangGraph,智能体可以自动陈述数据库用途并选择更准确的检索策略。
126+
127+
### 多模态图片支持
128+
129+
系统支持接收图片作为输入,与文本结合形成多模态查询。图片支持的核心特性如下:
130+
131+
#### 1. 图片上传与处理
132+
- 通过 `POST /api/chat/image/upload` 接口上传图片(需要登录)
133+
- 自动处理图片格式转换和压缩
134+
- 返回 base64 编码的图片数据
135+
- 图片大小限制为 10MB
136+
- 支持的图片格式:JPEG、PNG、WebP、GIF、BMP
137+
- 自动压缩超过 5MB 的图片
138+
139+
当发送包含图片的请求时,消息格式为:
140+
```json
141+
{
142+
"query": "这张图片里有什么?",
143+
"image_content": "<base64编码的图片数据>",
144+
"config": {},
145+
"meta": {}
146+
}
147+
```
148+
149+
智能体会自动识别多模态消息并将其传递给支持图片的模型。如果模型不支持图片,会自动忽略图片内容,只处理文本部分。系统会将图片转换为符合模型要求的格式(通常是 base64 编码的 JPEG 或 PNG),确保与主流多模态模型兼容。
150+
151+
目前仅支持上传单个图片,图片直接以 base64 编码存储在数据库中。

docs/latest/changelog/roadmap.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
- 优化知识库详情页面,更加简洁清晰
2323
- 新增对于上传文件的智能体中间件
2424
- 增强文件下载功能
25+
- 新增多模态模型支持(当前仅支持图片,详见文档)
2526

2627
### 修复
2728
- 修复重排序模型实际未生效的问题

server/routers/chat_router.py

Lines changed: 103 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,21 @@
2828
)
2929
from src.utils.datetime_utils import utc_isoformat
3030
from src.utils.logging_config import logger
31+
from src.utils.image_processor import process_uploaded_image
32+
33+
34+
# 图片上传响应模型
35+
class ImageUploadResponse(BaseModel):
    # Response body of the image upload endpoint. It is built with
    # ``ImageUploadResponse(**result)`` from the dict returned by
    # ``process_uploaded_image``, so the field names must match that dict's keys.

    # Only ``success`` is required; every other field defaults to None.
    success: bool

    # Image payload returned to the client (the upload endpoint documents it as
    # base64-encoded image data).
    image_content: str | None = None
    # NOTE(review): presumably a reduced-size preview in the same encoding —
    # confirm against process_uploaded_image.
    thumbnail_content: str | None = None

    # Image metadata as reported by the image processor.
    width: int | None = None
    height: int | None = None
    format: str | None = None
    mime_type: str | None = None
    size_bytes: int | None = None

    # Failure description reported by the image processor.
    error: str | None = None
45+
3146

3247
chat = APIRouter(prefix="/chat", tags=["chat"])
3348

@@ -391,13 +406,18 @@ async def chat_agent(
391406
query: str = Body(...),
392407
config: dict = Body({}),
393408
meta: dict = Body({}),
409+
image_content: str | None = Body(None),
394410
current_user: User = Depends(get_required_user),
395411
db: Session = Depends(get_db),
396412
):
397413
"""使用特定智能体进行对话(需要登录)"""
398414
start_time = asyncio.get_event_loop().time()
399415

400416
logger.info(f"agent_id: {agent_id}, query: {query}, config: {config}, meta: {meta}")
417+
logger.info(f"image_content present: {image_content is not None}")
418+
if image_content:
419+
logger.info(f"image_content length: {len(image_content)}")
420+
logger.info(f"image_content preview: {image_content[:50]}...")
401421

402422
# 确保 request_id 存在
403423
if "request_id" not in meta or not meta.get("request_id"):
@@ -410,6 +430,7 @@ async def chat_agent(
410430
"server_model_name": config.get("model", agent_id),
411431
"thread_id": config.get("thread_id"),
412432
"user_id": current_user.id,
433+
"has_image": bool(image_content),
413434
}
414435
)
415436

@@ -423,8 +444,32 @@ def make_chunk(content=None, **kwargs):
423444
)
424445

425446
async def stream_messages():
426-
# 代表服务端已经收到了请求
427-
yield make_chunk(status="init", meta=meta, msg=HumanMessage(content=query).model_dump())
447+
# 构建多模态消息
448+
if image_content:
449+
# 多模态消息格式
450+
human_message = HumanMessage(
451+
content=[
452+
{"type": "text", "text": query},
453+
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_content}"}},
454+
]
455+
)
456+
message_type = "multimodal_image"
457+
else:
458+
# 普通文本消息
459+
human_message = HumanMessage(content=query)
460+
message_type = "text"
461+
462+
# 代表服务端已经收到了请求,发送前端友好的消息格式
463+
init_msg = {"role": "user", "content": query, "type": "human"}
464+
465+
# 如果有图片,添加图片相关信息
466+
if image_content:
467+
init_msg["message_type"] = "multimodal_image"
468+
init_msg["image_content"] = image_content
469+
else:
470+
init_msg["message_type"] = "text"
471+
472+
yield make_chunk(status="init", meta=meta, msg=init_msg)
428473

429474
# Input guard
430475
if conf.enable_content_guard and await content_guard.check(query):
@@ -438,7 +483,7 @@ async def stream_messages():
438483
yield make_chunk(message=f"Error getting agent {agent_id}: {e}", status="error")
439484
return
440485

441-
messages = [{"role": "user", "content": query}]
486+
messages = [human_message]
442487

443488
# 构造运行时配置,如果没有thread_id则生成一个
444489
user_id = str(current_user.id)
@@ -458,8 +503,9 @@ async def stream_messages():
458503
thread_id=thread_id,
459504
role="user",
460505
content=query,
461-
message_type="text",
462-
extra_metadata={"raw_message": HumanMessage(content=query).model_dump()},
506+
message_type=message_type,
507+
image_content=image_content,
508+
extra_metadata={"raw_message": human_message.model_dump()},
463509
)
464510
except Exception as e:
465511
logger.error(f"Error saving user message: {e}")
@@ -543,6 +589,9 @@ async def stream_messages():
543589
except Exception as e:
544590
logger.error(f"Error streaming messages: {e}, {traceback.format_exc()}")
545591

592+
error_msg = f"Error streaming messages: {e}"
593+
error_type = "unexpected_error"
594+
546595
# 保存错误消息到数据库
547596
new_db = db_manager.get_session()
548597
try:
@@ -551,13 +600,13 @@ async def stream_messages():
551600
new_conv_manager,
552601
thread_id,
553602
full_msg=full_msg,
554-
error_message=f"Error streaming messages: {e}" if not full_msg else None,
555-
error_type="unexpected_error",
603+
error_message=error_msg if not full_msg else None,
604+
error_type=error_type,
556605
)
557606
finally:
558607
new_db.close()
559608

560-
yield make_chunk(message=f"Error streaming messages: {e}", status="error")
609+
yield make_chunk(message=error_msg, status="error")
561610

562611
return StreamingResponse(stream_messages(), media_type="application/json")
563612

@@ -766,6 +815,8 @@ async def get_agent_history(
766815
"content": msg.content,
767816
"created_at": msg.created_at.isoformat() if msg.created_at else None,
768817
"error_type": msg.extra_metadata.get("error_type") if msg.extra_metadata else None,
818+
"message_type": msg.message_type, # 添加消息类型字段
819+
"image_content": msg.image_content, # 添加图片内容字段
769820
}
770821

771822
# Add tool calls if present (for AI messages)
@@ -1143,3 +1194,47 @@ async def get_message_feedback(
11431194
except Exception as e:
11441195
logger.error(f"Error getting message feedback: {e}")
11451196
raise HTTPException(status_code=500, detail=f"Failed to get feedback: {str(e)}")
1197+
1198+
1199+
# =============================================================================
1200+
# > === 多模态图片支持分组 ===
1201+
# =============================================================================
1202+
1203+
1204+
@chat.post("/image/upload", response_model=ImageUploadResponse)
async def upload_image(file: UploadFile = File(...), current_user: User = Depends(get_required_user)):
    """Upload a single image and return the processed, base64-encoded result.

    Args:
        file: Multipart upload. Its declared content type must start with
            ``image/``.
        current_user: Authenticated user (login required); only used for the
            success log line.

    Returns:
        An ``ImageUploadResponse`` built from the dict returned by
        ``process_uploaded_image``.

    Raises:
        HTTPException: 400 if the upload is not an image, is larger than 10MB,
            or the image processor reports failure; 500 on any other error.
    """
    # Hard upper bound on the accepted upload size. Files above this limit are
    # rejected outright; they are NOT compressed.
    max_upload_bytes = 10 * 1024 * 1024

    try:
        # Validate the declared content type before touching the payload.
        if not file.content_type or not file.content_type.startswith("image/"):
            raise HTTPException(status_code=400, detail="只支持图片文件上传")

        # Read at most one byte past the limit: enough to detect an oversized
        # upload without copying an arbitrarily large file into memory.
        image_data = await file.read(max_upload_bytes + 1)
        if len(image_data) > max_upload_bytes:
            raise HTTPException(status_code=400, detail="图片文件过大,请上传小于10MB的图片")

        # process_uploaded_image is a plain synchronous call (its result is
        # used without awaiting), so run it in a worker thread to keep the
        # event loop free while the image is being processed.
        result = await asyncio.to_thread(process_uploaded_image, image_data, file.filename)

        if not result["success"]:
            raise HTTPException(status_code=400, detail=f"图片处理失败: {result['error']}")

        logger.info(
            f"用户 {current_user.id} 成功上传图片: {file.filename}, "
            f"尺寸: {result['width']}x{result['height']}, "
            f"格式: {result['format']}, "
            f"大小: {result['size_bytes']} bytes"
        )

        return ImageUploadResponse(**result)

    except HTTPException:
        # Deliberate 4xx responses raised above pass through unchanged.
        raise
    except Exception as e:
        logger.error(f"图片上传处理失败: {str(e)}, {traceback.format_exc()}")
        # Chain the cause so the original traceback is preserved.
        raise HTTPException(status_code=500, detail=f"图片处理失败: {str(e)}") from e

server/utils/migrate.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,14 @@ def get_migrations(self) -> list[tuple[int, str, list[str]]]:
258258

259259
migrations.append((2, "为用户表添加软删除字段", v2_commands))
260260

261+
# 迁移 v3: 为 messages 表添加多模态图片支持
262+
v3_commands: list[str] = []
263+
264+
if not self.check_column_exists("messages", "image_content"):
265+
v3_commands.append("ALTER TABLE messages ADD COLUMN image_content TEXT")
266+
267+
migrations.append((3, "为消息表添加多模态图片支持字段", v3_commands))
268+
261269
# 未来的迁移可以在这里添加
262270
# migrations.append((
263271
# 2,
@@ -305,6 +313,17 @@ def validate_database_schema(db_path: str) -> tuple[bool, list[str]]:
305313
"deleted_at",
306314
],
307315
"operation_logs": ["id", "user_id", "operation", "details", "ip_address", "timestamp"],
316+
"messages": [
317+
"id",
318+
"conversation_id",
319+
"role",
320+
"content",
321+
"message_type",
322+
"created_at",
323+
"token_count",
324+
"extra_metadata",
325+
"image_content",
326+
],
308327
}
309328

310329
for table_name, fields in required_fields.items():

src/storage/conversation/manager.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ def add_message(
109109
content: str,
110110
message_type: str = "text",
111111
extra_metadata: dict | None = None,
112+
image_content: str | None = None,
112113
) -> Message:
113114
"""
114115
Add a message to a conversation
@@ -117,8 +118,9 @@ def add_message(
117118
conversation_id: Conversation ID
118119
role: Message role (user/assistant/system/tool)
119120
content: Message content
120-
message_type: Message type (text/tool_call/tool_result)
121+
message_type: Message type (text/tool_call/tool_result/multimodal_image)
121122
extra_metadata: Additional metadata (complete message dump)
123+
image_content: Base64 encoded image content for multimodal messages
122124
123125
Returns:
124126
Created Message object
@@ -129,6 +131,7 @@ def add_message(
129131
content=content,
130132
message_type=message_type,
131133
extra_metadata=extra_metadata or {},
134+
image_content=image_content,
132135
)
133136

134137
self.db.add(message)
@@ -153,6 +156,7 @@ def add_message_by_thread_id(
153156
content: str,
154157
message_type: str = "text",
155158
extra_metadata: dict | None = None,
159+
image_content: str | None = None,
156160
) -> Message | None:
157161
"""
158162
Add a message to a conversation by thread ID
@@ -161,8 +165,9 @@ def add_message_by_thread_id(
161165
thread_id: Thread ID
162166
role: Message role (user/assistant/system/tool)
163167
content: Message content
164-
message_type: Message type (text/tool_call/tool_result)
168+
message_type: Message type (text/tool_call/tool_result/multimodal_image)
165169
extra_metadata: Additional metadata (complete message dump)
170+
image_content: Base64 encoded image content for multimodal messages
166171
167172
Returns:
168173
Created Message object or None if conversation not found
@@ -178,6 +183,7 @@ def add_message_by_thread_id(
178183
content=content,
179184
message_type=message_type,
180185
extra_metadata=extra_metadata,
186+
image_content=image_content,
181187
)
182188

183189
def add_tool_call(

src/storage/db/models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ class Message(Base):
6969
created_at = Column(DateTime, default=utc_now, comment="Creation time")
7070
token_count = Column(Integer, nullable=True, comment="Token count (optional)")
7171
extra_metadata = Column(JSON, nullable=True, comment="Additional metadata (complete message dump)")
72+
image_content = Column(Text, nullable=True, comment="Base64 encoded image content for multimodal messages")
7273

7374
# Relationships
7475
conversation = relationship("Conversation", back_populates="messages")
@@ -91,6 +92,7 @@ def format_utc_datetime(dt_value):
9192
"created_at": format_utc_datetime(self.created_at),
9293
"token_count": self.token_count,
9394
"metadata": self.extra_metadata or {},
95+
"image_content": self.image_content,
9496
"tool_calls": [tc.to_dict() for tc in self.tool_calls] if self.tool_calls else [],
9597
}
9698

0 commit comments

Comments
 (0)