Skip to content

Commit eb338fe

Browse files
committed
✨ image to text tool
2 parents dd1e687 + 175e3f6 commit eb338fe

19 files changed

+878
-108
lines changed

backend/agents/create_agent_info.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,16 @@
1717
from services.tenant_config_service import get_selected_knowledge_list
1818
from services.remote_mcp_service import get_remote_mcp_server_list
1919
from services.memory_config_service import build_memory_context
20+
from services.image_service import get_vlm_model
2021
from database.agent_db import search_agent_info_by_agent_id, query_sub_agents_id_list
2122
from database.tool_db import search_tools_for_sub_agent
2223
from database.model_management_db import get_model_records, get_model_by_model_id
24+
from database.client import minio_client
2325
from utils.model_name_utils import add_repo_to_name
2426
from utils.prompt_template_utils import get_agent_prompt_template
2527
from utils.config_utils import tenant_config_manager, get_model_name_from_config
2628
from consts.const import LOCAL_MCP_SERVER, MODEL_CONFIG_MAPPING, LANGUAGE
2729

28-
from backend.database.client import minio_client
29-
from backend.services.image_service import get_vlm_model
30-
3130
logger = logging.getLogger("create_agent_info")
3231
logger.setLevel(logging.DEBUG)
3332

backend/mcp_service.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import logging
2-
32
from utils.logging_utils import configure_logging
43
from fastmcp import FastMCP
54
from tool_collection.mcp.local_mcp_service import local_mcp_service

backend/services/image_service.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
import aiohttp
55

66
from consts.const import DATA_PROCESS_SERVICE
7+
from consts.const import MODEL_CONFIG_MAPPING
8+
from utils.config_utils import tenant_config_manager, get_model_name_from_config
9+
710
from nexent import MessageObserver
811
from nexent.core.models import OpenAIVLModel
912

10-
from backend.consts.const import MODEL_CONFIG_MAPPING
11-
from backend.utils.config_utils import tenant_config_manager, get_model_name_from_config
12-
1313
logger = logging.getLogger("image_service")
1414

1515

backend/services/tool_configuration_service.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,8 @@
2525
from database.user_tenant_db import get_all_tenant_ids
2626
from services.vectordatabase_service import get_embedding_model, get_vector_db_core
2727
from services.tenant_config_service import get_selected_knowledge_list
28-
29-
from backend.database.client import minio_client
30-
from backend.services.image_service import get_vlm_model
28+
from database.client import minio_client
29+
from services.image_service import get_vlm_model
3130

3231
logger = logging.getLogger("tool_configuration_service")
3332

doc/docs/zh/opensource-memorial-wall.md

Lines changed: 85 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -16,26 +16,6 @@
1616
请保持消息的礼貌和尊重,符合我们的行为准则。
1717
-->
1818

19-
::: info happyzhang - 2025-11-13
20-
也许我们正见证着未来的“后起之秀”😀
21-
:::
22-
23-
::: info KevinLeeNJ - 2025-11-13
24-
来参加华为ICT大赛的,nexent很不错,希望后续能有更多功能!
25-
:::
26-
27-
::: info lzysleep - 2025-11-7
28-
非常不错的项目,很适合快速上手搭建自己的Agent,赞赞赞!
29-
:::
30-
31-
::: info fishcat - 2025-10-31
32-
很好的项目,希望蒸蒸日上
33-
:::
34-
35-
::: tip xiaomi250 - 2025-10-18
36-
打算冲一波 ICT 大赛!正好借着这个机会多捣鼓捣鼓,把我的技术再升个级,想想还挺有意思的~
37-
:::
38-
3919
::: tip aibito - 某创业公司后端开发 - 2025-05-18
4020
我们是一家只有 15 人的小公司,之前一直想做智能客服但技术门槛太高。发现 Nexent 后如获至宝!20+ 文件格式支持让我们轻松处理用户上传的各种文档,多模态对话功能完美解决了语音客服需求。最重要的是,我们的产品经理现在也能直接用自然语言调整智能体逻辑,开发效率提升了好几倍!
4121
:::
@@ -180,6 +160,10 @@ Nexent的自然语言生成Agent以及多智能体协同是我一直在研究的
180160
第一次接触智能体编排,是为了参加华为ICT大赛而了解 Nexent 的。 没想到入门比想象中容易,文档也写得很清晰。
181161
:::
182162

163+
::: tip xiaomi250 - 2025-10-18
164+
打算冲一波 ICT 大赛!正好借着这个机会多捣鼓捣鼓,把我的技术再升个级,想想还挺有意思的~
165+
:::
166+
183167
::: tip YuXiaoLoong - 2025-10-27
184168
Nexent是一个十分便利的开发平台,文档清晰,工具齐全,有幸能用上这么好用的平台,希望能在这个平台上学到更多技术和思想。
185169
:::
@@ -238,6 +222,8 @@ Nexent功能如此之强大,给我很多帮助,感谢开发者!厉害
238222

239223
::: info y-dq - 2025-10-28 
240224
想要自己尝试搭建智能体,感叹Nexent的功能如此强大!
225+
:::
226+
241227
::: tip cai7777 - 2025-10-23
242228
参加ICT大赛来了解 Nexent
243229
:::
@@ -314,6 +300,10 @@ Nexent功能如此之强大,给我很多帮助,感谢开发者!厉害
314300
感谢 Nexent 让我踏上了开源之旅!希望能参加ict大赛长长见识。项目不错,给个star~
315301
:::
316302

303+
::: info fishcat - 2025-10-31
304+
很好的项目,希望蒸蒸日上
305+
:::
306+
317307
::: info XxHosxX - 2025-11-5
318308
希望参与ICT大赛以及Nexent平台提升自己的能力:)
319309
:::
@@ -330,6 +320,10 @@ Nexent功能如此之强大,给我很多帮助,感谢开发者!厉害
330320
期待能使用Nexent成为智能体开发大佬
331321
:::
332322

323+
::: info lzysleep - 2025-11-7
324+
非常不错的项目,很适合快速上手搭建自己的Agent,赞赞赞!
325+
:::
326+
333327
::: info xiaochenIpter - 2025-11-08
334328
希望能参加ict大赛可以学习到更多知识,感谢 Nexent 让我踏上了开源之旅!平台开发智能体的能力十分强大,希望能够学习到更多东西!
335329
:::
@@ -398,6 +392,14 @@ Nexent功能如此之强大,给我很多帮助,感谢开发者!厉害
398392
我又来了,通过华为ICT了解到nexent,正在学习中...
399393
:::
400394

395+
::: info happyzhang - 2025-11-13
396+
也许我们正见证着未来的“后起之秀”😀
397+
:::
398+
399+
::: info KevinLeeNJ - 2025-11-13
400+
来参加华为ICT大赛的,nexent很不错,希望后续能有更多功能!
401+
:::
402+
401403
::: info user - 2025-11-14
402404
我要参加华为ICT
403405
:::
@@ -434,3 +436,66 @@ Nexent功能如此之强大,给我很多帮助,感谢开发者!厉害
434436
感谢 Nexent 让我踏上了开源之旅!给我一个机会制作智能体
435437
:::
436438

439+
::: info 开源小白 - 2025-11-19
440+
感谢 Nexent 让我踏上了开源之旅!这个项目的文档真的很棒,帮助我快速上手。
441+
:::
442+
443+
::: info chengyudan - 2025-10-20
444+
感谢 Nexent 让我踏上了开源之旅!
445+
:::
446+
447+
::: info user - 2025-11-20
448+
学习ai - agent非常好的项目,后面会持续输出贡献!
449+
:::
450+
451+
::: info china-king-hs - 2025-11-20
452+
希望能正常使用nexent
453+
:::
454+
455+
::: info user - 2025-11-22
456+
感谢nexent这个开源项目
457+
:::
458+
459+
::: tip xiaofu - 2025-11-23
460+
xiaofu到此一游,感谢 Nexent 让我踏上了开源之旅!
461+
:::
462+
463+
::: info DUTBenjamin - 2025-11-23
464+
来参加华为ICT大赛的,正好借着这个机会多捣鼓捣鼓,学到更多东西,加油!
465+
:::
466+
467+
::: info dean-stock - 2025-11-23
468+
感谢nexent让我第一次接触到了智能体,让我从使用到创作智能体的转变。
469+
:::
470+
471+
::: info user - 2025-11-23
472+
学习到ai了,很好用
473+
:::
474+
475+
::: info chao - 2025-11-23
476+
使用 Nexent 开发了项目,MCP 工具集成特别强大,节省了大量开发时间!
477+
:::
478+
479+
::: info adasibi - 2025-11-23
480+
学习ai很好用,感谢 Nexent 让我踏上了开源之旅!
481+
:::
482+
483+
::: info user - 2025-11-23
484+
Nexent越来越好!
485+
:::
486+
487+
::: info DUTBenjamin - 2025-11-23
488+
来参加华为ICT大赛的,正好借着这个机会学到更多东西,加油!
489+
:::
490+
491+
::: info aurorahashcat - 2025-11-23
492+
nexent看起来超棒的自动化智能体构建平台,祝越来越好😀
493+
:::
494+
495+
::: info williamllk from SJTU - 2025-11-23
496+
感谢 Nexent 让我第一次制作智能体,尝试将AI4Science的理念付诸实践
497+
:::
498+
499+
::: tip lostlight530 - 2025-11-24
500+
通过 Nexent 实现了 Router-Worker 架构的完美落地。无论是构建高情商的拟人化伴侣,还是处理严苛的结构化数据约束,这套框架都游刃有余。多智能体编排体验极佳!
501+
:::

sdk/nexent/core/agents/nexent_agent.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,6 @@ def create_local_tool(self, tool_config: ToolConfig):
8383
"vdb_core", None) if tool_config.metadata else None
8484
tools_obj.embedding_model = tool_config.metadata.get(
8585
"embedding_model", None) if tool_config.metadata else None
86-
tools_obj = tool_class(index_names=tool_config.metadata.get("index_names", []),
87-
observer=self.observer,
88-
vdb_core=tool_config.metadata.get("vdb_core", []),
89-
embedding_model=tool_config.metadata.get("embedding_model", []),
90-
**params)
9186
elif class_name == "AnalyzeImageTool":
9287
tools_obj = tool_class(observer=self.observer,
9388
vlm_model=tool_config.metadata.get("vlm_model", []),
Lines changed: 76 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,67 @@
1+
"""
2+
Analyze Image Tool
3+
4+
Analyze images using a large language model.
5+
Supports images from S3, HTTP, and HTTPS URLs.
6+
"""
7+
18
import json
29
import logging
310
from io import BytesIO
11+
from typing import List
412

513
from jinja2 import Template, StrictUndefined
614
from pydantic import Field
715
from smolagents.tools import Tool
816

9-
from ..models.openai_vlm import OpenAIVLModel
10-
from ..utils.observer import MessageObserver, ProcessType
11-
from ..utils.prompt_template_utils import get_prompt_template
12-
from ..utils.tools_common_message import ToolCategory, ToolSign
13-
from ... import MinIOStorageClient
14-
from ...multi_modal.load_save_object import LoadSaveObjectManager
17+
from nexent.core.models import OpenAIVLModel
18+
from nexent.core.utils.observer import MessageObserver, ProcessType
19+
from nexent.core.utils.prompt_template_utils import get_prompt_template
20+
from nexent.core.utils.tools_common_message import ToolCategory, ToolSign
21+
from nexent.storage import MinIOStorageClient
22+
from nexent.multi_modal.load_save_object import LoadSaveObjectManager
1523

1624
logger = logging.getLogger("analyze_image_tool")
1725

1826

1927
class AnalyzeImageTool(Tool):
20-
"""Tool for understanding and analyzing image"""
28+
"""Tool for understanding and analyzing image using a visual language model"""
2129

2230
name = "analyze_image"
2331
description = (
24-
"This tool uses a visual language model to understand images based on your query and then returns a description of the image."
25-
"It's used to understand and analyze images stored in S3 buckets, via HTTP and HTTPS."
32+
"This tool uses a visual language model to understand images based on your query and then returns a description of the image.\n"
33+
"It is used to understand and analyze multiple images, with image sources supporting S3 URLs (s3://bucket/key or /bucket/key), "
34+
"HTTP, and HTTPS URLs.\n"
2635
"Use this tool when you want to retrieve information contained in an image and provide the image's URL and your query."
2736
)
2837
inputs = {
29-
"image_url": {
30-
"type": "string",
31-
"description": "URL of the image to analyze (e.g., 's3://bucket/path/to/image.png',"
32-
"'http://image.png', 'https://image.png')."
38+
"image_urls_list": {
39+
"type": "array",
40+
"description": "List of image URLs (S3, HTTP, or HTTPS). Supports s3://bucket/key, /bucket/key, http://, and https:// URLs.",
3341
},
3442
"query": {
3543
"type": "string",
36-
"description": "The user query to perform."
44+
"description": "User's question to guide the analysis"
3745
}
3846
}
39-
output_type = "string"
40-
# todo
41-
category = ToolCategory.FILE.value
42-
tool_sign = ToolSign.FILE_OPERATION.value
47+
output_type = "array"
48+
category = ToolCategory.MULTIMODAL.value
49+
tool_sign = ToolSign.MULTIMODAL_OPERATION.value
4350

4451
def __init__(
4552
self,
46-
observer: MessageObserver = Field(description="Message observer", default=None, exclude=True),
47-
vlm_model: OpenAIVLModel = Field(description="The VLM model to use", default=None, exclude=True),
48-
storage_client: MinIOStorageClient = Field(description="Storage client to use", default=None, exclude=True),
53+
observer: MessageObserver = Field(
54+
description="Message observer",
55+
default=None,
56+
exclude=True),
57+
vlm_model: OpenAIVLModel = Field(
58+
description="The VLM model to use",
59+
default=None,
60+
exclude=True),
61+
storage_client: MinIOStorageClient = Field(
62+
description="Storage client for downloading files from S3 URLs、HTTP URLs、HTTPS URLs.",
63+
default=None,
64+
exclude=True)
4965
):
5066
super().__init__()
5167
self.observer = observer
@@ -55,49 +71,68 @@ def __init__(
5571
self.mm = LoadSaveObjectManager(storage_client=self.storage_client)
5672

5773
# Dynamically apply the load_object decorator to forward method
58-
self.forward = self.mm.load_object(input_names=["image_url"])(self._forward_impl)
74+
self.forward = self.mm.load_object(input_names=["image_urls_list"])(self._forward_impl)
5975

6076
self.running_prompt_zh = "正在分析图片..."
6177
self.running_prompt_en = "Analyzing image..."
6278

63-
def _forward_impl(self, image_url: bytes, query: str) -> str:
79+
def _forward_impl(self, image_urls_list: List[bytes], query: str) -> List[str]:
6480
"""
65-
Analyze images of S3 URL, HTTP URL, or HTTPS URL and return the identified text.
81+
Analyze images identified by S3 URL, HTTP URL, or HTTPS URL and return the identified text.
6682
6783
Note: This method is wrapped by load_object decorator which downloads
6884
the image from S3 URL, HTTP URL, or HTTPS URL and passes bytes to this method.
6985
7086
Args:
71-
image_url: Image bytes (converted from S3 URL, HTTP URL, or HTTPS URL by decorator).
87+
image_urls_list: List of image bytes converted from URLs by the decorator.
88+
The load_object decorator converts URLs to bytes before calling this method.
89+
query: User's question to guide the analysis
7290
7391
Returns:
74-
JSON string containing the recognized text.
92+
List[str]: One analysis string per image that aligns with the order
93+
of the provided images.
7594
7695
Raises:
7796
Exception: If the image cannot be downloaded or analyzed.
7897
"""
79-
# Note: image_url is now bytes after decorator processing
80-
image_stream = BytesIO(image_url)
81-
8298
# Send tool run message
8399
if self.observer:
84100
running_prompt = self.running_prompt_zh if self.observer.lang == "zh" else self.running_prompt_en
85101
self.observer.add_message("", ProcessType.TOOL, running_prompt)
86-
card_content = [{"icon": "image", "text": "Analyzing image..."}]
102+
card_content = [{"icon": "image", "text": f"Analyzing images..."}]
87103
self.observer.add_message("", ProcessType.CARD, json.dumps(card_content, ensure_ascii=False))
88104

105+
if image_urls_list is None:
106+
raise ValueError("image_urls cannot be None")
107+
108+
if not isinstance(image_urls_list, list):
109+
raise ValueError("image_urls must be a list of bytes")
110+
111+
if not image_urls_list:
112+
raise ValueError("image_urls must contain at least one image")
113+
89114
# Load prompts from yaml file
90-
prompts = get_prompt_template(template_type='analyze_image', language=self.observer.lang)
115+
language = self.observer.lang if self.observer else "en"
116+
prompts = get_prompt_template(template_type='analyze_image', language=language)
117+
system_prompt = Template(prompts['system_prompt'], undefined=StrictUndefined).render({'query': query})
91118

92119
try:
93-
94-
response = self.vlm_model.analyze_image(
95-
image_input=image_stream,
96-
system_prompt=Template(prompts['system_prompt'], undefined=StrictUndefined).render({'query': query}))
120+
analysis_results: List[str] = []
121+
for index, image_bytes in enumerate(image_urls_list, start=1):
122+
logger.info(f"Extracting image #{index}, query: {query}")
123+
image_stream = BytesIO(image_bytes)
124+
try:
125+
response = self.vlm_model.analyze_image(
126+
image_input=image_stream,
127+
system_prompt=system_prompt
128+
)
129+
except Exception as e:
130+
raise Exception(f"Error understanding image {index}: {str(e)}")
131+
132+
analysis_results.append(response.content)
133+
134+
return analysis_results
97135
except Exception as e:
98-
raise Exception(f"Error understanding image: {str(e)}")
99-
text = response.content
100-
# Record the detailed content of this search
101-
# todo 返回的结构体是什么?
102-
search_results_data = {'text': text}
103-
return json.dumps(search_results_data, ensure_ascii=False)
136+
logger.error(f"Error analyzing image: {str(e)}", exc_info=True)
137+
error_msg = f"Error analyzing image: {str(e)}"
138+
raise Exception(error_msg)

0 commit comments

Comments
 (0)