Skip to content

Commit 3036b14

Browse files
authored
Merge branch 'develop' into wmc/bugfix_1117
2 parents 0fc8f13 + bef9329 commit 3036b14

23 files changed

+1039
-57
lines changed

backend/agents/create_agent_info.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,11 @@
1717
from services.tenant_config_service import get_selected_knowledge_list
1818
from services.remote_mcp_service import get_remote_mcp_server_list
1919
from services.memory_config_service import build_memory_context
20+
from services.image_service import get_vlm_model
2021
from database.agent_db import search_agent_info_by_agent_id, query_sub_agents_id_list
2122
from database.tool_db import search_tools_for_sub_agent
2223
from database.model_management_db import get_model_records, get_model_by_model_id
24+
from database.client import minio_client
2325
from utils.model_name_utils import add_repo_to_name
2426
from utils.prompt_template_utils import get_agent_prompt_template
2527
from utils.config_utils import tenant_config_manager, get_model_name_from_config
@@ -236,6 +238,12 @@ async def create_tool_config_list(agent_id, tenant_id, user_id):
236238
"vdb_core": get_vector_db_core(),
237239
"embedding_model": get_embedding_model(tenant_id=tenant_id),
238240
}
241+
elif tool_config.class_name == "AnalyzeImageTool":
242+
tool_config.metadata = {
243+
"vlm_model": get_vlm_model(tenant_id=tenant_id),
244+
"storage_client": minio_client,
245+
}
246+
239247
tool_config_list.append(tool_config)
240248

241249
return tool_config_list

backend/services/image_service.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@
44
import aiohttp
55

66
from consts.const import DATA_PROCESS_SERVICE
7+
from consts.const import MODEL_CONFIG_MAPPING
8+
from utils.config_utils import tenant_config_manager, get_model_name_from_config
9+
10+
from nexent import MessageObserver
11+
from nexent.core.models import OpenAIVLModel
712

813
logger = logging.getLogger("image_service")
914

@@ -23,3 +28,19 @@ async def proxy_image_impl(decoded_url: str):
2328

2429
result = await response.json()
2530
return result
31+
32+
def get_vlm_model(tenant_id: str):
    """Build an OpenAIVLModel from the tenant's VLM model configuration.

    Args:
        tenant_id: Tenant whose VLM model configuration is looked up.

    Returns:
        OpenAIVLModel: A model bound to a fresh MessageObserver. When the
        tenant has no VLM configuration, the model id, API base and API key
        are all passed as empty strings.
    """
    # Normalise a missing configuration to an empty dict so the .get()
    # lookups below cannot fail with AttributeError; the original only
    # guarded the model_id lookup against a falsy config.
    vlm_model_config = tenant_config_manager.get_model_config(
        key=MODEL_CONFIG_MAPPING["vlm"], tenant_id=tenant_id) or {}

    if vlm_model_config:
        model_id = get_model_name_from_config(vlm_model_config)
    else:
        model_id = ""

    return OpenAIVLModel(
        observer=MessageObserver(),
        model_id=model_id,
        api_base=vlm_model_config.get("base_url", ""),
        api_key=vlm_model_config.get("api_key", ""),
        temperature=0.7,
        top_p=0.7,
        frequency_penalty=0.5,
        max_tokens=512
    )

backend/services/tool_configuration_service.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
from database.user_tenant_db import get_all_tenant_ids
2626
from services.vectordatabase_service import get_embedding_model, get_vector_db_core
2727
from services.tenant_config_service import get_selected_knowledge_list
28+
from database.client import minio_client
29+
from services.image_service import get_vlm_model
2830

2931
logger = logging.getLogger("tool_configuration_service")
3032

@@ -613,6 +615,16 @@ def _validate_local_tool(
613615
'embedding_model': embedding_model,
614616
}
615617
tool_instance = tool_class(**params)
618+
elif tool_name == "analyze_image":
619+
if not tenant_id or not user_id:
620+
raise ToolExecutionException(f"Tenant ID and User ID are required for {tool_name} validation")
621+
image_to_text_model = get_vlm_model(tenant_id=tenant_id)
622+
params = {
623+
**instantiation_params,
624+
'vlm_model': image_to_text_model,
625+
'storage_client': minio_client
626+
}
627+
tool_instance = tool_class(**params)
616628
else:
617629
tool_instance = tool_class(**instantiation_params)
618630

doc/docs/zh/opensource-memorial-wall.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -516,3 +516,11 @@ nexent智能体帮助我学到更多的东西,赞!
516516
::: info SkyWalker - 2025-11-26
517517
第一次使用nexent,想借此更快入手ai应用开发呀!
518518
:::
519+
520+
:::info user - 2025-11-26
521+
Nexent开发者加油
522+
:::
523+
524+
:::info NOSN - 2025-11-27
525+
Nexent越做越强大!
526+
:::

frontend/app/[locale]/agents/components/PromptManager.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -615,7 +615,7 @@ export default function PromptManager({
615615
overflowY: "auto",
616616
}}
617617
autoSize={false}
618-
disabled={!isEditingMode}
618+
disabled={!isEditingMode || isGeneratingAgent}
619619
/>
620620
</div>
621621

sdk/nexent/core/agents/nexent_agent.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,11 @@ def create_local_tool(self, tool_config: ToolConfig):
8383
"vdb_core", None) if tool_config.metadata else None
8484
tools_obj.embedding_model = tool_config.metadata.get(
8585
"embedding_model", None) if tool_config.metadata else None
86+
elif class_name == "AnalyzeImageTool":
87+
tools_obj = tool_class(observer=self.observer,
88+
vlm_model=tool_config.metadata.get("vlm_model", []),
89+
storage_client=tool_config.metadata.get("storage_client", []),
90+
**params)
8691
else:
8792
tools_obj = tool_class(**params)
8893
if hasattr(tools_obj, 'observer'):
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# 图片分析 Prompt 模板
2+
# 用于图片分析
3+
4+
system_prompt: |-
5+
用户提出了一个问题:{{ query }},请从回答这个问题的角度精简、仔细描述一下这个图片,200字以内。
6+
7+
**图片分析要求:**
8+
1. 重点关注与用户问题相关的图片内容
9+
2. 描述要精简明了,突出关键信息
10+
3. 避免无关细节,专注于能帮助回答问题的内容
11+
4. 保持客观描述,不要过度解读
12+
13+
user_prompt: |
14+
请仔细观察这张图片,并从回答用户问题的角度进行描述。
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Image Understanding Prompt Templates
2+
3+
system_prompt: |-
4+
The user has asked a question: {{ query }}. Please provide a concise and careful description of this image from the perspective of answering this question, within 200 words.
5+
6+
**Image Analysis Requirements:**
7+
1. Focus on image content relevant to the user's question
8+
2. Keep descriptions concise and clear, highlighting key information
9+
3. Avoid irrelevant details, focus on content that helps answer the question
10+
4. Maintain objective description, avoid over-interpretation
11+
12+
user_prompt: |
13+
Please carefully observe this image and describe it from the perspective of answering the user's question.

sdk/nexent/core/tools/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from .move_item_tool import MoveItemTool
1313
from .list_directory_tool import ListDirectoryTool
1414
from .terminal_tool import TerminalTool
15+
from .analyze_image_tool import AnalyzeImageTool
1516

1617
__all__ = [
1718
"ExaSearchTool",
@@ -27,5 +28,6 @@
2728
"DeleteDirectoryTool",
2829
"MoveItemTool",
2930
"ListDirectoryTool",
30-
"TerminalTool"
31+
"TerminalTool",
32+
"AnalyzeImageTool"
3133
]
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
"""
2+
Analyze Image Tool
3+
4+
Analyze images using a visual language model (VLM).
5+
Supports images from S3, HTTP, and HTTPS URLs.
6+
"""
7+
8+
import json
9+
import logging
10+
from io import BytesIO
11+
from typing import List
12+
13+
from jinja2 import Template, StrictUndefined
14+
from pydantic import Field
15+
from smolagents.tools import Tool
16+
17+
from nexent.core.models import OpenAIVLModel
18+
from nexent.core.utils.observer import MessageObserver, ProcessType
19+
from nexent.core.utils.prompt_template_utils import get_prompt_template
20+
from nexent.core.utils.tools_common_message import ToolCategory, ToolSign
21+
from nexent.storage import MinIOStorageClient
22+
from nexent.multi_modal.load_save_object import LoadSaveObjectManager
23+
24+
logger = logging.getLogger("analyze_image_tool")
25+
26+
27+
class AnalyzeImageTool(Tool):
    """Tool for understanding and analyzing images using a visual language model.

    The public ``forward`` callable is created per instance in ``__init__`` by
    wrapping ``_forward_impl`` with ``LoadSaveObjectManager.load_object``.
    According to the notes in ``_forward_impl``, that wrapper turns the URLs in
    ``image_urls_list`` into raw image bytes before the implementation runs.
    """

    name = "analyze_image"
    description = (
        "This tool uses a visual language model to understand images based on your query and then returns a description of the image.\n"
        "It is used to understand and analyze multiple images, with image sources supporting S3 URLs (s3://bucket/key or /bucket/key), "
        "HTTP, and HTTPS URLs.\n"
        "Use this tool when you want to retrieve information contained in an image and provide the image's URL and your query."
    )
    inputs = {
        "image_urls_list": {
            "type": "array",
            "description": "List of image URLs (S3, HTTP, or HTTPS). Supports s3://bucket/key, /bucket/key, http://, and https:// URLs.",
        },
        "query": {
            "type": "string",
            "description": "User's question to guide the analysis"
        }
    }
    output_type = "array"
    category = ToolCategory.MULTIMODAL.value
    tool_sign = ToolSign.MULTIMODAL_OPERATION.value

    def __init__(
        self,
        observer: MessageObserver = Field(
            description="Message observer",
            default=None,
            exclude=True),
        vlm_model: OpenAIVLModel = Field(
            description="The VLM model to use",
            default=None,
            exclude=True),
        storage_client: MinIOStorageClient = Field(
            description="Storage client for downloading files from S3 URLs、HTTP URLs、HTTPS URLs.",
            default=None,
            exclude=True)
    ):
        """Store the collaborators and build the per-instance ``forward`` callable.

        Args:
            observer: Receives progress messages while the tool runs.
            vlm_model: Model whose ``analyze_image`` method is called per image.
            storage_client: Handed to ``LoadSaveObjectManager`` for loading images.
        """
        super().__init__()
        self.observer = observer
        self.vlm_model = vlm_model
        self.storage_client = storage_client
        # Create LoadSaveObjectManager with the storage client
        self.mm = LoadSaveObjectManager(storage_client=self.storage_client)

        # Dynamically apply the load_object decorator to forward method
        self.forward = self.mm.load_object(input_names=["image_urls_list"])(self._forward_impl)

        self.running_prompt_zh = "正在分析图片..."
        self.running_prompt_en = "Analyzing image..."

    def _forward_impl(self, image_urls_list: List[bytes], query: str) -> List[str]:
        """
        Analyze each image with the VLM and return one description per image.

        Note: This method is wrapped by the load_object decorator, which is
        expected to download each image from its S3, HTTP, or HTTPS URL and
        pass the resulting bytes to this method.

        Args:
            image_urls_list: List of image bytes converted from URLs by the decorator.
            query: User's question to guide the analysis.

        Returns:
            List[str]: One analysis string per image, in the same order as the
            provided images.

        Raises:
            ValueError: If ``image_urls_list`` is None, not a list, or empty.
            Exception: If any image cannot be analyzed. The original error is
                attached as ``__cause__``.
        """
        # Send tool run message
        if self.observer:
            running_prompt = self.running_prompt_zh if self.observer.lang == "zh" else self.running_prompt_en
            self.observer.add_message("", ProcessType.TOOL, running_prompt)
            card_content = [{"icon": "image", "text": "Analyzing images..."}]
            self.observer.add_message("", ProcessType.CARD, json.dumps(card_content, ensure_ascii=False))

        if image_urls_list is None:
            raise ValueError("image_urls cannot be None")

        if not isinstance(image_urls_list, list):
            raise ValueError("image_urls must be a list of bytes")

        if not image_urls_list:
            raise ValueError("image_urls must contain at least one image")

        # Load prompts from yaml file
        language = self.observer.lang if self.observer else "en"
        prompts = get_prompt_template(template_type='analyze_image', language=language)
        # StrictUndefined makes rendering fail on any template variable that is not supplied.
        system_prompt = Template(prompts['system_prompt'], undefined=StrictUndefined).render({'query': query})

        try:
            analysis_results: List[str] = []
            for index, image_bytes in enumerate(image_urls_list, start=1):
                logger.info("Extracting image #%s, query: %s", index, query)
                image_stream = BytesIO(image_bytes)
                try:
                    response = self.vlm_model.analyze_image(
                        image_input=image_stream,
                        system_prompt=system_prompt
                    )
                except Exception as e:
                    # Tag the failure with the 1-based image index and keep the
                    # original error as the explicit cause.
                    raise Exception(f"Error understanding image {index}: {str(e)}") from e

                analysis_results.append(response.content)

            return analysis_results
        except Exception as e:
            # Per-image failures raised above also land here, so the final
            # message carries both prefixes; the cause chain stays intact.
            logger.error("Error analyzing image: %s", str(e), exc_info=True)
            error_msg = f"Error analyzing image: {str(e)}"
            raise Exception(error_msg) from e

0 commit comments

Comments
 (0)