Skip to content

Commit 4787eb3

Browse files
committed
✨ Multimodal agent.
Pass the URL of the multimodal file as the query to the agent.
2 parents 5cbdc90 + 1e3bbfe commit 4787eb3

File tree

12 files changed

+140
-318
lines changed

12 files changed

+140
-318
lines changed

backend/agents/create_agent_info.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -239,17 +239,17 @@ async def create_tool_config_list(agent_id, tenant_id, user_id):
239239
"vdb_core": get_vector_db_core(),
240240
"embedding_model": get_embedding_model(tenant_id=tenant_id),
241241
}
242-
elif tool_config.class_name == "AnalyzeImageTool":
243-
tool_config.metadata = {
244-
"vlm_model": get_vlm_model(tenant_id=tenant_id),
245-
"storage_client": minio_client,
246-
}
247242
elif tool_config.class_name == "AnalyzeTextFileTool":
248243
tool_config.metadata = {
249244
"llm_model": get_llm_model(tenant_id=tenant_id),
250245
"storage_client": minio_client,
251246
"data_process_service_url": DATA_PROCESS_SERVICE
252247
}
248+
elif tool_config.class_name == "AnalyzeImageTool":
249+
tool_config.metadata = {
250+
"vlm_model": get_vlm_model(tenant_id=tenant_id),
251+
"storage_client": minio_client,
252+
}
253253

254254
tool_config_list.append(tool_config)
255255

backend/services/tool_configuration_service.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -616,25 +616,25 @@ def _validate_local_tool(
616616
'embedding_model': embedding_model,
617617
}
618618
tool_instance = tool_class(**params)
619-
elif tool_name == "analyze_text_file":
619+
elif tool_name == "analyze_image":
620620
if not tenant_id or not user_id:
621621
raise ToolExecutionException(f"Tenant ID and User ID are required for {tool_name} validation")
622-
long_text_to_text_model = get_llm_model(tenant_id=tenant_id)
622+
image_to_text_model = get_vlm_model(tenant_id=tenant_id)
623623
params = {
624624
**instantiation_params,
625-
'llm_model': long_text_to_text_model,
626-
'storage_client': minio_client,
627-
"data_process_service_url": DATA_PROCESS_SERVICE
625+
'vlm_model': image_to_text_model,
626+
'storage_client': minio_client
628627
}
629628
tool_instance = tool_class(**params)
630-
elif tool_name == "analyze_image":
629+
elif tool_name == "analyze_text_file":
631630
if not tenant_id or not user_id:
632631
raise ToolExecutionException(f"Tenant ID and User ID are required for {tool_name} validation")
633-
image_to_text_model = get_vlm_model(tenant_id=tenant_id)
632+
long_text_to_text_model = get_llm_model(tenant_id=tenant_id)
634633
params = {
635634
**instantiation_params,
636-
'vlm_model': image_to_text_model,
637-
'storage_client': minio_client
635+
'llm_model': long_text_to_text_model,
636+
'storage_client': minio_client,
637+
"data_process_service_url": DATA_PROCESS_SERVICE
638638
}
639639
tool_instance = tool_class(**params)
640640
else:

frontend/app/[locale]/agents/components/PromptManager.tsx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -615,7 +615,7 @@ export default function PromptManager({
615615
overflowY: "auto",
616616
}}
617617
autoSize={false}
618-
disabled={!isEditingMode}
618+
disabled={!isEditingMode || isGeneratingAgent}
619619
/>
620620
</div>
621621

sdk/nexent/core/agents/nexent_agent.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -83,17 +83,17 @@ def create_local_tool(self, tool_config: ToolConfig):
8383
"vdb_core", None) if tool_config.metadata else None
8484
tools_obj.embedding_model = tool_config.metadata.get(
8585
"embedding_model", None) if tool_config.metadata else None
86-
elif class_name == "AnalyzeImageTool":
87-
tools_obj = tool_class(observer=self.observer,
88-
vlm_model=tool_config.metadata.get("vlm_model", []),
89-
storage_client=tool_config.metadata.get("storage_client", []),
90-
**params)
9186
elif class_name == "AnalyzeTextFileTool":
9287
tools_obj = tool_class(observer=self.observer,
9388
llm_model=tool_config.metadata.get("llm_model", []),
9489
storage_client=tool_config.metadata.get("storage_client", []),
9590
data_process_service_url=tool_config.metadata.get("data_process_service_url", []),
9691
**params)
92+
elif class_name == "AnalyzeImageTool":
93+
tools_obj = tool_class(observer=self.observer,
94+
vlm_model=tool_config.metadata.get("vlm_model", []),
95+
storage_client=tool_config.metadata.get("storage_client", []),
96+
**params)
9797
else:
9898
tools_obj = tool_class(**params)
9999
if hasattr(tools_obj, 'observer'):

sdk/nexent/core/tools/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,8 @@
1212
from .move_item_tool import MoveItemTool
1313
from .list_directory_tool import ListDirectoryTool
1414
from .terminal_tool import TerminalTool
15-
from .analyze_image_tool import AnalyzeImageTool
1615
from .analyze_text_file_tool import AnalyzeTextFileTool
16+
from .analyze_image_tool import AnalyzeImageTool
1717

1818
__all__ = [
1919
"ExaSearchTool",
@@ -30,6 +30,6 @@
3030
"MoveItemTool",
3131
"ListDirectoryTool",
3232
"TerminalTool",
33-
"AnalyzeImageTool",
34-
"AnalyzeTextFileTool"
33+
"AnalyzeTextFileTool",
34+
"AnalyzeImageTool"
3535
]

sdk/nexent/core/utils/prompt_template_utils.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,12 +26,14 @@
2626
def get_prompt_template(template_type: str, language: str = LANGUAGE["ZH"], **kwargs) -> Dict[str, Any]:
2727
"""
2828
Get prompt template
29+
2930
Args:
3031
template_type: Template type, supports the following values:
3132
- 'analyze_image': Analyze image template
3233
- 'analyze_file': Analyze file template (for text files)
3334
language: Language code ('zh' or 'en')
3435
**kwargs: Additional parameters, for agent type need to pass is_manager parameter
36+
3537
Returns:
3638
dict: Loaded prompt template
3739
"""

sdk/nexent/core/utils/tools_common_message.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ class ToolSign(Enum):
1111
TAVILY_SEARCH = "d" # Tavily search tool identifier
1212
FILE_OPERATION = "f" # File operation tool identifier
1313
TERMINAL_OPERATION = "t" # Terminal operation tool identifier
14-
MULTIMODAL_OPERATION = "m" # Multimodal operation tool identifier
14+
MULTIMODAL_OPERATION = "m" # Multimodal operation tool identifier
1515

1616

1717
# Tool sign mapping for backward compatibility

test/backend/agents/test_create_agent_info.py

Lines changed: 36 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -5,22 +5,17 @@
55
from pathlib import Path
66
from unittest.mock import AsyncMock, MagicMock, patch, Mock, PropertyMock
77

8+
from test.common.env_test_utils import bootstrap_env
9+
10+
env_state = bootstrap_env()
11+
consts_const = env_state["mock_const"]
812
TEST_ROOT = Path(__file__).resolve().parents[2]
913
PROJECT_ROOT = TEST_ROOT.parent
1014

1115
# Ensure project backend package is found before test/backend
1216
for _path in (str(PROJECT_ROOT), str(TEST_ROOT)):
1317
if _path not in sys.path:
1418
sys.path.insert(0, _path)
15-
from test.common.env_test_utils import bootstrap_env
16-
17-
env_state = bootstrap_env()
18-
consts_const = env_state["mock_const"]
19-
20-
from test.common.env_test_utils import bootstrap_env
21-
22-
env_state = bootstrap_env()
23-
consts_const = env_state["mock_const"]
2419

2520
# Utilities ---------------------------------------------------------------
2621
def _create_stub_module(name: str, **attrs):
@@ -47,30 +42,6 @@ def _create_stub_module(name: str, **attrs):
4742
consts_const.MODEL_CONFIG_MAPPING = {"llm": "llm_config"}
4843
consts_const.LANGUAGE = {"ZH": "zh"}
4944
consts_const.DATA_PROCESS_SERVICE = "https://example.com/data-process"
50-
# Utilities ---------------------------------------------------------------
51-
def _create_stub_module(name: str, **attrs):
52-
"""Return a lightweight module stub with the provided attributes."""
53-
module = types.ModuleType(name)
54-
for attr_name, attr_value in attrs.items():
55-
setattr(module, attr_name, attr_value)
56-
return module
57-
58-
59-
# Configure required constants via shared bootstrap env
60-
consts_const.MINIO_ENDPOINT = "http://localhost:9000"
61-
consts_const.MINIO_ACCESS_KEY = "test_access_key"
62-
consts_const.MINIO_SECRET_KEY = "test_secret_key"
63-
consts_const.MINIO_REGION = "us-east-1"
64-
consts_const.MINIO_DEFAULT_BUCKET = "test-bucket"
65-
consts_const.POSTGRES_HOST = "localhost"
66-
consts_const.POSTGRES_USER = "test_user"
67-
consts_const.NEXENT_POSTGRES_PASSWORD = "test_password"
68-
consts_const.POSTGRES_DB = "test_db"
69-
consts_const.POSTGRES_PORT = 5432
70-
consts_const.DEFAULT_TENANT_ID = "default_tenant"
71-
consts_const.LOCAL_MCP_SERVER = "http://localhost:5011"
72-
consts_const.MODEL_CONFIG_MAPPING = {"llm": "llm_config"}
73-
consts_const.LANGUAGE = {"ZH": "zh"}
7445

7546
# Mock utils module
7647
utils_mock = MagicMock()
@@ -125,13 +96,14 @@ def _create_stub_module(name: str, **attrs):
12596
sys.modules['utils.langchain_utils'] = MagicMock()
12697
sys.modules['utils.model_name_utils'] = MagicMock()
12798
sys.modules['langchain_core.tools'] = MagicMock()
128-
sys.modules['services.memory_config_service'] = MagicMock()
12999
# Build services module hierarchy with minimal functionality
130100
services_module = _create_stub_module("services")
131101
sys.modules['services'] = services_module
132102
sys.modules['services.image_service'] = _create_stub_module(
133103
"services.image_service", get_vlm_model=MagicMock(return_value="stub_vlm")
134104
)
105+
sys.modules['services.memory_config_service'] = MagicMock()
106+
# Extend services hierarchy with additional stubs
135107
sys.modules['services.file_management_service'] = _create_stub_module(
136108
"services.file_management_service",
137109
get_llm_model=MagicMock(return_value="stub_llm_model"),
@@ -140,18 +112,16 @@ def _create_stub_module(name: str, **attrs):
140112
"services.tool_configuration_service",
141113
initialize_tools_on_startup=AsyncMock(),
142114
)
115+
sys.modules['nexent.memory.memory_service'] = MagicMock()
116+
143117
# Build top-level nexent module to avoid importing the real package
144-
nexent_module = _create_stub_module(
145-
"nexent",
146-
MessageObserver=mock_message_observer,
147-
)
118+
nexent_module = _create_stub_module("nexent", MessageObserver=mock_message_observer)
148119
sys.modules['nexent'] = nexent_module
149120

150121
# Create nested modules for nexent.core to satisfy imports safely
151122
sys.modules['nexent.core'] = _create_stub_module("nexent.core")
152123
sys.modules['nexent.core.agents'] = _create_stub_module("nexent.core.agents")
153124
sys.modules['nexent.core.utils'] = _create_stub_module("nexent.core.utils")
154-
sys.modules['nexent.memory.memory_service'] = MagicMock()
155125

156126
# Create mock classes that might be imported
157127
mock_agent_config = MagicMock()
@@ -397,76 +367,76 @@ async def test_create_tool_config_list_with_knowledge_base_tool(self):
397367
assert last_call[1]['class_name'] == "KnowledgeBaseSearchTool"
398368

399369
@pytest.mark.asyncio
400-
async def test_create_tool_config_list_with_analyze_text_file_tool(self):
401-
"""Ensure AnalyzeTextFileTool receives text-specific metadata."""
370+
async def test_create_tool_config_list_with_analyze_image_tool(self):
371+
"""Ensure AnalyzeImageTool receives VLM model metadata."""
402372
mock_tool_instance = MagicMock()
403-
mock_tool_instance.class_name = "AnalyzeTextFileTool"
373+
mock_tool_instance.class_name = "AnalyzeImageTool"
404374
mock_tool_config.return_value = mock_tool_instance
405375

406376
with patch('backend.agents.create_agent_info.discover_langchain_tools', return_value=[]), \
407377
patch('backend.agents.create_agent_info.search_tools_for_sub_agent') as mock_search_tools, \
408-
patch('backend.agents.create_agent_info.get_llm_model') as mock_get_llm_model, \
378+
patch('backend.agents.create_agent_info.get_vlm_model') as mock_get_vlm_model, \
409379
patch('backend.agents.create_agent_info.minio_client', new_callable=MagicMock) as mock_minio_client:
410380

411381
mock_search_tools.return_value = [
412382
{
413-
"class_name": "AnalyzeTextFileTool",
414-
"name": "analyze_text_file",
415-
"description": "Analyze text file tool",
383+
"class_name": "AnalyzeImageTool",
384+
"name": "analyze_image",
385+
"description": "Analyze image tool",
416386
"inputs": "string",
417-
"output_type": "array",
387+
"output_type": "string",
418388
"params": [{"name": "prompt", "default": "describe"}],
419389
"source": "local",
420390
"usage": None
421391
}
422392
]
423-
mock_get_llm_model.return_value = "mock_llm_model"
393+
mock_get_vlm_model.return_value = "mock_vlm_model"
424394

425395
result = await create_tool_config_list("agent_1", "tenant_1", "user_1")
426396

427397
assert len(result) == 1
428398
assert result[0] is mock_tool_instance
429-
mock_get_llm_model.assert_called_once_with(tenant_id="tenant_1")
399+
mock_get_vlm_model.assert_called_once_with(tenant_id="tenant_1")
430400
assert mock_tool_instance.metadata == {
431-
"llm_model": "mock_llm_model",
432-
"storage_client": mock_minio_client,
433-
"data_process_service_url": consts_const.DATA_PROCESS_SERVICE,
401+
"vlm_model": "mock_vlm_model",
402+
"storage_client": mock_minio_client
434403
}
435404

436405
@pytest.mark.asyncio
437-
async def test_create_tool_config_list_with_analyze_image_tool(self):
438-
"""Ensure AnalyzeImageTool receives VLM model metadata."""
406+
async def test_create_tool_config_list_with_analyze_text_file_tool(self):
407+
"""Ensure AnalyzeTextFileTool receives text-specific metadata."""
439408
mock_tool_instance = MagicMock()
440-
mock_tool_instance.class_name = "AnalyzeImageTool"
409+
mock_tool_instance.class_name = "AnalyzeTextFileTool"
441410
mock_tool_config.return_value = mock_tool_instance
442411

443412
with patch('backend.agents.create_agent_info.discover_langchain_tools', return_value=[]), \
444413
patch('backend.agents.create_agent_info.search_tools_for_sub_agent') as mock_search_tools, \
445-
patch('backend.agents.create_agent_info.get_vlm_model') as mock_get_vlm_model, \
414+
patch('backend.agents.create_agent_info.get_llm_model') as mock_get_llm_model, \
446415
patch('backend.agents.create_agent_info.minio_client', new_callable=MagicMock) as mock_minio_client:
447416

448417
mock_search_tools.return_value = [
449418
{
450-
"class_name": "AnalyzeImageTool",
451-
"name": "analyze_image",
452-
"description": "Analyze image tool",
419+
"class_name": "AnalyzeTextFileTool",
420+
"name": "analyze_text_file",
421+
"description": "Analyze text file tool",
453422
"inputs": "string",
454-
"output_type": "string",
423+
"output_type": "array",
455424
"params": [{"name": "prompt", "default": "describe"}],
456425
"source": "local",
457426
"usage": None
458427
}
459428
]
460-
mock_get_vlm_model.return_value = "mock_vlm_model"
429+
mock_get_llm_model.return_value = "mock_llm_model"
461430

462431
result = await create_tool_config_list("agent_1", "tenant_1", "user_1")
463432

464433
assert len(result) == 1
465434
assert result[0] is mock_tool_instance
466-
mock_get_vlm_model.assert_called_once_with(tenant_id="tenant_1")
435+
mock_get_llm_model.assert_called_once_with(tenant_id="tenant_1")
467436
assert mock_tool_instance.metadata == {
468-
"vlm_model": "mock_vlm_model",
469-
"storage_client": mock_minio_client
437+
"llm_model": "mock_llm_model",
438+
"storage_client": mock_minio_client,
439+
"data_process_service_url": consts_const.DATA_PROCESS_SERVICE,
470440
}
471441

472442

@@ -1278,16 +1248,13 @@ async def test_join_minio_file_description_to_query_with_files(self):
12781248
minio_files = [
12791249
{"url": "/nexent/1.pdf", "name": "1.pdf"},
12801250
{"url": "/nexent/2.pdf", "name": "2.pdf"},
1281-
{"url": "/nexent/3.pdf", "name": "3.pdf"},
1251+
{"no_description": "should be ignored"}
12821252
]
12831253
query = "test query"
12841254

12851255
result = await join_minio_file_description_to_query(minio_files, query)
12861256

1287-
expected = ("User provided some reference files:\nFile S3 URL: s3://nexent/1.pdf, file name:1.pdf\n"
1288-
"File S3 URL: s3://nexent/2.pdf, file name:2.pdf\n"
1289-
"File S3 URL: s3://nexent/3.pdf, file name:3.pdf\n\n"
1290-
'User wants to answer questions based on the above information: test query')
1257+
expected = "User provided some reference files:\nFile S3 URL: s3://nexent/1.pdf, file name:1.pdf\nFile S3 URL: s3://nexent/2.pdf, file name:2.pdf\n\nUser wants to answer questions based on the above information: test query"
12911258
assert result == expected
12921259

12931260
@pytest.mark.asyncio

0 commit comments

Comments
 (0)