Skip to content

Commit 4d449a7

Browse files
committed
✨Added Datamate vector knowledge base core
1 parent 5a33d81 commit 4d449a7

File tree

17 files changed

+520
-278
lines changed

17 files changed

+520
-278
lines changed

backend/agents/create_agent_info.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,8 @@ async def create_agent_config(
150150
tenant_id=tenant_id, user_id=user_id)
151151
if knowledge_info_list:
152152
for knowledge_info in knowledge_info_list:
153+
if knowledge_info.get('knowledge_sources') != 'elasticsearch':
154+
continue
153155
knowledge_name = knowledge_info.get("index_name")
154156
try:
155157
message = ElasticSearchService().get_summary(index_name=knowledge_name)
@@ -239,13 +241,22 @@ async def create_tool_config_list(agent_id, tenant_id, user_id):
239241
knowledge_info_list = get_selected_knowledge_list(
240242
tenant_id=tenant_id, user_id=user_id)
241243
index_names = [knowledge_info.get(
242-
"index_name") for knowledge_info in knowledge_info_list]
244+
"index_name") for knowledge_info in knowledge_info_list if knowledge_info.get('knowledge_sources') == 'elasticsearch']
243245
tool_config.metadata = {
244246
"index_names": index_names,
245247
"vdb_core": get_vector_db_core(),
246248
"embedding_model": get_embedding_model(tenant_id=tenant_id),
247249
"name_resolver": build_knowledge_name_mapping(tenant_id=tenant_id, user_id=user_id),
248250
}
251+
elif tool_config.class_name == "DataMateSearchTool":
252+
knowledge_info_list = get_selected_knowledge_list(
253+
tenant_id=tenant_id, user_id=user_id)
254+
index_names = [knowledge_info.get(
255+
"index_name") for knowledge_info in knowledge_info_list if
256+
knowledge_info.get('knowledge_sources') == 'datamate']
257+
tool_config.metadata = {
258+
"index_names": index_names,
259+
}
249260
elif tool_config.class_name == "AnalyzeTextFileTool":
250261
tool_config.metadata = {
251262
"llm_model": get_llm_model(tenant_id=tenant_id),

backend/apps/datamate_app.py

Lines changed: 11 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@
66
from http import HTTPStatus
77

88
from services.datamate_service import (
9-
sync_datamate_knowledge_bases,
10-
fetch_datamate_knowledge_bases,
9+
sync_datamate_knowledge_bases_and_create_records,
1110
fetch_datamate_knowledge_base_file_list
1211
)
1312
from utils.auth_utils import get_current_user_id
@@ -16,62 +15,23 @@
1615
logger = logging.getLogger("datamate_app")
1716

1817

19-
@router.get("/knowledge_bases")
20-
async def get_datamate_knowledge_bases_endpoint(
21-
authorization: Optional[str] = Header(None)
22-
):
23-
"""Get list of DataMate knowledge bases."""
24-
try:
25-
knowledge_bases = await fetch_datamate_knowledge_bases()
26-
27-
# Transform to the same format as list_indices method
28-
indices = []
29-
indices_info = []
30-
31-
for kb in knowledge_bases:
32-
kb_id = kb.get("id")
33-
kb_name = kb.get("name") or kb_id
34-
35-
# Get stats from the knowledge base data
36-
stats = kb.get("stats", {})
37-
chunk_count = kb.get("chunkCount", 0)
38-
doc_count = kb.get("docCount", 0)
3918

40-
indices.append(kb_name)
41-
indices_info.append({
42-
"name": kb_id, # Internal index name (used as ID)
43-
"display_name": kb_name, # User-facing knowledge base name
44-
"stats": {
45-
"base_info": {
46-
"doc_count": doc_count,
47-
"chunk_count": chunk_count,
48-
"creation_date": kb.get("createdAt"),
49-
"update_date": kb.get("updatedAt"),
50-
"embedding_model": kb.get("embeddingModel", "unknown"),
51-
}
52-
}
53-
})
5419

55-
return {
56-
"indices": indices,
57-
"count": len(indices),
58-
"indices_info": indices_info,
59-
}
60-
except Exception as e:
61-
raise HTTPException(
62-
status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error fetching DataMate knowledge bases: {str(e)}")
63-
64-
65-
@router.post("/sync")
66-
async def sync_datamate_knowledge_bases_endpoint(
20+
@router.post("/sync_and_create_records")
21+
async def sync_datamate_and_create_records_endpoint(
6722
authorization: Optional[str] = Header(None)
6823
):
69-
"""Sync DataMate knowledge bases and their files."""
24+
"""Sync DataMate knowledge bases and create knowledge records in local database."""
7025
try:
71-
return await sync_datamate_knowledge_bases()
26+
user_id, tenant_id = get_current_user_id(authorization)
27+
28+
return await sync_datamate_knowledge_bases_and_create_records(
29+
tenant_id=tenant_id,
30+
user_id=user_id
31+
)
7232
except Exception as e:
7333
raise HTTPException(
74-
status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error syncing DataMate knowledge bases: {str(e)}")
34+
status_code=HTTPStatus.INTERNAL_SERVER_ERROR, detail=f"Error syncing DataMate knowledge bases and creating records: {str(e)}")
7535

7636

7737
@router.get("/{knowledge_base_id}/files")

backend/database/knowledge_db.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,59 @@ def create_knowledge_record(query: Dict[str, Any]) -> Dict[str, Any]:
7979
raise e
8080

8181

82+
def upsert_knowledge_record(query: Dict[str, Any]) -> Dict[str, Any]:
83+
"""
84+
Create or update a knowledge base record (upsert operation).
85+
If a record with the same index_name and tenant_id exists, update it.
86+
Otherwise, create a new record.
87+
88+
Args:
89+
query: Dictionary containing knowledge base data, must include:
90+
- index_name: Knowledge base name (used as unique identifier)
91+
- tenant_id: Tenant ID
92+
- knowledge_name: User-facing knowledge base name
93+
- knowledge_describe: Knowledge base description
94+
- knowledge_sources: Knowledge base sources (optional, default 'elasticsearch')
95+
- embedding_model_name: Embedding model name
96+
- user_id: User ID for created_by and updated_by fields
97+
98+
Returns:
99+
Dict[str, Any]: Dictionary with 'knowledge_id' and 'index_name'
100+
"""
101+
try:
102+
with get_db_session() as session:
103+
# Check if record exists
104+
existing_record = session.query(KnowledgeRecord).filter(
105+
KnowledgeRecord.index_name == query['index_name'],
106+
KnowledgeRecord.tenant_id == query['tenant_id'],
107+
KnowledgeRecord.delete_flag != 'Y'
108+
).first()
109+
110+
if existing_record:
111+
# Update existing record
112+
existing_record.knowledge_name = query.get('knowledge_name') or query.get('index_name')
113+
existing_record.knowledge_describe = query.get('knowledge_describe', '')
114+
existing_record.knowledge_sources = query.get('knowledge_sources', 'elasticsearch')
115+
existing_record.embedding_model_name = query.get('embedding_model_name')
116+
existing_record.updated_by = query.get('user_id')
117+
existing_record.update_time = func.current_timestamp()
118+
119+
session.flush()
120+
session.commit()
121+
return {
122+
"knowledge_id": existing_record.knowledge_id,
123+
"index_name": existing_record.index_name,
124+
"knowledge_name": existing_record.knowledge_name,
125+
}
126+
else:
127+
# Create new record
128+
return create_knowledge_record(query)
129+
130+
except SQLAlchemyError as e:
131+
session.rollback()
132+
raise e
133+
134+
82135
def update_knowledge_record(query: Dict[str, Any]) -> bool:
83136
"""
84137
Update a knowledge base record
@@ -230,6 +283,29 @@ def get_knowledge_info_by_tenant_id(tenant_id: str) -> List[Dict[str, Any]]:
230283
raise e
231284

232285

286+
def get_knowledge_info_by_tenant_and_source(tenant_id: str, knowledge_sources: str) -> List[Dict[str, Any]]:
287+
"""
288+
Get knowledge base records by tenant ID and knowledge sources.
289+
290+
Args:
291+
tenant_id: Tenant ID to filter by
292+
knowledge_sources: Knowledge sources to filter by (e.g., 'datamate')
293+
294+
Returns:
295+
List[Dict[str, Any]]: List of knowledge base record dictionaries
296+
"""
297+
try:
298+
with get_db_session() as session:
299+
result = session.query(KnowledgeRecord).filter(
300+
KnowledgeRecord.tenant_id == tenant_id,
301+
KnowledgeRecord.knowledge_sources == knowledge_sources,
302+
KnowledgeRecord.delete_flag != 'Y'
303+
).all()
304+
return [as_dict(item) for item in result]
305+
except SQLAlchemyError as e:
306+
raise e
307+
308+
233309
def update_model_name_by_index_name(index_name: str, embedding_model_name: str, tenant_id: str, user_id: str) -> bool:
234310
try:
235311
with get_db_session() as session:

backend/services/agent_service.py

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -97,45 +97,45 @@ def _resolve_model_with_fallback(
9797
) -> str | None:
9898
"""
9999
Resolve model_id from model_display_name with fallback to quick config LLM model.
100-
100+
101101
Args:
102102
model_display_name: Display name of the model to lookup
103103
exported_model_id: Original model_id from export (for logging only)
104104
model_label: Label for logging (e.g., "Model", "Business logic model")
105105
tenant_id: Tenant ID for model lookup
106-
106+
107107
Returns:
108108
Resolved model_id or None if not found and no fallback available
109109
"""
110110
if not model_display_name:
111111
return None
112-
112+
113113
# Try to find model by display name in current tenant
114114
resolved_id = get_model_id_by_display_name(model_display_name, tenant_id)
115-
115+
116116
if resolved_id:
117117
logger.info(
118118
f"{model_label} '{model_display_name}' found in tenant {tenant_id}, "
119119
f"mapped to model_id: {resolved_id} (exported model_id was: {exported_model_id})")
120120
return resolved_id
121-
121+
122122
# Model not found, try fallback to quick config LLM model
123123
logger.warning(
124124
f"{model_label} '{model_display_name}' (exported model_id: {exported_model_id}) "
125125
f"not found in tenant {tenant_id}, falling back to quick config LLM model.")
126-
126+
127127
quick_config_model = tenant_config_manager.get_model_config(
128128
key=MODEL_CONFIG_MAPPING["llm"],
129129
tenant_id=tenant_id
130130
)
131-
131+
132132
if quick_config_model:
133133
fallback_id = quick_config_model.get("model_id")
134134
logger.info(
135135
f"Using quick config LLM model for {model_label.lower()}: "
136136
f"{quick_config_model.get('display_name')} (model_id: {fallback_id})")
137137
return fallback_id
138-
138+
139139
logger.warning(f"No quick config LLM model found for tenant {tenant_id}")
140140
return None
141141

@@ -998,7 +998,7 @@ async def export_agent_by_agent_id(agent_id: int, tenant_id: str, user_id: str)
998998

999999
# Check if any tool is KnowledgeBaseSearchTool and set its metadata to empty dict
10001000
for tool in tool_list:
1001-
if tool.class_name in ["KnowledgeBaseSearchTool", "AnalyzeTextFileTool", "AnalyzeImageTool"]:
1001+
if tool.class_name in ["KnowledgeBaseSearchTool", "AnalyzeTextFileTool", "AnalyzeImageTool", "DataMateSearchTool"]:
10021002
tool.metadata = {}
10031003

10041004
# Get model_id and model display name from agent_info
@@ -1132,7 +1132,7 @@ async def import_agent_by_agent_id(
11321132
if not import_agent_info.name.isidentifier():
11331133
raise ValueError(
11341134
f"Invalid agent name: {import_agent_info.name}. agent name must be a valid python variable name.")
1135-
1135+
11361136
# Resolve model IDs with fallback
11371137
# Note: We use model_display_name for cross-tenant compatibility
11381138
# The exported model_id is kept for reference/debugging only
@@ -1142,7 +1142,7 @@ async def import_agent_by_agent_id(
11421142
model_label="Model",
11431143
tenant_id=tenant_id
11441144
)
1145-
1145+
11461146
business_logic_model_id = _resolve_model_with_fallback(
11471147
model_display_name=import_agent_info.business_logic_model_name,
11481148
exported_model_id=import_agent_info.business_logic_model_id,
@@ -1344,44 +1344,44 @@ def check_agent_availability(
13441344
) -> tuple[bool, list[str]]:
13451345
"""
13461346
Check if an agent is available based on its tools and model configuration.
1347-
1347+
13481348
Args:
13491349
agent_id: The agent ID to check
13501350
tenant_id: The tenant ID
13511351
agent_info: Optional pre-fetched agent info (to avoid duplicate DB queries)
13521352
model_cache: Optional model cache for performance optimization
1353-
1353+
13541354
Returns:
13551355
tuple: (is_available: bool, unavailable_reasons: list[str])
13561356
"""
13571357
unavailable_reasons: list[str] = []
1358-
1358+
13591359
if model_cache is None:
13601360
model_cache = {}
1361-
1361+
13621362
# Fetch agent info if not provided
13631363
if agent_info is None:
13641364
agent_info = search_agent_info_by_agent_id(agent_id, tenant_id)
1365-
1365+
13661366
if not agent_info:
13671367
return False, ["agent_not_found"]
1368-
1368+
13691369
# Check tool availability
13701370
tool_info = search_tools_for_sub_agent(agent_id=agent_id, tenant_id=tenant_id)
13711371
tool_id_list = [tool["tool_id"] for tool in tool_info if tool.get("tool_id") is not None]
13721372
if tool_id_list:
13731373
tool_statuses = check_tool_is_available(tool_id_list)
13741374
if not all(tool_statuses):
13751375
unavailable_reasons.append("tool_unavailable")
1376-
1376+
13771377
# Check model availability
13781378
model_reasons = _collect_model_availability_reasons(
13791379
agent=agent_info,
13801380
tenant_id=tenant_id,
13811381
model_cache=model_cache
13821382
)
13831383
unavailable_reasons.extend(model_reasons)
1384-
1384+
13851385
is_available = len(unavailable_reasons) == 0
13861386
return is_available, unavailable_reasons
13871387

@@ -1935,4 +1935,4 @@ def get_sub_agents_recursive(parent_agent_id: int, depth: int = 0, max_depth: in
19351935
except Exception as e:
19361936
logger.exception(
19371937
f"Failed to get agent call relationship for agent {agent_id}: {str(e)}")
1938-
raise ValueError(f"Failed to get agent call relationship: {str(e)}")
1938+
raise ValueError(f"Failed to get agent call relationship: {str(e)}")

0 commit comments

Comments
 (0)