Skip to content

Commit cfb6700

Browse files
committed
refactor: 优化知识库获取逻辑,提升性能和数据处理效率
- 修改 get_all_databases 方法,避免预加载文件数据,直接构造返回数据,减少查询时间。
- 批量获取集合信息,减少对 Milvus 的单独查询,提高数据获取效率。
1 parent c0a6afc commit cfb6700

File tree

1 file changed

+53
-13
lines changed

1 file changed

+53
-13
lines changed

src/core/knowledgebase.py

Lines changed: 53 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,39 @@ def _load_models(self):
106106
def get_all_databases(self):
    """Return every knowledge base together with a summary of its files.

    The result is built directly from column values instead of calling
    ``to_dict()`` on each ORM object.

    Returns:
        list[dict]: One dict per knowledge base with the keys ``id``,
        ``db_id``, ``name``, ``description``, ``embed_model``,
        ``dimension``, ``metadata`` (``{}`` when ``meta_info`` is empty),
        ``created_at`` (ISO-8601 string or ``None``) and ``files``.
        ``files`` maps each ``file_id`` to a dict with ``file_id``,
        ``filename``, ``path``, ``type``, ``status`` and ``created_at``
        (a POSIX timestamp; the current time is used when the row has no
        creation time).
    """
    with db_manager.get_session_context() as session:
        # Query only the knowledge-base rows; files are fetched separately below.
        databases = session.query(KnowledgeDatabase).all()
        if not databases:
            return []

        # Fetch the files of all knowledge bases with a single IN query
        # instead of issuing one query per knowledge base (N+1 pattern),
        # then group them in memory by the owning knowledge base.
        db_ids = [db.db_id for db in databases]
        files_by_db = {db_id: {} for db_id in db_ids}
        file_rows = (
            session.query(KnowledgeFile)
            .filter(KnowledgeFile.database_id.in_(db_ids))
            .all()
        )
        for file_obj in file_rows:
            files_by_db.setdefault(file_obj.database_id, {})[file_obj.file_id] = {
                "file_id": file_obj.file_id,
                "filename": file_obj.filename,
                "path": file_obj.path,
                "type": file_obj.file_type,
                "status": file_obj.status,
                # Keep the existing contract: fall back to "now" when the
                # row carries no creation time.
                "created_at": file_obj.created_at.timestamp() if file_obj.created_at else time.time(),
            }

        return [
            {
                "id": db.id,
                "db_id": db.db_id,
                "name": db.name,
                "description": db.description,
                "embed_model": db.embed_model,
                "dimension": db.dimension,
                "metadata": db.meta_info or {},
                "created_at": db.created_at.isoformat() if db.created_at else None,
                "files": files_by_db[db.db_id],
            }
            for db in databases
        ]
113142

114143
def get_database_by_id(self, db_id):
115144
"""根据ID获取知识库"""
@@ -310,19 +339,30 @@ def get_databases(self):
310339
assert config.enable_knowledge_base, "知识库未启用"
311340
databases = self.get_all_databases()
312341
databases_with_milvus = []
313-
for db_data in databases: # db_data is already a dict from to_dict()
342+
343+
# 批量获取所有集合信息,避免逐个查询
344+
try:
345+
all_collections = self.get_collections()
346+
collections_dict = {col.get("collection_name", col.get("name")): col for col in all_collections}
347+
except Exception as e:
348+
logger.warning(f"批量获取Milvus集合信息失败: {e}")
349+
collections_dict = {}
350+
351+
for db_data in databases:
314352
db_copy = db_data.copy()
315-
try:
316-
milvus_info = self.get_collection_info(db_copy["db_id"])
317-
# Merge Milvus info carefully, avoid overwriting existing keys like 'name', 'description'
353+
354+
# 从缓存的集合信息中获取数据,而不是单独查询
355+
if db_copy["db_id"] in collections_dict:
356+
milvus_info = collections_dict[db_copy["db_id"]]
357+
# 只添加Milvus特定的字段,避免覆盖数据库基本信息
318358
for k, v in milvus_info.items():
319-
if k not in db_copy or k in ["row_count", "status", "error_message"]: # Milvus specific keys
359+
if k in ["row_count", "status", "error_message"]:
320360
db_copy[k] = v
321-
except Exception as e:
322-
logger.warning(f"获取知识库 {db_copy.get('name')} (ID: {db_copy.get('db_id')}) 的Milvus信息失败: {e}")
323-
db_copy.update({"row_count": 0, "status": "未连接", "error": str(e)})
361+
else:
362+
# 如果集合不存在,设置默认值
363+
db_copy.update({"row_count": 0, "status": "未连接"})
324364

325-
# files should be part of db_copy from to_dict()
365+
# 统计处理中的文件数量
326366
db_copy_files = db_copy.get("files", {}).values()
327367
processing_files_count = sum(1 for file_info in db_copy_files if file_info.get("status") in ["processing", "waiting"])
328368
if processing_files_count > 0:

0 commit comments

Comments
 (0)