Skip to content

Commit 3782682

Browse files
committed
🧪 Add tests for vectordb core
1 parent f98f1b7 commit 3782682

20 files changed

+880
-1002
lines changed

backend/apps/knowledge_summary_app.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,11 @@ async def auto_summary(
3939
language=language,
4040
model_id=model_id
4141
)
42-
except Exception:
43-
logger.error("Knowledge base summary generation failed", exc_info=True)
42+
except Exception as e:
43+
logger.error(
44+
f"Knowledge base summary generation failed: {e}", exc_info=True)
4445
return StreamingResponse(
45-
"data: {\"status\": \"error\", \"message\": \"Knowledge base summary generation failed due to an internal error.\"}\n\n",
46+
"data: {{\"status\": \"error\", \"message\": \"Knowledge base summary generation failed due to an internal error.\"}}\n\n",
4647
media_type="text/event-stream",
4748
status_code=500
4849
)

doc/docs/en/backend/overview.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ backend/
2828
├── services/ # Business service layer
2929
│ ├── agent_service.py # Agent business logic
3030
│ ├── conversation_management_service.py # Conversation management
31-
│ ├── elasticsearch_service.py # Search engine service
31+
│ ├── vectordatabase_service.py # Vector database service
3232
│ ├── model_health_service.py # Model health checks
3333
│ ├── prompt_service.py # Prompt service
3434
│ └── tenant_config_service.py # Tenant configuration service

doc/docs/zh/backend/overview.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ backend/
2828
├── services/ # 业务服务层
2929
│ ├── agent_service.py # 代理业务逻辑
3030
│ ├── conversation_management_service.py # 对话管理
31-
│ ├── elasticsearch_service.py # 搜索引擎服务
31+
│ ├── vectordatabase_service.py # 向量数据库服务
3232
│ ├── model_health_service.py # 模型健康检查
3333
│ ├── prompt_service.py # 提示词服务
3434
│ └── tenant_config_service.py # 租户配置服务

doc/docs/zh/sdk/vector-database.md

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ docker network rm elastic
234234
- `elasticsearch_core.py`: 主类,包含所有 Elasticsearch 操作
235235
- `embedding_model.py`: 处理使用 Jina AI 模型生成嵌入向量
236236
- `utils.py`: 数据格式化和显示的工具函数
237-
- `elasticsearch_service.py`: FastAPI 服务,提供 REST API 接口
237+
- `vectordatabase_service.py`: FastAPI 服务,提供 REST API 接口
238238

239239
## 使用示例
240240

@@ -244,10 +244,10 @@ docker network rm elastic
244244
from nexent.vector_database.elasticsearch_core import ElasticSearchCore
245245
246246
# 使用 .env 文件中的凭据初始化
247-
es_core = ElasticSearchCore()
247+
vdb_core = ElasticSearchCore()
248248
249249
# 或直接指定凭据
250-
es_core = ElasticSearchCore(
250+
vdb_core = ElasticSearchCore(
251251
host="https://localhost:9200",
252252
api_key="your_api_key",
253253
verify_certs=False,
@@ -259,21 +259,21 @@ es_core = ElasticSearchCore(
259259

260260
```python
261261
# 创建新的向量索引
262-
es_core.create_vector_index("my_documents")
262+
vdb_core.create_index("my_documents")
263263
264264
# 列出所有用户索引
265-
indices = es_core.get_user_indices()
265+
indices = vdb_core.get_user_indices()
266266
print(indices)
267267
268268
# 获取所有索引的统计信息
269-
all_indices_stats = es_core.get_all_indices_stats()
269+
all_indices_stats = vdb_core.get_all_indices_stats()
270270
print(all_indices_stats)
271271
272272
# 删除索引
273-
es_core.delete_index("my_documents")
273+
vdb_core.delete_index("my_documents")
274274
275275
# 创建测试知识库
276-
index_name, doc_count = es_core.create_test_knowledge_base()
276+
index_name, doc_count = vdb_core.create_test_knowledge_base()
277277
print(f"创建了测试知识库 {index_name},包含 {doc_count} 个文档")
278278
```
279279

@@ -304,29 +304,29 @@ documents = [
304304
}
305305
]
306306
# 支持批量处理,默认批处理大小为3000
307-
total_indexed = es_core.index_documents("my_documents", documents, batch_size=3000)
307+
total_indexed = vdb_core.vectorize_documents("my_documents", documents, batch_size=3000)
308308
print(f"成功索引了 {total_indexed} 个文档")
309309
310310
# 通过 URL 或路径删除文档
311-
deleted_count = es_core.delete_documents_by_path_or_url("my_documents", "https://example.com/doc1")
311+
deleted_count = vdb_core.delete_documents("my_documents", "https://example.com/doc1")
312312
print(f"删除了 {deleted_count} 个文档")
313313
```
314314

315315
### 搜索功能
316316

317317
```python
318318
# 文本精确搜索
319-
results = es_core.accurate_search("my_documents", "示例查询", top_k=5)
319+
results = vdb_core.accurate_search("my_documents", "示例查询", top_k=5)
320320
for result in results:
321321
print(f"得分: {result['score']}, 文档: {result['document']['title']}")
322322
323323
# 语义向量搜索
324-
results = es_core.semantic_search("my_documents", "示例查询", top_k=5)
324+
results = vdb_core.semantic_search("my_documents", "示例查询", top_k=5)
325325
for result in results:
326326
print(f"得分: {result['score']}, 文档: {result['document']['title']}")
327327
328328
# 混合搜索
329-
results = es_core.hybrid_search(
329+
results = vdb_core.hybrid_search(
330330
"my_documents",
331331
"示例查询",
332332
top_k=5,
@@ -340,19 +340,19 @@ for result in results:
340340
341341
```python
342342
# 获取索引统计信息
343-
stats = es_core.get_index_stats("my_documents")
343+
stats = vdb_core.get_indices_detail(["my_documents"])
344344
print(stats)
345345
346346
# 获取文件列表及详细信息
347-
file_details = es_core.get_file_list_with_details("my_documents")
347+
file_details = vdb_core.get_documents_detail("my_documents")
348348
print(file_details)
349349
350350
# 获取嵌入模型信息
351-
embedding_model = es_core.get_embedding_model_info("my_documents")
351+
embedding_model = vdb_core.get_embedding_model_info("my_documents")
352352
print(f"使用的嵌入模型: {embedding_model}")
353353
354354
# 打印所有索引信息
355-
es_core.print_all_indices_info()
355+
vdb_core.print_all_indices_info()
356356
```
357357
358358
## ElasticSearchCore 主要功能
@@ -368,7 +368,7 @@ ElasticSearchCore 类提供了以下主要功能:
368368
369369
```python
370370
# 获取索引的文件列表及详细信息
371-
files = es_core.get_file_list_with_details("my_documents")
371+
files = vdb_core.get_documents_detail("my_documents")
372372
for file in files:
373373
print(f"文件路径: {file['path_or_url']}")
374374
print(f"文件名: {file['file']}")
@@ -377,11 +377,11 @@ for file in files:
377377
print("---")
378378
379379
# 获取嵌入模型信息
380-
model_info = es_core.get_embedding_model_info("my_documents")
380+
model_info = vdb_core.get_embedding_model_info("my_documents")
381381
print(f"使用的嵌入模型: {model_info}")
382382
383383
# 获取所有索引的综合统计信息
384-
all_stats = es_core.get_all_indices_stats()
384+
all_stats = vdb_core.get_all_indices_stats()
385385
for index_name, stats in all_stats.items():
386386
print(f"索引: {index_name}")
387387
print(f"文档数: {stats['base_info']['doc_count']}")
@@ -392,12 +392,12 @@ for index_name, stats in all_stats.items():
392392
393393
## API 服务接口
394394
395-
通过 `elasticsearch_service.py` 提供的 FastAPI 服务,可使用 REST API 访问上述所有功能。
395+
通过 `vectordatabase_service.py` 提供的 FastAPI 服务,可使用 REST API 访问上述所有功能。
396396
397397
### 服务启动
398398
399399
```bash
400-
python -m nexent.service.elasticsearch_service
400+
python -m nexent.service.vectordatabase_service
401401
```
402402
403403
服务默认在 `http://localhost:8000` 运行。
@@ -836,34 +836,34 @@ print(json.dumps(response.json(), indent=2, ensure_ascii=False))
836836
837837
```python
838838
# 初始化 ElasticSearchCore
839-
es_core = ElasticSearchCore()
839+
vdb_core = ElasticSearchCore()
840840
841841
# 获取或创建测试知识库
842842
index_name = "sample_articles"
843843
844844
# 列出所有用户索引
845-
user_indices = es_core.get_user_indices()
845+
user_indices = vdb_core.get_user_indices()
846846
for idx in user_indices:
847847
print(f" - {idx}")
848848
849849
# 执行搜索
850850
if index_name in user_indices:
851851
# 精确搜索
852852
query = "Doctor"
853-
accurate_results = es_core.accurate_search(index_name, query, top_k=2)
853+
accurate_results = vdb_core.accurate_search(index_name, query, top_k=2)
854854
855855
# 语义搜索
856856
query = "medical professionals in London"
857-
semantic_results = es_core.semantic_search(index_name, query, top_k=2)
857+
semantic_results = vdb_core.semantic_search(index_name, query, top_k=2)
858858
859859
# 混合搜索
860860
query = "medical professionals in London"
861-
semantic_results = es_core.hybrid_search(index_name, query, top_k=2, weight_accurate=0.5)
861+
semantic_results = vdb_core.hybrid_search(index_name, query, top_k=2, weight_accurate=0.5)
862862
863863
# 获取索引统计信息
864-
stats = es_core.get_index_stats(index_name)
865-
fields = es_core.get_index_mapping(index_name)
866-
unique_sources = es_core.get_unique_sources_count(index_name)
864+
stats = vdb_core.get_indices_detail([index_name])
865+
fields = vdb_core.get_index_mapping(index_name)
866+
unique_sources = vdb_core.get_unique_sources_count(index_name)
867867
```
868868
869869
## 许可证

test/backend/agents/test_create_agent_info.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
sys.modules['database.agent_db'] = MagicMock()
6060
sys.modules['database.tool_db'] = MagicMock()
6161
sys.modules['database.model_management_db'] = MagicMock()
62-
sys.modules['services.elasticsearch_service'] = MagicMock()
62+
sys.modules['services.vectordatabase_service'] = MagicMock()
6363
sys.modules['services.tenant_config_service'] = MagicMock()
6464
sys.modules['utils.prompt_template_utils'] = MagicMock()
6565
sys.modules['utils.config_utils'] = MagicMock()
@@ -250,7 +250,7 @@ async def test_create_tool_config_list_with_knowledge_base_tool(self):
250250
with patch('backend.agents.create_agent_info.discover_langchain_tools') as mock_discover, \
251251
patch('backend.agents.create_agent_info.search_tools_for_sub_agent') as mock_search_tools, \
252252
patch('backend.agents.create_agent_info.get_selected_knowledge_list') as mock_knowledge, \
253-
patch('backend.agents.create_agent_info.elastic_core') as mock_elastic, \
253+
patch('backend.agents.create_agent_info.get_vector_db_core') as mock_get_vector_db_core, \
254254
patch('backend.agents.create_agent_info.get_embedding_model') as mock_embedding:
255255

256256
mock_discover.return_value = []
@@ -268,9 +268,10 @@ async def test_create_tool_config_list_with_knowledge_base_tool(self):
268268
]
269269
mock_knowledge.return_value = [
270270
{"index_name": "knowledge_1"},
271-
{"index_name": "knowledge_2"}
271+
{"index_name": "knowledge_2"},
272272
]
273-
mock_elastic.return_value = "mock_elastic_core"
273+
mock_vdb_core = "mock_elastic_core"
274+
mock_get_vector_db_core.return_value = mock_vdb_core
274275
mock_embedding.return_value = "mock_embedding_model"
275276

276277
result = await create_tool_config_list("agent_1", "tenant_1", "user_1")

0 commit comments

Comments
 (0)