Skip to content

Commit ca7a88f

Browse files
committed
✨ Users can now name a knowledge base freely, without having to follow Elasticsearch's index naming rules
1 parent 3a17cb6 commit ca7a88f

File tree

4 files changed

+523
-4
lines changed

4 files changed

+523
-4
lines changed

backend/services/vectordatabase_service.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,10 +118,12 @@ def _rethrow_or_plain(exc: Exception) -> None:
118118
msg = str(exc)
119119
try:
120120
parsed = json.loads(msg)
121-
if isinstance(parsed, dict) and parsed.get("error_code"):
122-
raise Exception(json.dumps(parsed, ensure_ascii=False))
123121
except Exception:
124-
pass
122+
raise Exception(msg)
123+
124+
if isinstance(parsed, dict) and parsed.get("error_code"):
125+
raise Exception(json.dumps(parsed, ensure_ascii=False))
126+
125127
raise Exception(msg)
126128

127129

test/backend/app/test_vectordatabase_app.py

Lines changed: 156 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import os
77
import sys
88
import pytest
9-
from unittest.mock import patch, MagicMock, ANY
9+
from unittest.mock import patch, MagicMock, ANY, AsyncMock
1010
from fastapi.testclient import TestClient
1111
from fastapi import FastAPI
1212

@@ -1368,6 +1368,108 @@ async def test_health_check_exception(vdb_core_mock):
13681368
mock_health.assert_called_once_with(ANY)
13691369

13701370

1371+
@pytest.mark.asyncio
1372+
async def test_get_document_error_info_not_found(vdb_core_mock, auth_data):
1373+
"""
1374+
Test document error info when document is not found.
1375+
"""
1376+
with patch("backend.apps.vectordatabase_app.get_all_files_status", new=AsyncMock(return_value={})):
1377+
response = client.get(
1378+
f"/indices/{auth_data['index_name']}/documents/missing_doc/error-info",
1379+
headers=auth_data["auth_header"],
1380+
)
1381+
1382+
assert response.status_code == 404
1383+
assert "not found" in response.json()["detail"]
1384+
1385+
1386+
@pytest.mark.asyncio
1387+
async def test_get_document_error_info_no_task_id(auth_data):
1388+
"""
1389+
Test document error info when task id is empty.
1390+
"""
1391+
with patch(
1392+
"backend.apps.vectordatabase_app.get_all_files_status",
1393+
new=AsyncMock(
1394+
return_value={
1395+
"doc-1": {
1396+
"latest_task_id": ""
1397+
}
1398+
}
1399+
),
1400+
), patch("backend.apps.vectordatabase_app.get_redis_service") as mock_redis:
1401+
response = client.get(
1402+
"/indices/test_index/documents/doc-1/error-info",
1403+
headers=auth_data["auth_header"],
1404+
)
1405+
1406+
assert response.status_code == 200
1407+
assert response.json() == {"status": "success", "error_code": None}
1408+
mock_redis.assert_not_called()
1409+
1410+
1411+
@pytest.mark.asyncio
1412+
async def test_get_document_error_info_json_error_code(auth_data):
1413+
"""
1414+
Test document error info JSON parsing for error_code.
1415+
"""
1416+
redis_mock = MagicMock()
1417+
redis_mock.get_error_info.return_value = '{"error_code": "INVALID_FORMAT"}'
1418+
1419+
with patch(
1420+
"backend.apps.vectordatabase_app.get_all_files_status",
1421+
new=AsyncMock(
1422+
return_value={
1423+
"doc-1": {
1424+
"latest_task_id": "task-123"
1425+
}
1426+
}
1427+
),
1428+
), patch(
1429+
"backend.apps.vectordatabase_app.get_redis_service",
1430+
return_value=redis_mock,
1431+
):
1432+
response = client.get(
1433+
"/indices/test_index/documents/doc-1/error-info",
1434+
headers=auth_data["auth_header"],
1435+
)
1436+
1437+
assert response.status_code == 200
1438+
assert response.json() == {"status": "success", "error_code": "INVALID_FORMAT"}
1439+
redis_mock.get_error_info.assert_called_once_with("task-123")
1440+
1441+
1442+
@pytest.mark.asyncio
1443+
async def test_get_document_error_info_regex_error_code(auth_data):
1444+
"""
1445+
Test document error info regex extraction when JSON parsing fails.
1446+
"""
1447+
redis_mock = MagicMock()
1448+
redis_mock.get_error_info.return_value = "oops {'error_code': 'TIMEOUT_ERROR'}"
1449+
1450+
with patch(
1451+
"backend.apps.vectordatabase_app.get_all_files_status",
1452+
new=AsyncMock(
1453+
return_value={
1454+
"doc-1": {
1455+
"latest_task_id": "task-999"
1456+
}
1457+
}
1458+
),
1459+
), patch(
1460+
"backend.apps.vectordatabase_app.get_redis_service",
1461+
return_value=redis_mock,
1462+
):
1463+
response = client.get(
1464+
"/indices/test_index/documents/doc-1/error-info",
1465+
headers=auth_data["auth_header"],
1466+
)
1467+
1468+
assert response.status_code == 200
1469+
assert response.json() == {"status": "success", "error_code": "TIMEOUT_ERROR"}
1470+
redis_mock.get_error_info.assert_called_once_with("task-999")
1471+
1472+
13711473
@pytest.mark.asyncio
13721474
async def test_health_check_timeout_exception(vdb_core_mock):
13731475
"""
@@ -1562,6 +1664,59 @@ async def test_hybrid_search_value_error(vdb_core_mock, auth_data):
15621664
assert response.json() == {"detail": "Query text is required"}
15631665

15641666

1667+
@pytest.mark.asyncio
1668+
async def test_get_index_chunks_value_error(vdb_core_mock):
1669+
"""
1670+
Test get_index_chunks maps ValueError to 404.
1671+
"""
1672+
index_name = "test_index"
1673+
with patch("backend.apps.vectordatabase_app.get_vector_db_core", return_value=vdb_core_mock), \
1674+
patch("backend.apps.vectordatabase_app.get_index_name_by_knowledge_name", return_value="resolved_index"), \
1675+
patch("backend.apps.vectordatabase_app.ElasticSearchService.get_index_chunks") as mock_get_chunks:
1676+
1677+
mock_get_chunks.side_effect = ValueError("Unknown index")
1678+
1679+
response = client.post(f"/indices/{index_name}/chunks")
1680+
1681+
assert response.status_code == 404
1682+
assert response.json() == {"detail": "Unknown index"}
1683+
mock_get_chunks.assert_called_once_with(
1684+
index_name="resolved_index",
1685+
page=None,
1686+
page_size=None,
1687+
path_or_url=None,
1688+
vdb_core=ANY,
1689+
)
1690+
1691+
1692+
@pytest.mark.asyncio
1693+
async def test_create_chunk_value_error(vdb_core_mock, auth_data):
1694+
"""
1695+
Test create_chunk maps ValueError to 404.
1696+
"""
1697+
with patch("backend.apps.vectordatabase_app.get_vector_db_core", return_value=vdb_core_mock), \
1698+
patch("backend.apps.vectordatabase_app.get_current_user_id", return_value=(auth_data["user_id"], auth_data["tenant_id"])), \
1699+
patch("backend.apps.vectordatabase_app.get_index_name_by_knowledge_name", return_value=auth_data["index_name"]), \
1700+
patch("backend.apps.vectordatabase_app.ElasticSearchService.create_chunk") as mock_create:
1701+
1702+
mock_create.side_effect = ValueError("Invalid chunk payload")
1703+
1704+
payload = {
1705+
"content": "Hello world",
1706+
"path_or_url": "doc-1",
1707+
}
1708+
1709+
response = client.post(
1710+
f"/indices/{auth_data['index_name']}/chunk",
1711+
json=payload,
1712+
headers=auth_data["auth_header"],
1713+
)
1714+
1715+
assert response.status_code == 404
1716+
assert response.json() == {"detail": "Invalid chunk payload"}
1717+
mock_create.assert_called_once()
1718+
1719+
15651720
@pytest.mark.asyncio
15661721
async def test_hybrid_search_exception(vdb_core_mock, auth_data):
15671722
"""

test/backend/data_process/test_tasks.py

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1012,6 +1012,13 @@ def test_extract_error_code_parses_detail_and_regex_and_unknown():
10121012
assert extract_error_code("no code here") == "unknown_error"
10131013

10141014

1015+
def test_extract_error_code_top_level_key():
1016+
from backend.data_process.tasks import extract_error_code
1017+
1018+
payload = json.dumps({"error_code": "top_level"})
1019+
assert extract_error_code(payload) == "top_level"
1020+
1021+
10151022
def test_save_error_to_redis_branches(monkeypatch):
10161023
from backend.data_process.tasks import save_error_to_redis
10171024

@@ -1112,6 +1119,58 @@ def test_process_error_fallback_when_save_error_raises(monkeypatch, tmp_path):
11121119
) or self.states == []
11131120

11141121

1122+
def test_process_error_truncates_reason_when_no_error_code(monkeypatch, tmp_path):
1123+
"""process should truncate long messages when extract_error_code is falsy"""
1124+
tasks, fake_ray = import_tasks_with_fake_ray(monkeypatch, initialized=True)
1125+
1126+
long_msg = "x" * 250
1127+
error_json = json.dumps({"message": long_msg})
1128+
1129+
# Provide actor but make ray.get raise inside the try block
1130+
class FakeActor:
1131+
def __init__(self):
1132+
self.process_file = types.SimpleNamespace(remote=lambda *a, **k: "ref_err")
1133+
self.store_chunks_in_redis = types.SimpleNamespace(
1134+
remote=lambda *a, **k: None)
1135+
1136+
monkeypatch.setattr(tasks, "get_ray_actor", lambda: FakeActor())
1137+
fake_ray.get = lambda *_: (_ for _ in ()).throw(Exception(error_json))
1138+
# Force extract_error_code to return None so truncation path executes
1139+
monkeypatch.setattr(tasks, "extract_error_code", lambda *a, **k: None)
1140+
1141+
calls: list[str] = []
1142+
1143+
def save_and_capture(task_id, reason, start_time):
1144+
calls.append(reason)
1145+
1146+
monkeypatch.setattr(tasks, "save_error_to_redis", save_and_capture)
1147+
1148+
# Ensure source file exists so FileNotFound is not raised before ray.get
1149+
f = tmp_path / "exists.txt"
1150+
f.write_text("data")
1151+
1152+
self = FakeSelf("trunc-proc")
1153+
with pytest.raises(Exception):
1154+
tasks.process(
1155+
self,
1156+
source=str(f),
1157+
source_type="local",
1158+
chunking_strategy="basic",
1159+
index_name="idx",
1160+
original_filename="f.txt",
1161+
)
1162+
1163+
# Captured reason should be truncated because error_code is falsy
1164+
assert len(calls) >= 1
1165+
truncated_reason = calls[-1]
1166+
assert truncated_reason.endswith("...")
1167+
assert len(truncated_reason) <= 203
1168+
assert any(
1169+
s.get("meta", {}).get("stage") == "text_extraction_failed"
1170+
for s in self.states
1171+
)
1172+
1173+
11151174
def test_forward_cancel_check_warning_then_continue(monkeypatch):
11161175
tasks, _ = import_tasks_with_fake_ray(monkeypatch)
11171176
monkeypatch.setattr(tasks, "ELASTICSEARCH_SERVICE", "http://api")
@@ -1197,6 +1256,58 @@ def post(self, *a, **k):
11971256
assert "detail_err" in str(exc.value)
11981257

11991258

1259+
def test_forward_index_documents_regex_error_code(monkeypatch):
1260+
tasks, _ = import_tasks_with_fake_ray(monkeypatch)
1261+
monkeypatch.setattr(tasks, "ELASTICSEARCH_SERVICE", "http://api")
1262+
monkeypatch.setattr(tasks, "get_file_size", lambda *a, **k: 0)
1263+
1264+
class FakeResponse:
1265+
status = 500
1266+
1267+
async def text(self):
1268+
# Non-JSON body with a double-quoted "error_code" key (no space after the colon), so the regex fallback must extract the code
1269+
return 'oops "error_code":"regex_branch"'
1270+
1271+
async def __aenter__(self):
1272+
return self
1273+
1274+
async def __aexit__(self, *a):
1275+
return False
1276+
1277+
class FakeSession:
1278+
def __init__(self, *a, **k):
1279+
pass
1280+
1281+
async def __aenter__(self):
1282+
return self
1283+
1284+
async def __aexit__(self, *a):
1285+
return False
1286+
1287+
def post(self, *a, **k):
1288+
return FakeResponse()
1289+
1290+
fake_aiohttp = types.SimpleNamespace(
1291+
TCPConnector=lambda verify_ssl=False: None,
1292+
ClientTimeout=lambda total=None: None,
1293+
ClientSession=FakeSession,
1294+
ClientConnectorError=Exception,
1295+
ClientResponseError=Exception,
1296+
)
1297+
monkeypatch.setattr(tasks, "aiohttp", fake_aiohttp)
1298+
monkeypatch.setattr(tasks, "run_async", _run_coro)
1299+
1300+
self = FakeSelf("regex-err")
1301+
with pytest.raises(Exception) as exc:
1302+
tasks.forward(
1303+
self,
1304+
processed_data={"chunks": [{"content": "x", "metadata": {}}]},
1305+
index_name="idx",
1306+
source="/a.txt",
1307+
)
1308+
assert "regex_branch" in str(exc.value)
1309+
1310+
12001311
def test_forward_index_documents_client_connector_error(monkeypatch):
12011312
tasks, _ = import_tasks_with_fake_ray(monkeypatch)
12021313
monkeypatch.setattr(tasks, "ELASTICSEARCH_SERVICE", "http://api")
@@ -1273,6 +1384,69 @@ def post(self, *a, **k):
12731384
assert "Failed to connect to API" in str(exc.value) or "timeout" in str(exc.value).lower()
12741385

12751386

1387+
def test_forward_truncates_reason_when_no_error_code(monkeypatch):
1388+
tasks, _ = import_tasks_with_fake_ray(monkeypatch)
1389+
monkeypatch.setattr(tasks, "ELASTICSEARCH_SERVICE", "http://api")
1390+
monkeypatch.setattr(tasks, "get_file_size", lambda *a, **k: 0)
1391+
monkeypatch.setattr(tasks, "extract_error_code", lambda *a, **k: None)
1392+
1393+
long_msg = json.dumps({"message": "m" * 250})
1394+
monkeypatch.setattr(
1395+
tasks, "run_async", lambda coro: (_ for _ in ()).throw(Exception(long_msg))
1396+
)
1397+
1398+
reasons: list[str] = []
1399+
monkeypatch.setattr(
1400+
tasks, "save_error_to_redis", lambda tid, reason, st: reasons.append(reason)
1401+
)
1402+
1403+
self = FakeSelf("f-trunc")
1404+
with pytest.raises(Exception):
1405+
tasks.forward(
1406+
self,
1407+
processed_data={"chunks": [{"content": "x", "metadata": {}}]},
1408+
index_name="idx",
1409+
source="/a.txt",
1410+
)
1411+
1412+
assert reasons and reasons[0].endswith("...")
1413+
assert len(reasons[0]) <= 203
1414+
assert any(
1415+
s.get("meta", {}).get("stage") == "forward_task_failed" for s in self.states
1416+
)
1417+
1418+
1419+
def test_forward_fallback_truncates_on_non_json_error(monkeypatch):
1420+
tasks, _ = import_tasks_with_fake_ray(monkeypatch)
1421+
monkeypatch.setattr(tasks, "ELASTICSEARCH_SERVICE", "http://api")
1422+
monkeypatch.setattr(tasks, "get_file_size", lambda *a, **k: 0)
1423+
monkeypatch.setattr(tasks, "extract_error_code", lambda *a, **k: None)
1424+
1425+
monkeypatch.setattr(
1426+
tasks, "run_async", lambda coro: (_ for _ in ()).throw(Exception("n" * 250))
1427+
)
1428+
1429+
reasons: list[str] = []
1430+
monkeypatch.setattr(
1431+
tasks, "save_error_to_redis", lambda tid, reason, st: reasons.append(reason)
1432+
)
1433+
1434+
self = FakeSelf("f-fallback")
1435+
with pytest.raises(Exception):
1436+
tasks.forward(
1437+
self,
1438+
processed_data={"chunks": [{"content": "x", "metadata": {}}]},
1439+
index_name="idx",
1440+
source="/a.txt",
1441+
)
1442+
1443+
assert reasons and reasons[0].endswith("...")
1444+
assert len(reasons[0]) <= 203
1445+
assert any(
1446+
s.get("meta", {}).get("stage") == "forward_task_failed" for s in self.states
1447+
)
1448+
1449+
12761450
def test_forward_error_truncates_reason_and_uses_save(monkeypatch):
12771451
tasks, _ = import_tasks_with_fake_ray(monkeypatch)
12781452
long_message = "m" * 250

0 commit comments

Comments
 (0)