Skip to content

Commit 18d778f

Browse files
committed
feat: add delete_doc and list_docs
1 parent e22e3b4 commit 18d778f

File tree

5 files changed

+148
-7
lines changed

5 files changed

+148
-7
lines changed

veadk/database/database_adapter.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,12 @@ def query(self, query: str, index: str, top_k: int = 0) -> list:
5454
logger.error(f"Failed to search from Redis: index={index} error={e}")
5555
raise e
5656

57+
def delete_docs(self, index: str, ids: list[int]):
    """Placeholder for deleting documents by ID.

    No deletion logic exists here yet: the arguments are ignored and the
    call yields ``None``.

    Args:
        index: Name of the index the documents would belong to (unused).
        ids: Identifiers of the documents to delete (unused).
    """
    return None
58+
59+
def list_docs(self, index: str, offset: int = 0, limit: int = 100) -> list[dict]:
    """Placeholder for listing documents of an index.

    No listing logic exists here yet: the arguments are ignored and the call
    yields ``None`` even though the annotation promises a list.

    Args:
        index: Name of the index to list (unused).
        offset: Requested starting position (unused).
        limit: Requested maximum number of documents (unused).
    """
    return None
62+
5763

5864
class RelationalDatabaseAdapter:
5965
def __init__(self, client):
@@ -108,6 +114,12 @@ def query(self, query: str, index: str, top_k: int) -> list[str]:
108114

109115
return [item["data"] for item in results]
110116

117+
def delete_docs(self, index: str, ids: list[int]):
    """Stub: document deletion is not implemented for this adapter.

    Both arguments are accepted but never used, and the call produces
    ``None``.

    Args:
        index: Name of the index the documents would belong to (unused).
        ids: Identifiers of the documents to delete (unused).
    """
    return None
118+
119+
def list_docs(self, index: str, offset: int = 0, limit: int = 100) -> list[dict]:
    """Stub: document listing is not implemented for this adapter.

    All arguments are accepted but never used; the call produces ``None``
    rather than the annotated list.

    Args:
        index: Name of the index to list (unused).
        offset: Requested starting position (unused).
        limit: Requested maximum number of documents (unused).
    """
    return None
122+
111123

112124
class VectorDatabaseAdapter:
113125
def __init__(self, client):
@@ -152,6 +164,23 @@ def query(self, query: str, index: str, top_k: int) -> list[str]:
152164
top_k=top_k,
153165
)
154166

167+
def delete_doc(self, index: str, id: str) -> bool:
    """Delete one document from a collection, reporting success as a bool.

    Args:
        index: Collection name; checked with ``_validate_index`` before any
            client call (errors from that check are not caught here).
        id: Identifier of the document to delete.

    Returns:
        ``True`` when the client's ``delete_by_id`` call finishes without
        raising, ``False`` when it raises. The exception is logged, not
        propagated.
    """
    self._validate_index(index)
    logger.debug(f"Deleting documents from vector database: index={index} id={id}")
    try:
        self.client.delete_by_id(collection_name=index, id=id)
    except Exception as e:
        # Best-effort: report the failure to the caller through the return value.
        logger.error(
            f"Failed to delete document from vector database: index={index} id={id} error={e}"
        )
        return False
    else:
        return True
178+
179+
def list_docs(self, index: str, offset: int = 0, limit: int = 1000) -> list[dict]:
    """Return a page of documents from a collection.

    Args:
        index: Collection name; checked with ``_validate_index`` first.
        offset: Forwarded unchanged to the client's ``list_docs``.
        limit: Forwarded unchanged to the client's ``list_docs``.

    Returns:
        Whatever the client's ``list_docs`` returns for the collection.
    """
    self._validate_index(index)
    logger.debug(f"Listing documents from vector database: index={index}")
    documents = self.client.list_docs(
        collection_name=index,
        offset=offset,
        limit=limit,
    )
    return documents
183+
155184

156185
class VikingDatabaseAdapter:
157186
def __init__(self, client):
@@ -212,6 +241,16 @@ def query(self, query: str, index: str, top_k: int) -> list[str]:
212241

213242
return self.client.query(query, collection_name=index, top_k=top_k)
214243

244+
def delete_doc(self, index: str, id: str) -> bool:
    """Delete one document from a collection through the client.

    Args:
        index: Collection name; checked with ``_validate_index`` first.
        id: Identifier handed to the client's ``delete_by_id``.

    Returns:
        The value returned by the client's ``delete_by_id``.
    """
    self._validate_index(index)
    logger.debug(f"Deleting documents from vector database: index={index} id={id}")
    deleted = self.client.delete_by_id(collection_name=index, id=id)
    return deleted
248+
249+
def list_docs(self, index: str, offset: int = 0, limit: int = 100) -> list[dict]:
    """Return a page of documents from a collection.

    ``offset`` and ``limit`` now carry the same defaults the other adapters
    expose, so callers may omit them. Existing calls that pass both values
    are unaffected.

    Args:
        index: Collection name; checked with ``_validate_index`` first.
        offset: Forwarded unchanged to the client's ``list_docs``.
        limit: Forwarded unchanged to the client's ``list_docs``.

    Returns:
        Whatever the client's ``list_docs`` returns for the collection.
    """
    self._validate_index(index)
    logger.debug(f"Listing documents from vector database: index={index}")
    return self.client.list_docs(collection_name=index, offset=offset, limit=limit)
253+
215254

216255
class VikingMemoryDatabaseAdapter:
217256
def __init__(self, client):
@@ -248,6 +287,12 @@ def query(self, query: str, index: str, top_k: int, **kwargs):
248287
result = self.client.query(query, collection_name=index, top_k=top_k, **kwargs)
249288
return result
250289

290+
def delete_docs(self, index: str, ids: list[int]):
    """Always fail: this adapter offers no document deletion.

    Args:
        index: Ignored.
        ids: Ignored.

    Raises:
        NotImplementedError: On every call.
    """
    message = "VikingMemoryDatabase does not support delete_docs"
    raise NotImplementedError(message)
292+
293+
def list_docs(self, index: str, offset: int = 0, limit: int = 100):
    """Always fail: this adapter offers no document listing.

    ``offset`` and ``limit`` are accepted (and ignored) so that callers using
    the same keyword arguments as for the other adapters get the explicit
    ``NotImplementedError`` below rather than a ``TypeError`` about
    unexpected arguments.

    Args:
        index: Ignored.
        offset: Ignored.
        limit: Ignored.

    Raises:
        NotImplementedError: On every call.
    """
    raise NotImplementedError("VikingMemoryDatabase does not support list_docs")
295+
251296

252297
class LocalDatabaseAdapter:
253298
def __init__(self, client):
@@ -261,6 +306,12 @@ def add(self, data: list[str], **kwargs):
261306
def query(self, query: str, **kwargs):
262307
return self.client.query(query, **kwargs)
263308

309+
def delete_doc(self, index: str, id: str) -> bool:
    """Delete one document through the wrapped local client.

    Args:
        index: Accepted but not forwarded to the client.
        id: Identifier passed positionally to the client's ``delete_doc``.

    Returns:
        The value returned by the client's ``delete_doc``.
    """
    outcome = self.client.delete_doc(id)
    return outcome
311+
312+
def list_docs(self, index: str, offset: int = 0, limit: int = 100) -> list[dict]:
    """List documents through the wrapped local client.

    Args:
        index: Accepted but not forwarded to the client.
        offset: Passed to the client as the ``offset`` keyword.
        limit: Passed to the client as the ``limit`` keyword.

    Returns:
        The value returned by the client's ``list_docs``.
    """
    paging = {"offset": offset, "limit": limit}
    return self.client.list_docs(**paging)
314+
264315

265316
MAPPING = {
266317
"RedisDatabase": KVDatabaseAdapter,

veadk/database/local_database.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,20 +24,35 @@ class LocalDataBase(BaseDatabase):
2424

2525
def __init__(self, **kwargs):
2626
super().__init__()
27-
self.data = []
27+
self.data = {} # switched to a dict
2828
self._type = "local"
29+
self._next_id = 0 # used to generate unique IDs
2930

3031
def add_texts(self, texts: list[str], **kwargs):
31-
self.data.extend(texts)
32+
for text in texts:
33+
self.data[str(self._next_id)] = text
34+
self._next_id += 1
3235

3336
def is_empty(self):
    """Return ``True`` when ``self.data`` holds no entries, else ``False``."""
    return not self.data
3538

3639
def query(self, query: str, **kwargs: Any) -> list[str]:
37-
return self.data
40+
return list(self.data.values())
3841

3942
def delete(self, **kwargs: Any):
40-
self.data = []
43+
self.data = {}
4144

4245
def add(self, texts: list[str], **kwargs: Any):
    """Store ``texts`` by delegating to ``add_texts``.

    Args:
        texts: Texts handed to ``add_texts``.
        **kwargs: Accepted but not forwarded.

    Returns:
        The value returned by ``add_texts``.
    """
    stored = self.add_texts(texts)
    return stored
47+
48+
def list_docs(self, **kwargs: Any) -> list[dict]:
    """Return stored documents as ``{"id", "content"}`` dicts, optionally paged.

    Recognised keyword arguments:
        offset: Number of leading documents to skip. Missing, ``None`` or
            negative values are treated as ``0``.
        limit: Maximum number of documents to return. Missing, ``None`` or
            negative values mean "no cap".

    Any other keyword arguments are ignored. With no keyword arguments every
    stored document is returned, in the iteration order of ``self.data``.

    Returns:
        A list of dicts, one per selected document.
    """
    offset = max(kwargs.get("offset") or 0, 0)
    limit = kwargs.get("limit")

    docs = [
        {"id": doc_id, "content": content} for doc_id, content in self.data.items()
    ]
    if limit is None or limit < 0:
        return docs[offset:]
    return docs[offset : offset + limit]
50+
51+
def delete_doc(self, id: str, **kwargs: Any):
    """Remove the document stored under ``id``.

    Args:
        id: Key of the document in ``self.data``.
        **kwargs: Accepted but unused.

    Returns:
        ``True`` once the document has been removed.

    Raises:
        ValueError: If no document is stored under ``id``.
    """
    # A single lookup replaces the earlier membership test followed by a
    # try/except whose failure branch could never run.
    try:
        del self.data[id]
    except KeyError:
        raise ValueError(f"id {id} not found") from None
    return True

veadk/database/vector/opensearch_vector_database.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -219,20 +219,22 @@ def list_all_collection(self) -> list:
219219
response = self._opensearch_client.indices.get_alias()
220220
return list(response.keys())
221221

222-
def get_all_docs(self, collection_name: str, size: int = 10000) -> list[dict]:
222+
def list_docs(
223+
self, collection_name: str, offset: int = 0, limit: int = 10000
224+
) -> list[dict]:
223225
"""Match all docs in one index of OpenSearch"""
224226
if not self.collection_exists(collection_name):
225227
logger.warning(
226228
f"Get all docs, but collection {collection_name} does not exist. return a empty list."
227229
)
228230
return []
229231

230-
query = {"size": size, "query": {"match_all": {}}}
232+
query = {"size": limit, "from": offset, "query": {"match_all": {}}}
231233
response = self._opensearch_client.search(index=collection_name, body=query)
232234
return [
233235
{
234236
"id": hit["_id"],
235-
"page_content": hit["_source"]["page_content"],
237+
"content": hit["_source"]["page_content"],
236238
}
237239
for hit in response["hits"]["hits"]
238240
]

veadk/database/viking/viking_database.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@
4141
doc_add_path = "/api/knowledge/doc/add"
4242
doc_info_path = "/api/knowledge/doc/info"
4343
doc_del_path = "/api/collection/drop"
44+
list_docs_path = "/api/knowledge/point/list"
45+
delete_docs_path = "/api/knowledge/point/delete"
4446

4547

4648
class VolcengineTOSConfig(BaseModel):
@@ -400,3 +402,66 @@ def collection_exists(self, collection_name: str) -> bool:
400402
return True
401403
else:
402404
return False
405+
406+
def list_docs(
    self, collection_name: str, offset: int = 0, limit: int = -1
) -> list[dict]:
    """Fetch points of a collection from the knowledge-base service.

    Builds a POST request for ``list_docs_path`` via ``prepare_request`` and
    sends it with ``requests``.

    Args:
        collection_name: Collection to list.
        offset: Sent to the service as ``offset``.
        limit: Sent to the service as ``limit``. The default ``-1``
            presumably means "no limit" (TODO confirm against the service
            API).

    Returns:
        One ``{"id", "content"}`` dict per entry in the response's
        ``data.point_list``, taken from each entry's ``point_id`` and
        ``content``.

    Raises:
        ValueError: If the response's ``code`` is not ``0``.
    """
    payload = {
        "collection_name": collection_name,
        "project": self.config.project,
        "offset": offset,
        "limit": limit,
    }
    list_req = prepare_request(
        method="POST",
        path=list_docs_path,
        config=self.config,
        data=payload,
    )
    resp = requests.request(
        method=list_req.method,
        url="https://{}{}".format(g_knowledge_base_domain, list_req.path),
        headers=list_req.headers,
        data=list_req.body,
    )

    result = resp.json()
    if result["code"] != 0:
        logger.error(f"Error in list_docs: {result['message']}")
        raise ValueError(f"Error in list_docs: {result['message']}")

    return [
        {"id": point["point_id"], "content": point["content"]}
        for point in result["data"]["point_list"]
    ]
441+
442+
def delete_by_id(self, collection_name: str, id: str) -> bool:
    """Delete one point from a collection in the knowledge-base service.

    Builds a POST request for ``delete_docs_path`` via ``prepare_request``
    and sends it with ``requests``.

    Args:
        collection_name: Collection that holds the point.
        id: Sent to the service as ``point_id``.

    Returns:
        ``True`` if the response's ``code`` is ``0``, otherwise ``False``.
        A failure is logged and not raised.
    """
    request_params = {
        "collection_name": collection_name,
        "project": self.config.project,
        "point_id": id,
    }

    delete_req = prepare_request(
        method="POST",
        path=delete_docs_path,
        config=self.config,
        data=request_params,
    )
    resp = requests.request(
        method=delete_req.method,
        url="https://{}{}".format(g_knowledge_base_domain, delete_req.path),
        headers=delete_req.headers,
        data=delete_req.body,
    )

    result = resp.json()
    if result["code"] != 0:
        # Log the service's message so a False return can be diagnosed,
        # matching how list_docs reports service errors.
        logger.error(f"Error in delete_by_id: {result.get('message')}")
        return False
    return True

veadk/knowledgebase/knowledgebase.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,11 @@ def search(self, query: str, app_name: str, top_k: int | None = None) -> list[st
8080
if len(result) == 0:
8181
logger.warning(f"No documents found in knowledgebase. Query: {query}")
8282
return result
83+
84+
def delete_doc(self, app_name: str, id: str) -> bool:
    """Delete one document from the knowledgebase of ``app_name``.

    Args:
        app_name: Turned into an index name by ``build_knowledgebase_index``.
        id: Identifier forwarded to the adapter's ``delete_doc``.

    Returns:
        The value returned by the adapter's ``delete_doc``.
    """
    target_index = build_knowledgebase_index(app_name)
    deleted = self.adapter.delete_doc(index=target_index, id=id)
    return deleted
87+
88+
def list_docs(self, app_name: str, offset: int = 0, limit: int = 100) -> list[dict]:
    """List documents from the knowledgebase of ``app_name``.

    Args:
        app_name: Turned into an index name by ``build_knowledgebase_index``.
        offset: Forwarded to the adapter's ``list_docs``.
        limit: Forwarded to the adapter's ``list_docs``.

    Returns:
        The value returned by the adapter's ``list_docs``.
    """
    target_index = build_knowledgebase_index(app_name)
    documents = self.adapter.list_docs(
        index=target_index,
        offset=offset,
        limit=limit,
    )
    return documents

0 commit comments

Comments
 (0)