Skip to content

Commit d2653b9

Browse files
committed
feat: tos for knowledgebase
1 parent 9174185 commit d2653b9

File tree

2 files changed: +45 -28 lines changed

2 files changed: +45 -28 lines changed

veadk/knowledgebase/backends/tos_vector_backend.py

Lines changed: 37 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,6 @@
2121
from llama_index.core.schema import BaseNode
2222
from llama_index.embeddings.openai_like import OpenAILikeEmbedding
2323
from pydantic import Field
24-
from tos.models2 import Vector, VectorData
2524
from typing_extensions import Any, override
2625

2726
import veadk.config # noqa E401
@@ -33,6 +32,8 @@
3332
try:
3433
from tos.vector_client import VectorClient
3534
from tos import DataType, DistanceMetricType
35+
from tos.exceptions import TosServerError
36+
from tos.models2 import Vector, VectorData
3637
except ImportError:
3738
raise ImportError(
3839
"Please install VeADK extensions\npip install veadk-python[extensions]"
@@ -76,25 +77,40 @@ def model_post_init(self, __context: Any) -> None:
7677
)
7778

7879
def _bucket_exists(self) -> bool:
79-
bucket_list_resp = self._tos_client.list_vector_buckets()
80-
bucket_list = [
81-
bucket.vector_bucket_name for bucket in bucket_list_resp.vector_buckets
82-
]
83-
if self.tos_vector_bucket_name in bucket_list:
84-
return True
85-
else:
86-
return False
80+
try:
81+
bucket_exist = self._tos_client.get_vector_bucket(
82+
vector_bucket_name=self.tos_vector_bucket_name,
83+
account_id=self.tos_vector_account_id,
84+
)
85+
return bucket_exist.status_code == 200
86+
except TosServerError as e:
87+
if e.status_code == 404:
88+
return False
89+
else:
90+
raise e
8791

8892
def _index_exists(self) -> bool:
89-
index_list_resp = self._tos_client.list_indexes(
90-
vector_bucket_name=self.tos_vector_bucket_name,
91-
account_id=self.tos_vector_account_id,
92-
)
93-
index_list = [index.index_name for index in index_list_resp.indexes]
94-
if self.index in index_list:
95-
return True
96-
else:
97-
return False
93+
try:
94+
index_exist = self._tos_client.get_index(
95+
vector_bucket_name=self.tos_vector_bucket_name,
96+
account_id=self.tos_vector_account_id,
97+
index_name=self.index,
98+
)
99+
return index_exist.status_code == 200
100+
except TosServerError as e:
101+
if e.status_code == 404:
102+
return False
103+
else:
104+
raise e
105+
106+
def _split_documents(self, documents: list[Document]) -> list[BaseNode]:
107+
"""Split document into chunks"""
108+
nodes = []
109+
for document in documents:
110+
splitter = get_llama_index_splitter(document.metadata.get("file_path", ""))
111+
_nodes = splitter.get_nodes_from_documents([document])
112+
nodes.extend(_nodes)
113+
return nodes
98114

99115
def _create_index(self):
100116
if not self._bucket_exists():
@@ -118,6 +134,8 @@ def _process_and_store_documents(self, documents: list[Document]) -> bool:
118134
nodes = self._split_documents(documents)
119135
vectors = []
120136
for node in nodes:
137+
if not node.text:
138+
continue
121139
embedding = self._embed_model.get_text_embedding(node.text)
122140
vectors.append(
123141
Vector(
@@ -163,15 +181,7 @@ def search(self, query: str, top_k: int = 5) -> list[str]:
163181
index_name=self.index,
164182
query_vector=VectorData(float32=query_vector),
165183
top_k=top_k,
184+
return_metadata=True,
166185
)
167186

168187
return [vector.metadata["text"] for vector in search_result.vectors]
169-
170-
def _split_documents(self, documents: list[Document]) -> list[BaseNode]:
171-
"""Split document into chunks"""
172-
nodes = []
173-
for document in documents:
174-
splitter = get_llama_index_splitter(document.metadata.get("file_path", ""))
175-
_nodes = splitter.get_nodes_from_documents([document])
176-
nodes.extend(_nodes)
177-
return nodes

veadk/knowledgebase/knowledgebase.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,12 @@ def _get_backend_cls(backend: str) -> type[BaseKnowledgebaseBackend]:
5151
)
5252

5353
return RedisKnowledgeBackend
54+
case "tos_vector":
55+
from veadk.knowledgebase.backends.tos_vector_backend import (
56+
TosVectorKnowledgeBackend,
57+
)
58+
59+
return TosVectorKnowledgeBackend
5460

5561
raise ValueError(f"Unsupported knowledgebase backend: {backend}")
5662

@@ -165,7 +171,8 @@ class KnowledgeBase(BaseModel):
165171
description: str = "This knowledgebase stores some user-related information."
166172

167173
backend: Union[
168-
Literal["local", "opensearch", "viking", "redis"], BaseKnowledgebaseBackend
174+
Literal["local", "opensearch", "viking", "redis", "tos_vector"],
175+
BaseKnowledgebaseBackend,
169176
] = "local"
170177

171178
backend_config: dict = Field(default_factory=dict)

0 commit comments

Comments
 (0)