@@ -21,7 +21,6 @@
 from llama_index.core.schema import BaseNode
 from llama_index.embeddings.openai_like import OpenAILikeEmbedding
 from pydantic import Field
-from tos.models2 import Vector, VectorData
 from typing_extensions import Any, override

 import veadk.config  # noqa E401
@@ -33,6 +32,8 @@
 try:
     from tos.vector_client import VectorClient
     from tos import DataType, DistanceMetricType
+    from tos.exceptions import TosServerError
+    from tos.models2 import Vector, VectorData
 except ImportError:
     raise ImportError(
         "Please install VeADK extensions\n pip install veadk-python[extensions]"
@@ -76,25 +77,40 @@ def model_post_init(self, __context: Any) -> None:
         )

     def _bucket_exists(self) -> bool:
-        bucket_list_resp = self._tos_client.list_vector_buckets()
-        bucket_list = [
-            bucket.vector_bucket_name for bucket in bucket_list_resp.vector_buckets
-        ]
-        if self.tos_vector_bucket_name in bucket_list:
-            return True
-        else:
-            return False
+        try:
+            bucket_exist = self._tos_client.get_vector_bucket(
+                vector_bucket_name=self.tos_vector_bucket_name,
+                account_id=self.tos_vector_account_id,
+            )
+            return bucket_exist.status_code == 200
+        except TosServerError as e:
+            if e.status_code == 404:
+                return False
+            else:
+                raise e

     def _index_exists(self) -> bool:
-        index_list_resp = self._tos_client.list_indexes(
-            vector_bucket_name=self.tos_vector_bucket_name,
-            account_id=self.tos_vector_account_id,
-        )
-        index_list = [index.index_name for index in index_list_resp.indexes]
-        if self.index in index_list:
-            return True
-        else:
-            return False
+        try:
+            index_exist = self._tos_client.get_index(
+                vector_bucket_name=self.tos_vector_bucket_name,
+                account_id=self.tos_vector_account_id,
+                index_name=self.index,
+            )
+            return index_exist.status_code == 200
+        except TosServerError as e:
+            if e.status_code == 404:
+                return False
+            else:
+                raise e
+
+    def _split_documents(self, documents: list[Document]) -> list[BaseNode]:
+        """Split documents into chunks."""
+        nodes = []
+        for document in documents:
+            splitter = get_llama_index_splitter(document.metadata.get("file_path", ""))
+            _nodes = splitter.get_nodes_from_documents([document])
+            nodes.extend(_nodes)
+        return nodes

     def _create_index(self):
         if not self._bucket_exists():
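The two existence checks above replace list-and-scan calls (`list_vector_buckets` / `list_indexes` plus a Python membership test) with a direct GET probe: a 200 response means the resource exists, a 404 raised by the server means it does not, and any other `TosServerError` is re-raised. That turns an O(n), possibly paginated listing into a single request. (`_split_documents` is unchanged; it only moves up from the bottom of the class so the helpers read in call order.) A minimal sketch of the probe-by-GET pattern, assuming only what the diff shows, namely that the SDK raises `TosServerError` carrying a `status_code` attribute; the helper name and example arguments are illustrative:

    from tos.exceptions import TosServerError

    def _exists_via_get(get_fn, **kwargs) -> bool:
        # 200 -> exists; 404 -> missing; anything else is a real error.
        try:
            return get_fn(**kwargs).status_code == 200
        except TosServerError as e:
            if e.status_code == 404:
                return False
            raise

    # exists = _exists_via_get(client.get_vector_bucket,
    #                          vector_bucket_name="demo", account_id="123")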
@@ -118,6 +134,8 @@ def _process_and_store_documents(self, documents: list[Document]) -> bool:
         nodes = self._split_documents(documents)
         vectors = []
         for node in nodes:
+            if not node.text:
+                continue
             embedding = self._embed_model.get_text_embedding(node.text)
             vectors.append(
                 Vector(
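The added guard inside `_process_and_store_documents` skips nodes with empty text before calling `get_text_embedding`: embedding endpoints commonly reject empty input, and whitespace-only documents can produce zero-length chunks after splitting. The same idea written as an upfront filter (a sketch; the variable names follow the surrounding method):

    # Drop empty chunks before paying for an embedding call.
    non_empty = [node for node in nodes if node.text]
    embeddings = [self._embed_model.get_text_embedding(n.text) for n in non_empty]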
@@ -163,15 +181,7 @@ def search(self, query: str, top_k: int = 5) -> list[str]:
             index_name=self.index,
             query_vector=VectorData(float32=query_vector),
             top_k=top_k,
+            return_metadata=True,
         )

         return [vector.metadata["text"] for vector in search_result.vectors]
-
-    def _split_documents(self, documents: list[Document]) -> list[BaseNode]:
-        """Split document into chunks"""
-        nodes = []
-        for document in documents:
-            splitter = get_llama_index_splitter(document.metadata.get("file_path", ""))
-            _nodes = splitter.get_nodes_from_documents([document])
-            nodes.extend(_nodes)
-        return nodes
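`return_metadata=True` is the functional change in this last hunk: `search` reads `vector.metadata["text"]` from each hit, so the query has to ask the server to send the stored metadata back; without it, that lookup would fail or come back empty depending on the SDK default. Rough usage of the public `search` method, assuming an already-configured instance (construction is outside this diff, so the variable is hypothetical):

    store = ...  # a configured TOS vector DB instance from this module
    for text in store.search("What is VeADK?", top_k=3):
        print(text)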