1111# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212# See the License for the specific language governing permissions and
1313# limitations under the License.
14-
14+ import io
15+ import os .path
1516from typing import Any , BinaryIO , Literal , TextIO
1617
1718from pydantic import BaseModel
1819
1920from veadk .database .database_adapter import get_knowledgebase_database_adapter
2021from veadk .database .database_factory import DatabaseFactory
22+ from veadk .utils .misc import formatted_timestamp
2123from veadk .utils .logger import get_logger
2224
2325logger = get_logger (__name__ )
@@ -66,10 +68,65 @@ def add(
6668 )
6769
6870 index = build_knowledgebase_index (app_name )
69-
7071 logger .info (f"Adding documents to knowledgebase: index={ index } " )
7172
72- self ._adapter .add (data = data , index = index )
73+ if self .backend == "viking" :
74+ # Case 1: a single path to an existing file (str)
75+ if isinstance (data , str ) and os .path .isfile (data ):
76+ # Single file path: pass it straight to the adapter's add
77+ # Default file_name to the path's base name (extension included)
78+ if "file_name" not in kwargs or not kwargs ["file_name" ]:
79+ kwargs ["file_name" ] = os .path .basename (data )
80+ return self ._adapter .add (data = data , index = index , ** kwargs )
81+ # Case 2: list[str] whose items are all existing file paths; a mix of paths and non-paths is rejected
82+ if isinstance (data , list ):
83+ if all (isinstance (item , str ) for item in data ):
84+ all_paths = all (os .path .isfile (item ) for item in data )
85+ all_not_paths = all (not os .path .isfile (item ) for item in data )
86+ if all_paths :
87+ if "file_name" not in kwargs or not kwargs ["file_name" ]:
88+ kwargs ["file_name" ] = [
89+ os .path .basename (item ) for item in data
90+ ]
91+ return self ._adapter .add (data = data , index = index , ** kwargs )
92+ elif (
93+ not all_not_paths
94+ ): # Prevent the occurrence of non-existent paths
95+ # There is a mixture of paths and non-paths
96+ raise ValueError (
97+ "Mixed file paths and content strings in list are not allowed"
98+ )
99+ # Case 3: Handling strings or string arrays (content) (str or list[str])
100+ if isinstance (data , str ) or (
101+ isinstance (data , list ) and all (isinstance (item , str ) for item in data )
102+ ):
103+ if "file_name" not in kwargs or not kwargs ["file_name" ]:
104+ if isinstance (data , str ):
105+ kwargs ["file_name" ] = f"{ formatted_timestamp ()} .txt"
106+ else : # list[str] without file_names
107+ prefix_file_name = formatted_timestamp ()
108+ kwargs ["file_name" ] = [
109+ f"{ prefix_file_name } _{ i } .txt" for i in range (len (data ))
110+ ]
111+ return self ._adapter .add (data = data , index = index , ** kwargs )
112+
113+ # Case 4: Handling binary data (bytes)
114+ if isinstance (data , bytes ):
115+ # user must give file_name
116+ if "file_name" not in kwargs :
117+ raise ValueError ("file_name must be provided for binary data" )
118+ return self ._adapter .add (data = data , index = index , ** kwargs )
119+
120+ # Case 5: Handling file objects TextIO or BinaryIO
121+ if isinstance (data , (io .TextIOWrapper , io .BufferedReader )):
122+ if not kwargs .get ("file_name" ) and hasattr (data , "name" ):
123+ kwargs ["file_name" ] = os .path .basename (data .name )
124+ return self ._adapter .add (data = data , index = index , ** kwargs )
125+ # Case 6: Unsupported data type
126+ raise TypeError (f"Unsupported data type: { type (data )} " )
127+
128+ # not viking
129+ return self ._adapter .add (data = data , index = index , ** kwargs )
73130
74131 def search (self , query : str , app_name : str , top_k : int | None = None ) -> list [str ]:
75132 top_k = self .top_k if top_k is None else top_k
@@ -93,4 +150,4 @@ def delete_doc(self, app_name: str, id: str) -> bool:
93150
94151 def list_docs (self , app_name : str , offset : int = 0 , limit : int = 100 ) -> list [dict ]:
95152 index = build_knowledgebase_index (app_name )
96- return self ._adapter .list_docs (index = index , offset = offset , limit = limit )
153+ return self ._adapter .list_chunks (index = index , offset = offset , limit = limit )
0 commit comments