Skip to content

Commit c750ce4

Browse files
authored
feat(kb): enhance upload methods with metadata support and add KnowledgebaseEntry model (#190)
* feat(kb): add tests for Viking knowledgebase and improve TOS bucket handling
* fix(tests): mock backend upload in viking knowledgebase test
* refactor(tests): remove unused test for Viking knowledgebase text addition
* feat(kb): add tests for Viking knowledgebase and improve TOS bucket handling
* feat(knowledgebase): enhance upload methods with metadata support and add KnowledgebaseEntry model
* fix(tos): set default bucket name for VeTOS class
* fix(vikingdb): include bucket name in TOS upload method
1 parent b6f42f2 commit c750ce4

File tree

7 files changed

+319
-87
lines changed

7 files changed

+319
-87
lines changed

.gitignore

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,13 @@ cython_debug/
167167
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
168168
.idea/
169169

170+
# Visual Studio Code
171+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
172+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
173+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
174+
# you could uncomment the following to ignore the entire vscode folder
175+
.vscode/
176+
170177
# Ruff stuff:
171178
.ruff_cache/
172179

veadk/integrations/ve_tos/ve_tos.py

Lines changed: 100 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,16 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from io import StringIO
1615
import asyncio
1716
import os
1817
from datetime import datetime
19-
from typing import TYPE_CHECKING, Union, List, Optional
18+
from io import StringIO
19+
from typing import TYPE_CHECKING, List, Optional, Union
2020
from urllib.parse import urlparse
21-
from veadk.utils.misc import getenv
21+
2222
from veadk.consts import DEFAULT_TOS_BUCKET_NAME
2323
from veadk.utils.logger import get_logger
24+
from veadk.utils.misc import getenv
2425

2526
if TYPE_CHECKING:
2627
pass
@@ -178,7 +179,18 @@ def _set_cors_rules(self, bucket_name: str) -> bool:
178179
return False
179180

180181
def _build_object_key_for_file(self, data_path: str) -> str:
181-
"""generate TOS object key"""
182+
"""Builds the TOS object key for the given data path.
183+
184+
Args:
185+
user_id (str): User ID
186+
app_name (str): App name
187+
session_id (str): Session ID
188+
data_path (str): Data path
189+
190+
Returns:
191+
str: The TOS object key.
192+
"""
193+
182194
parsed_url = urlparse(data_path)
183195

184196
# Generate object key
@@ -256,14 +268,33 @@ def build_tos_signed_url(self, object_key: str, bucket_name: str = "") -> str:
256268

257269
# deprecated
258270
def upload(
259-
self, data: Union[str, bytes], bucket_name: str = "", object_key: str = ""
271+
self,
272+
data: Union[str, bytes],
273+
bucket_name: str = "",
274+
object_key: str = "",
275+
metadata: dict | None = None,
260276
):
277+
"""Uploads data to TOS.
278+
279+
Args:
280+
data (Union[str, bytes]): The data to upload, either as a file path or raw bytes.
281+
bucket_name (str): The name of the TOS bucket to upload to.
282+
object_key (str): The object key for the uploaded data.
283+
metadata (dict | None, optional): Metadata to associate with the object. Defaults to None.
284+
285+
Raises:
286+
ValueError: If the data type is unsupported.
287+
"""
261288
if isinstance(data, str):
262289
# data is a file path
263-
return asyncio.to_thread(self.upload_file, data, bucket_name, object_key)
290+
return asyncio.to_thread(
291+
self.upload_file, data, bucket_name, object_key, metadata
292+
)
264293
elif isinstance(data, bytes):
265294
# data is bytes content
266-
return asyncio.to_thread(self.upload_bytes, data, bucket_name, object_key)
295+
return asyncio.to_thread(
296+
self.upload_bytes, data, bucket_name, object_key, metadata
297+
)
267298
else:
268299
error_msg = f"Upload failed: data type error. Only str (file path) and bytes are supported, got {type(data)}"
269300
logger.error(error_msg)
@@ -287,14 +318,19 @@ def _ensure_client_and_bucket(self, bucket_name: str) -> bool:
287318
return True
288319

289320
def upload_text(
290-
self, text: str, bucket_name: str = "", object_key: str = ""
321+
self,
322+
text: str,
323+
bucket_name: str = "",
324+
object_key: str = "",
325+
metadata: dict | None = None,
291326
) -> None:
292327
"""Upload text content to TOS bucket
293328
294329
Args:
295330
text: Text content to upload
296331
bucket_name: TOS bucket name
297332
object_key: Object key, auto-generated if None
333+
metadata: Metadata to associate with the object
298334
"""
299335
bucket_name = self._check_bucket_name(bucket_name)
300336
if not object_key:
@@ -304,7 +340,9 @@ def upload_text(
304340
return
305341
data = StringIO(text)
306342
try:
307-
self._client.put_object(bucket=bucket_name, key=object_key, content=data)
343+
self._client.put_object(
344+
bucket=bucket_name, key=object_key, content=data, meta=metadata
345+
)
308346
logger.debug(f"Upload success, object_key: {object_key}")
309347
return
310348
except Exception as e:
@@ -314,14 +352,19 @@ def upload_text(
314352
data.close()
315353

316354
async def async_upload_text(
317-
self, text: str, bucket_name: str = "", object_key: str = ""
355+
self,
356+
text: str,
357+
bucket_name: str = "",
358+
object_key: str = "",
359+
metadata: dict | None = None,
318360
) -> None:
319361
"""Asynchronously upload text content to TOS bucket
320362
321363
Args:
322364
text: Text content to upload
323365
bucket_name: TOS bucket name
324366
object_key: Object key, auto-generated if None
367+
metadata: Metadata to associate with the object
325368
"""
326369
bucket_name = self._check_bucket_name(bucket_name)
327370
if not object_key:
@@ -337,6 +380,7 @@ async def async_upload_text(
337380
bucket=bucket_name,
338381
key=object_key,
339382
content=data,
383+
meta=metadata,
340384
)
341385
logger.debug(f"Async upload success, object_key: {object_key}")
342386
return
@@ -347,14 +391,19 @@ async def async_upload_text(
347391
data.close()
348392

349393
def upload_bytes(
350-
self, data: bytes, bucket_name: str = "", object_key: str = ""
394+
self,
395+
data: bytes,
396+
bucket_name: str = "",
397+
object_key: str = "",
398+
metadata: dict | None = None,
351399
) -> None:
352400
"""Upload byte data to TOS bucket
353401
354402
Args:
355403
data: Byte data to upload
356404
bucket_name: TOS bucket name
357405
object_key: Object key, auto-generated if None
406+
metadata: Metadata to associate with the object
358407
"""
359408
bucket_name = self._check_bucket_name(bucket_name)
360409
if not object_key:
@@ -363,22 +412,29 @@ def upload_bytes(
363412
if not self._ensure_client_and_bucket(bucket_name):
364413
return
365414
try:
366-
self._client.put_object(bucket=bucket_name, key=object_key, content=data)
415+
self._client.put_object(
416+
bucket=bucket_name, key=object_key, content=data, meta=metadata
417+
)
367418
logger.debug(f"Upload success, object_key: {object_key}")
368419
return
369420
except Exception as e:
370421
logger.error(f"Upload failed: {e}")
371422
return
372423

373424
async def async_upload_bytes(
374-
self, data: bytes, bucket_name: str = "", object_key: str = ""
425+
self,
426+
data: bytes,
427+
bucket_name: str = "",
428+
object_key: str = "",
429+
metadata: dict | None = None,
375430
) -> None:
376431
"""Asynchronously upload byte data to TOS bucket
377432
378433
Args:
379434
data: Byte data to upload
380435
bucket_name: TOS bucket name
381436
object_key: Object key, auto-generated if None
437+
metadata: Metadata to associate with the object
382438
"""
383439
bucket_name = self._check_bucket_name(bucket_name)
384440
if not object_key:
@@ -393,6 +449,7 @@ async def async_upload_bytes(
393449
bucket=bucket_name,
394450
key=object_key,
395451
content=data,
452+
meta=metadata,
396453
)
397454
logger.debug(f"Async upload success, object_key: {object_key}")
398455
return
@@ -401,14 +458,19 @@ async def async_upload_bytes(
401458
return
402459

403460
def upload_file(
404-
self, file_path: str, bucket_name: str = "", object_key: str = ""
461+
self,
462+
file_path: str,
463+
bucket_name: str = "",
464+
object_key: str = "",
465+
metadata: dict | None = None,
405466
) -> None:
406467
"""Upload file to TOS bucket
407468
408469
Args:
409470
file_path: Local file path
410471
bucket_name: TOS bucket name
411472
object_key: Object key, auto-generated if None
473+
metadata: Metadata to associate with the object
412474
"""
413475
bucket_name = self._check_bucket_name(bucket_name)
414476
if not object_key:
@@ -418,7 +480,7 @@ def upload_file(
418480
return
419481
try:
420482
self._client.put_object_from_file(
421-
bucket=bucket_name, key=object_key, file_path=file_path
483+
bucket=bucket_name, key=object_key, file_path=file_path, meta=metadata
422484
)
423485
logger.debug(f"Upload success, object_key: {object_key}")
424486
return
@@ -427,14 +489,19 @@ def upload_file(
427489
return
428490

429491
async def async_upload_file(
430-
self, file_path: str, bucket_name: str = "", object_key: str = ""
492+
self,
493+
file_path: str,
494+
bucket_name: str = "",
495+
object_key: str = "",
496+
metadata: dict | None = None,
431497
) -> None:
432498
"""Asynchronously upload file to TOS bucket
433499
434500
Args:
435501
file_path: Local file path
436502
bucket_name: TOS bucket name
437503
object_key: Object key, auto-generated if None
504+
metadata: Metadata to associate with the object
438505
"""
439506
bucket_name = self._check_bucket_name(bucket_name)
440507
if not object_key:
@@ -449,6 +516,7 @@ async def async_upload_file(
449516
bucket=bucket_name,
450517
key=object_key,
451518
file_path=file_path,
519+
meta=metadata,
452520
)
453521
logger.debug(f"Async upload success, object_key: {object_key}")
454522
return
@@ -461,13 +529,15 @@ def upload_files(
461529
file_paths: List[str],
462530
bucket_name: str = "",
463531
object_keys: Optional[List[str]] = None,
532+
metadata: dict | None = None,
464533
) -> None:
465534
"""Upload multiple files to TOS bucket
466535
467536
Args:
468537
file_paths: List of local file paths
469538
bucket_name: TOS bucket name
470539
object_keys: List of object keys, auto-generated if empty or length mismatch
540+
metadata: Metadata to associate with the object
471541
"""
472542
bucket_name = self._check_bucket_name(bucket_name)
473543

@@ -487,7 +557,7 @@ def upload_files(
487557
try:
488558
for file_path, object_key in zip(file_paths, object_keys):
489559
# Note: upload_file method doesn't return value, we use exceptions to determine success
490-
self.upload_file(file_path, bucket_name, object_key)
560+
self.upload_file(file_path, bucket_name, object_key, metadata=metadata)
491561
return
492562
except Exception as e:
493563
logger.error(f"Upload files failed: {str(e)}")
@@ -498,13 +568,15 @@ async def async_upload_files(
498568
file_paths: List[str],
499569
bucket_name: str = "",
500570
object_keys: Optional[List[str]] = None,
571+
metadata: dict | None = None,
501572
) -> None:
502573
"""Asynchronously upload multiple files to TOS bucket
503574
504575
Args:
505576
file_paths: List of local file paths
506577
bucket_name: TOS bucket name
507578
object_keys: List of object keys, auto-generated if empty or length mismatch
579+
metadata: Metadata to associate with the object
508580
"""
509581
bucket_name = self._check_bucket_name(bucket_name)
510582

@@ -529,19 +601,23 @@ async def async_upload_files(
529601
bucket=bucket_name,
530602
key=object_key,
531603
file_path=file_path,
604+
metadata=metadata,
532605
)
533606
logger.debug(f"Async upload success, object_key: {object_key}")
534607
return
535608
except Exception as e:
536609
logger.error(f"Async upload files failed: {str(e)}")
537610
return
538611

539-
def upload_directory(self, directory_path: str, bucket_name: str = "") -> None:
612+
def upload_directory(
613+
self, directory_path: str, bucket_name: str = "", metadata: dict | None = None
614+
) -> None:
540615
"""Upload entire directory to TOS bucket
541616
542617
Args:
543618
directory_path: Local directory path
544619
bucket_name: TOS bucket name
620+
metadata: Metadata to associate with the objects
545621
"""
546622
bucket_name = self._check_bucket_name(bucket_name)
547623

@@ -555,7 +631,7 @@ def _upload_dir(root_dir):
555631
# Use relative path of file as object key
556632
object_key = os.path.relpath(path, directory_path)
557633
# upload_file method doesn't return value, use exceptions to determine success
558-
self.upload_file(path, bucket_name, object_key)
634+
self.upload_file(path, bucket_name, object_key, metadata=metadata)
559635

560636
try:
561637
_upload_dir(directory_path)
@@ -566,13 +642,14 @@ def _upload_dir(root_dir):
566642
raise
567643

568644
async def async_upload_directory(
569-
self, directory_path: str, bucket_name: str = ""
645+
self, directory_path: str, bucket_name: str = "", metadata: dict | None = None
570646
) -> None:
571647
"""Asynchronously upload entire directory to TOS bucket
572648
573649
Args:
574650
directory_path: Local directory path
575651
bucket_name: TOS bucket name
652+
metadata: Metadata to associate with the objects
576653
"""
577654
bucket_name = self._check_bucket_name(bucket_name)
578655

@@ -586,7 +663,9 @@ async def _aupload_dir(root_dir):
586663
# Use relative path of file as object key
587664
object_key = os.path.relpath(path, directory_path)
588665
# Asynchronously upload single file
589-
await self.async_upload_file(path, bucket_name, object_key)
666+
await self.async_upload_file(
667+
path, bucket_name, object_key, metadata=metadata
668+
)
590669

591670
try:
592671
await _aupload_dir(directory_path)

veadk/knowledgebase/backends/base_backend.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,19 +29,19 @@ def precheck_index_naming(self) -> None:
2929
"""
3030

3131
@abstractmethod
32-
def add_from_directory(self, directory: str, **kwargs) -> bool:
32+
def add_from_directory(self, directory: str, *args, **kwargs) -> bool:
3333
"""Add knowledge from a directory to the knowledgebase"""
3434

3535
@abstractmethod
36-
def add_from_files(self, files: list[str], **kwargs) -> bool:
36+
def add_from_files(self, files: list[str], *args, **kwargs) -> bool:
3737
"""Add knowledge (e.g., documents, strings, ...) to the knowledgebase"""
3838

3939
@abstractmethod
40-
def add_from_text(self, text: str | list[str], **kwargs) -> bool:
40+
def add_from_text(self, text: str | list[str], *args, **kwargs) -> bool:
4141
"""Add knowledge from text to knowledgebase"""
4242

4343
@abstractmethod
44-
def search(self, **kwargs) -> list:
44+
def search(self, *args, **kwargs) -> list:
4545
"""Search knowledge from knowledgebase"""
4646

4747
# Optional methods for future use:

0 commit comments

Comments
 (0)