Skip to content

Commit 1a2f87f

Browse files
committed
feat(knowledgebase): enhance upload methods with metadata support and add KnowledgebaseEntry model
1 parent 7e0f5d2 commit 1a2f87f

File tree

7 files changed

+243
-50
lines changed

7 files changed

+243
-50
lines changed

.gitignore

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,13 @@ cython_debug/
167167
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
168168
.idea/
169169

170+
# Visual Studio Code
171+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
172+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
173+
# and can be added to the global gitignore or merged into this file. Here, the entry
174+
# below ignores the entire .vscode folder
175+
.vscode/
176+
170177
# Ruff stuff:
171178
.ruff_cache/
172179

veadk/integrations/ve_tos/ve_tos.py

Lines changed: 100 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,16 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from io import StringIO
1615
import asyncio
1716
import os
1817
from datetime import datetime
19-
from typing import TYPE_CHECKING, Union, List, Optional
18+
from io import StringIO
19+
from typing import TYPE_CHECKING, List, Optional, Union
2020
from urllib.parse import urlparse
21-
from veadk.utils.misc import getenv
21+
2222
from veadk.consts import DEFAULT_TOS_BUCKET_NAME
2323
from veadk.utils.logger import get_logger
24+
from veadk.utils.misc import getenv
2425

2526
if TYPE_CHECKING:
2627
pass
@@ -179,7 +180,18 @@ def _set_cors_rules(self, bucket_name: str) -> bool:
179180
return False
180181

181182
def _build_object_key_for_file(self, data_path: str) -> str:
182-
"""generate TOS object key"""
183+
"""Builds the TOS object key for the given data path.
184+
185+
Args:
189+
data_path (str): Data path, parsed with urlparse to derive the key.
190+
191+
Returns:
192+
str: Object key.
193+
"""
194+
183195
parsed_url = urlparse(data_path)
184196

185197
# Generate object key
@@ -245,14 +257,33 @@ def build_tos_url(self, object_key: str, bucket_name: str = "") -> str:
245257

246258
# deprecated
247259
def upload(
248-
self, data: Union[str, bytes], bucket_name: str = "", object_key: str = ""
260+
self,
261+
data: Union[str, bytes],
262+
bucket_name: str = "",
263+
object_key: str = "",
264+
metadata: dict | None = None,
249265
):
266+
"""Uploads data to TOS.
267+
268+
Args:
269+
data (Union[str, bytes]): The data to upload, either as a file path or raw bytes.
270+
bucket_name (str): The name of the TOS bucket to upload to.
271+
object_key (str): The object key for the uploaded data.
272+
metadata (dict | None, optional): Metadata to associate with the object. Defaults to None.
273+
274+
Raises:
275+
ValueError: If the data type is unsupported.
276+
"""
250277
if isinstance(data, str):
251278
# data is a file path
252-
return asyncio.to_thread(self.upload_file, data, bucket_name, object_key)
279+
return asyncio.to_thread(
280+
self.upload_file, data, bucket_name, object_key, metadata
281+
)
253282
elif isinstance(data, bytes):
254283
# data is bytes content
255-
return asyncio.to_thread(self.upload_bytes, data, bucket_name, object_key)
284+
return asyncio.to_thread(
285+
self.upload_bytes, data, bucket_name, object_key, metadata
286+
)
256287
else:
257288
error_msg = f"Upload failed: data type error. Only str (file path) and bytes are supported, got {type(data)}"
258289
logger.error(error_msg)
@@ -276,14 +307,19 @@ def _ensure_client_and_bucket(self, bucket_name: str) -> bool:
276307
return True
277308

278309
def upload_text(
279-
self, text: str, bucket_name: str = "", object_key: str = ""
310+
self,
311+
text: str,
312+
bucket_name: str = "",
313+
object_key: str = "",
314+
metadata: dict | None = None,
280315
) -> None:
281316
"""Upload text content to TOS bucket
282317
283318
Args:
284319
text: Text content to upload
285320
bucket_name: TOS bucket name
286321
object_key: Object key, auto-generated if empty
322+
metadata: Metadata to associate with the object
287323
"""
288324
bucket_name = self._check_bucket_name(bucket_name)
289325
if not object_key:
@@ -293,7 +329,9 @@ def upload_text(
293329
return
294330
data = StringIO(text)
295331
try:
296-
self._client.put_object(bucket=bucket_name, key=object_key, content=data)
332+
self._client.put_object(
333+
bucket=bucket_name, key=object_key, content=data, meta=metadata
334+
)
297335
logger.debug(f"Upload success, object_key: {object_key}")
298336
return
299337
except Exception as e:
@@ -303,14 +341,19 @@ def upload_text(
303341
data.close()
304342

305343
async def async_upload_text(
306-
self, text: str, bucket_name: str = "", object_key: str = ""
344+
self,
345+
text: str,
346+
bucket_name: str = "",
347+
object_key: str = "",
348+
metadata: dict | None = None,
307349
) -> None:
308350
"""Asynchronously upload text content to TOS bucket
309351
310352
Args:
311353
text: Text content to upload
312354
bucket_name: TOS bucket name
313355
object_key: Object key, auto-generated if empty
356+
metadata: Metadata to associate with the object
314357
"""
315358
bucket_name = self._check_bucket_name(bucket_name)
316359
if not object_key:
@@ -326,6 +369,7 @@ async def async_upload_text(
326369
bucket=bucket_name,
327370
key=object_key,
328371
content=data,
372+
meta=metadata,
329373
)
330374
logger.debug(f"Async upload success, object_key: {object_key}")
331375
return
@@ -336,14 +380,19 @@ async def async_upload_text(
336380
data.close()
337381

338382
def upload_bytes(
339-
self, data: bytes, bucket_name: str = "", object_key: str = ""
383+
self,
384+
data: bytes,
385+
bucket_name: str = "",
386+
object_key: str = "",
387+
metadata: dict | None = None,
340388
) -> None:
341389
"""Upload byte data to TOS bucket
342390
343391
Args:
344392
data: Byte data to upload
345393
bucket_name: TOS bucket name
346394
object_key: Object key, auto-generated if empty
395+
metadata: Metadata to associate with the object
347396
"""
348397
bucket_name = self._check_bucket_name(bucket_name)
349398
if not object_key:
@@ -352,22 +401,29 @@ def upload_bytes(
352401
if not self._ensure_client_and_bucket(bucket_name):
353402
return
354403
try:
355-
self._client.put_object(bucket=bucket_name, key=object_key, content=data)
404+
self._client.put_object(
405+
bucket=bucket_name, key=object_key, content=data, meta=metadata
406+
)
356407
logger.debug(f"Upload success, object_key: {object_key}")
357408
return
358409
except Exception as e:
359410
logger.error(f"Upload failed: {e}")
360411
return
361412

362413
async def async_upload_bytes(
363-
self, data: bytes, bucket_name: str = "", object_key: str = ""
414+
self,
415+
data: bytes,
416+
bucket_name: str = "",
417+
object_key: str = "",
418+
metadata: dict | None = None,
364419
) -> None:
365420
"""Asynchronously upload byte data to TOS bucket
366421
367422
Args:
368423
data: Byte data to upload
369424
bucket_name: TOS bucket name
370425
object_key: Object key, auto-generated if empty
426+
metadata: Metadata to associate with the object
371427
"""
372428
bucket_name = self._check_bucket_name(bucket_name)
373429
if not object_key:
@@ -382,6 +438,7 @@ async def async_upload_bytes(
382438
bucket=bucket_name,
383439
key=object_key,
384440
content=data,
441+
meta=metadata,
385442
)
386443
logger.debug(f"Async upload success, object_key: {object_key}")
387444
return
@@ -390,14 +447,19 @@ async def async_upload_bytes(
390447
return
391448

392449
def upload_file(
393-
self, file_path: str, bucket_name: str = "", object_key: str = ""
450+
self,
451+
file_path: str,
452+
bucket_name: str = "",
453+
object_key: str = "",
454+
metadata: dict | None = None,
394455
) -> None:
395456
"""Upload file to TOS bucket
396457
397458
Args:
398459
file_path: Local file path
399460
bucket_name: TOS bucket name
400461
object_key: Object key, auto-generated if empty
462+
metadata: Metadata to associate with the object
401463
"""
402464
bucket_name = self._check_bucket_name(bucket_name)
403465
if not object_key:
@@ -407,7 +469,7 @@ def upload_file(
407469
return
408470
try:
409471
self._client.put_object_from_file(
410-
bucket=bucket_name, key=object_key, file_path=file_path
472+
bucket=bucket_name, key=object_key, file_path=file_path, meta=metadata
411473
)
412474
logger.debug(f"Upload success, object_key: {object_key}")
413475
return
@@ -416,14 +478,19 @@ def upload_file(
416478
return
417479

418480
async def async_upload_file(
419-
self, file_path: str, bucket_name: str = "", object_key: str = ""
481+
self,
482+
file_path: str,
483+
bucket_name: str = "",
484+
object_key: str = "",
485+
metadata: dict | None = None,
420486
) -> None:
421487
"""Asynchronously upload file to TOS bucket
422488
423489
Args:
424490
file_path: Local file path
425491
bucket_name: TOS bucket name
426492
object_key: Object key, auto-generated if empty
493+
metadata: Metadata to associate with the object
427494
"""
428495
bucket_name = self._check_bucket_name(bucket_name)
429496
if not object_key:
@@ -438,6 +505,7 @@ async def async_upload_file(
438505
bucket=bucket_name,
439506
key=object_key,
440507
file_path=file_path,
508+
meta=metadata,
441509
)
442510
logger.debug(f"Async upload success, object_key: {object_key}")
443511
return
@@ -450,13 +518,15 @@ def upload_files(
450518
file_paths: List[str],
451519
bucket_name: str = "",
452520
object_keys: Optional[List[str]] = None,
521+
metadata: dict | None = None,
453522
) -> None:
454523
"""Upload multiple files to TOS bucket
455524
456525
Args:
457526
file_paths: List of local file paths
458527
bucket_name: TOS bucket name
459528
object_keys: List of object keys, auto-generated if empty or length mismatch
529+
metadata: Metadata to associate with each uploaded object
460530
"""
461531
bucket_name = self._check_bucket_name(bucket_name)
462532

@@ -476,7 +546,7 @@ def upload_files(
476546
try:
477547
for file_path, object_key in zip(file_paths, object_keys):
478548
# Note: upload_file method doesn't return value, we use exceptions to determine success
479-
self.upload_file(file_path, bucket_name, object_key)
549+
self.upload_file(file_path, bucket_name, object_key, metadata=metadata)
480550
return
481551
except Exception as e:
482552
logger.error(f"Upload files failed: {str(e)}")
@@ -487,13 +557,15 @@ async def async_upload_files(
487557
file_paths: List[str],
488558
bucket_name: str = "",
489559
object_keys: Optional[List[str]] = None,
560+
metadata: dict | None = None,
490561
) -> None:
491562
"""Asynchronously upload multiple files to TOS bucket
492563
493564
Args:
494565
file_paths: List of local file paths
495566
bucket_name: TOS bucket name
496567
object_keys: List of object keys, auto-generated if empty or length mismatch
568+
metadata: Metadata to associate with the object
497569
"""
498570
bucket_name = self._check_bucket_name(bucket_name)
499571

@@ -518,19 +590,23 @@ async def async_upload_files(
518590
bucket=bucket_name,
519591
key=object_key,
520592
file_path=file_path,
593+
metadata=metadata,
521594
)
522595
logger.debug(f"Async upload success, object_key: {object_key}")
523596
return
524597
except Exception as e:
525598
logger.error(f"Async upload files failed: {str(e)}")
526599
return
527600

528-
def upload_directory(self, directory_path: str, bucket_name: str = "") -> None:
601+
def upload_directory(
602+
self, directory_path: str, bucket_name: str = "", metadata: dict | None = None
603+
) -> None:
529604
"""Upload entire directory to TOS bucket
530605
531606
Args:
532607
directory_path: Local directory path
533608
bucket_name: TOS bucket name
609+
metadata: Metadata to associate with the objects
534610
"""
535611
bucket_name = self._check_bucket_name(bucket_name)
536612

@@ -544,7 +620,7 @@ def _upload_dir(root_dir):
544620
# Use relative path of file as object key
545621
object_key = os.path.relpath(path, directory_path)
546622
# upload_file method doesn't return value, use exceptions to determine success
547-
self.upload_file(path, bucket_name, object_key)
623+
self.upload_file(path, bucket_name, object_key, metadata=metadata)
548624

549625
try:
550626
_upload_dir(directory_path)
@@ -555,13 +631,14 @@ def _upload_dir(root_dir):
555631
raise
556632

557633
async def async_upload_directory(
558-
self, directory_path: str, bucket_name: str = ""
634+
self, directory_path: str, bucket_name: str = "", metadata: dict | None = None
559635
) -> None:
560636
"""Asynchronously upload entire directory to TOS bucket
561637
562638
Args:
563639
directory_path: Local directory path
564640
bucket_name: TOS bucket name
641+
metadata: Metadata to associate with the objects
565642
"""
566643
bucket_name = self._check_bucket_name(bucket_name)
567644

@@ -575,7 +652,9 @@ async def _aupload_dir(root_dir):
575652
# Use relative path of file as object key
576653
object_key = os.path.relpath(path, directory_path)
577654
# Asynchronously upload single file
578-
await self.async_upload_file(path, bucket_name, object_key)
655+
await self.async_upload_file(
656+
path, bucket_name, object_key, metadata=metadata
657+
)
579658

580659
try:
581660
await _aupload_dir(directory_path)

veadk/knowledgebase/backends/base_backend.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,19 +29,19 @@ def precheck_index_naming(self) -> None:
2929
"""
3030

3131
@abstractmethod
32-
def add_from_directory(self, directory: str, **kwargs) -> bool:
32+
def add_from_directory(self, directory: str, *args, **kwargs) -> bool:
3333
"""Add knowledge from a directory to the knowledgebase"""
3434

3535
@abstractmethod
36-
def add_from_files(self, files: list[str], **kwargs) -> bool:
36+
def add_from_files(self, files: list[str], *args, **kwargs) -> bool:
3737
"""Add knowledge (e.g., documents, strings, ...) to knowledgebase"""
3838

3939
@abstractmethod
40-
def add_from_text(self, text: str | list[str], **kwargs) -> bool:
40+
def add_from_text(self, text: str | list[str], *args, **kwargs) -> bool:
4141
"""Add knowledge from text to knowledgebase"""
4242

4343
@abstractmethod
44-
def search(self, **kwargs) -> list:
44+
def search(self, *args, **kwargs) -> list:
4545
"""Search knowledge from knowledgebase"""
4646

4747
# Optional methods for future use:

0 commit comments

Comments
 (0)