Skip to content

Commit a25ed72

Browse files
committed
partial prefixes
1 parent 6e5000c commit a25ed72

File tree

2 files changed

+62
-3
lines changed

2 files changed

+62
-3
lines changed

packages/aws-library/src/aws_library/s3/_client.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ async def list_objects(
171171
prefix: S3ObjectPrefix | None,
172172
start_after: S3ObjectKey | None,
173173
limit: int = _MAX_ITEMS_PER_PAGE,
174+
is_partial_prefix: bool = False,
174175
) -> list[S3MetaData | S3DirectoryMetaData]:
175176
"""returns a number of entries in the bucket, defined by limit
176177
the entries are sorted alphabetically by key
@@ -189,11 +190,16 @@ async def list_objects(
189190
if limit > _AWS_MAX_ITEMS_PER_PAGE:
190191
msg = f"num_objects must be <= {_AWS_MAX_ITEMS_PER_PAGE}"
191192
raise ValueError(msg)
193+
194+
final_prefix = f"{prefix}" if prefix else ""
195+
if prefix and not is_partial_prefix:
196+
final_prefix = (
197+
f"{final_prefix.rstrip(_S3_OBJECT_DELIMITER)}{_S3_OBJECT_DELIMITER}"
198+
)
199+
192200
listed_objects = await self._client.list_objects_v2(
193201
Bucket=bucket,
194-
Prefix=f"{str(prefix).rstrip(_S3_OBJECT_DELIMITER)}{_S3_OBJECT_DELIMITER}"
195-
if prefix
196-
else "",
202+
Prefix=final_prefix,
197203
MaxKeys=limit,
198204
StartAfter=start_after or "",
199205
Delimiter=_S3_OBJECT_DELIMITER,

packages/aws-library/tests/test_s3_client.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import filecmp
1111
import json
1212
import logging
13+
import random
1314
from collections import defaultdict
1415
from collections.abc import AsyncIterator, Awaitable, Callable
1516
from dataclasses import dataclass
@@ -669,6 +670,58 @@ async def test_list_objects_pagination(
669670
assert len(objects) == (total_num_files - (num_fetch - 1) * limit)
670671

671672

673+
@pytest.mark.parametrize(
674+
"directory_size, min_file_size, max_file_size, depth",
675+
[
676+
(
677+
TypeAdapter(ByteSize).validate_python("1Mib"),
678+
TypeAdapter(ByteSize).validate_python("1B"),
679+
TypeAdapter(ByteSize).validate_python("10Kib"),
680+
0,
681+
)
682+
],
683+
ids=byte_size_ids,
684+
)
685+
async def test_list_objects_partial_prefix(
686+
mocked_s3_server_envs: EnvVarsDict,
687+
with_s3_bucket: S3BucketName,
688+
with_uploaded_folder_on_s3: list[UploadedFile],
689+
simcore_s3_api: SimcoreS3API,
690+
):
691+
total_num_files = len(with_uploaded_folder_on_s3)
692+
# pre-condition
693+
directories, files = _get_paths_with_prefix(
694+
with_uploaded_folder_on_s3, prefix_level=0, path_prefix=None
695+
)
696+
assert len(directories) == 1, "test pre-condition not fulfilled!"
697+
assert not files
698+
699+
first_level_prefix = next(iter(directories))
700+
first_level_directories, first_level_files = _get_paths_with_prefix(
701+
with_uploaded_folder_on_s3, prefix_level=1, path_prefix=first_level_prefix
702+
)
703+
assert (
704+
not first_level_directories
705+
), "test pre-condition not fulfilled, there should be only files for this test"
706+
assert len(first_level_files) == total_num_files
707+
708+
a_random_file = random.choice(list(first_level_files)) # noqa: S311
709+
a_partial_prefix = a_random_file.name[0:1]
710+
expected_files = {
711+
file for file in first_level_files if file.name.startswith(a_partial_prefix)
712+
}
713+
714+
# now we will fetch the file objects according to the given limit
715+
objects = await simcore_s3_api.list_objects(
716+
bucket=with_s3_bucket,
717+
prefix=first_level_prefix / a_partial_prefix,
718+
start_after=None,
719+
is_partial_prefix=True,
720+
)
721+
assert len(objects) == len(expected_files)
722+
assert {_.as_path() for _ in objects} == expected_files
723+
724+
672725
async def test_get_file_metadata(
673726
mocked_s3_server_envs: EnvVarsDict,
674727
with_s3_bucket: S3BucketName,

0 commit comments

Comments (0)