
Commit 33fdf7e

test pagination
1 parent 87687e1 commit 33fdf7e

File tree:
  packages/aws-library/src/aws_library/s3/_client.py
  packages/aws-library/tests/test_s3_client.py

2 files changed (+89 lines, -27 lines)


packages/aws-library/src/aws_library/s3/_client.py

Lines changed: 17 additions & 5 deletions
@@ -170,19 +170,31 @@ async def list_objects(
         bucket: S3BucketName,
         prefix: S3ObjectPrefix | None,
         start_after: S3ObjectKey | None,
-        num_objects: int = _MAX_ITEMS_PER_PAGE,
+        limit: int = _MAX_ITEMS_PER_PAGE,
     ) -> list[S3MetaData | S3DirectoryMetaData]:
-        if num_objects < 1:
-            return []
-        if num_objects > _AWS_MAX_ITEMS_PER_PAGE:
+        """Returns at most `limit` entries from the bucket, sorted alphabetically by key.
+
+        The listing starts right after `start_after`;
+        if `start_after` is None, it starts at the first entry in the bucket.
+        If `prefix` is not None, only entries with the given prefix are returned;
+        if `prefix` is None, all entries in the bucket are considered.
+        `limit` must be >= 1 and <= _AWS_MAX_ITEMS_PER_PAGE.
+
+        Raises:
+            ValueError: if `limit` is out of range
+        """
+        if limit < 1:
+            msg = "limit must be >= 1"
+            raise ValueError(msg)
+        if limit > _AWS_MAX_ITEMS_PER_PAGE:
             msg = f"num_objects must be <= {_AWS_MAX_ITEMS_PER_PAGE}"
             raise ValueError(msg)
         listed_objects = await self._client.list_objects_v2(
             Bucket=bucket,
             Prefix=f"{str(prefix).rstrip(_S3_OBJECT_DELIMITER)}{_S3_OBJECT_DELIMITER}"
             if prefix
             else "",
-            MaxKeys=num_objects,
+            MaxKeys=limit,
             StartAfter=start_after or "",
             Delimiter=_S3_OBJECT_DELIMITER,
         )
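With the rename, `limit` and `start_after` together give callers cursor-style pagination: each page resumes right after the last key of the previous one. Below is a minimal caller-side sketch of that loop; it is not part of this commit, the helper name `iter_all_objects` is made up, and it assumes an initialized `SimcoreS3API` plus pages containing only file entries that expose `object_key` (as `S3MetaData` does in the test further down).

from collections.abc import AsyncIterator
from typing import Any

async def iter_all_objects(
    simcore_s3_api: Any, bucket: str, prefix: Any, page_size: int = 500
) -> AsyncIterator[Any]:
    # hypothetical helper: drives list_objects() page by page
    start_after = None
    while True:
        page = await simcore_s3_api.list_objects(
            bucket=bucket, prefix=prefix, start_after=start_after, limit=page_size
        )
        for entry in page:
            yield entry
        if len(page) < page_size:  # short page: listing exhausted
            return
        start_after = page[-1].object_key  # resume after the last key seen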

packages/aws-library/tests/test_s3_client.py

Lines changed: 72 additions & 22 deletions
@@ -32,7 +32,7 @@
 from models_library.api_schemas_storage import S3BucketName, UploadedPart
 from models_library.basic_types import SHA256Str
 from moto.server import ThreadedMotoServer
-from pydantic import AnyUrl, ByteSize, TypeAdapter
+from pydantic import AnyUrl, ByteSize, NonNegativeInt, TypeAdapter
 from pytest_benchmark.plugin import BenchmarkFixture
 from pytest_simcore.helpers.logging_tools import log_context
 from pytest_simcore.helpers.parametrizations import (
@@ -346,17 +346,21 @@ def set_log_levels_for_noisy_libraries() -> None:
 @pytest.fixture
 async def create_folder_on_s3(
     create_folder_of_size_with_multiple_files: Callable[
-        [ByteSize, ByteSize, ByteSize], Path
+        [ByteSize, ByteSize, ByteSize, NonNegativeInt | None], Path
     ],
     upload_file: Callable[[Path, Path], Awaitable[UploadedFile]],
     directory_size: ByteSize,
     min_file_size: ByteSize,
     max_file_size: ByteSize,
+    depth: NonNegativeInt | None,
 ) -> Callable[[], Awaitable[list[UploadedFile]]]:
     async def _() -> list[UploadedFile]:
         # create random files of random size and upload to S3
         folder = create_folder_of_size_with_multiple_files(
-            ByteSize(directory_size), ByteSize(min_file_size), ByteSize(max_file_size)
+            ByteSize(directory_size),
+            ByteSize(min_file_size),
+            ByteSize(max_file_size),
+            depth,
         )
         list_uploaded_files = []
 
@@ -529,12 +533,13 @@ def _filter_by_prefix(uploaded_file: UploadedFile) -> bool:
 
 
 @pytest.mark.parametrize(
-    "directory_size, min_file_size, max_file_size",
+    "directory_size, min_file_size, max_file_size, depth",
     [
         (
             TypeAdapter(ByteSize).validate_python("1Mib"),
             TypeAdapter(ByteSize).validate_python("1B"),
             TypeAdapter(ByteSize).validate_python("10Kib"),
+            None,
         )
     ],
     ids=byte_size_ids,
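Here `depth=None` leaves the directory layout to the factory's defaults, while the pagination test below passes `depth=0`, which (judging by the pre-conditions the test asserts) should produce a flat folder with every file at the root. A toy illustration of that assumed contract; the helper below is hypothetical, not the real fixture:

from pathlib import Path

def _make_files(folder: Path, num_files: int, depth: int | None) -> None:
    # assumed contract: depth == 0 -> flat layout, all files at the folder root;
    # depth == None -> the factory is free to nest files in subfolders
    for i in range(num_files):
        target = folder if depth == 0 else folder / f"level_{i % 3}"
        target.mkdir(parents=True, exist_ok=True)
        (target / f"file_{i}.bin").write_bytes(b"\x00" * 16)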
@@ -603,27 +608,65 @@ async def test_list_objects_pagination_num_objects_limits(
 )
 
 
+@pytest.mark.parametrize(
+    "directory_size, min_file_size, max_file_size, depth",
+    [
+        (
+            TypeAdapter(ByteSize).validate_python("1Mib"),
+            TypeAdapter(ByteSize).validate_python("1B"),
+            TypeAdapter(ByteSize).validate_python("10Kib"),
+            0,
+        )
+    ],
+    ids=byte_size_ids,
+)
+@pytest.mark.parametrize("limit", [10, 50, 300], ids=lambda x: f"limit={x}")
 async def test_list_objects_pagination(
-    faker: Faker,
     mocked_s3_server_envs: EnvVarsDict,
     with_s3_bucket: S3BucketName,
+    with_uploaded_folder_on_s3: list[UploadedFile],
     simcore_s3_api: SimcoreS3API,
+    limit: int,
 ):
-    objects = await simcore_s3_api.list_objects(
-        bucket=with_s3_bucket,
-        prefix=None,
-        start_after=None,
-        num_objects=faker.pyint(max_value=0),
+    total_num_files = len(with_uploaded_folder_on_s3)
+    # pre-condition
+    directories, files = _get_paths_with_prefix(
+        with_uploaded_folder_on_s3, prefix_level=0, path_prefix=None
     )
-    assert objects == []
+    assert len(directories) == 1, "test pre-condition not fulfilled!"
+    assert not files
 
-    with pytest.raises(ValueError, match=r"num_objects must be <= \d+"):
-        await simcore_s3_api.list_objects(
+    first_level_prefix = next(iter(directories))
+    first_level_directories, first_level_files = _get_paths_with_prefix(
+        with_uploaded_folder_on_s3, prefix_level=1, path_prefix=first_level_prefix
+    )
+    assert (
+        not first_level_directories
+    ), "test pre-condition not fulfilled, there should be only files for this test"
+    assert len(first_level_files) == total_num_files
+
+    # now we will fetch the file objects according to the given limit
+    num_fetch = int(round(total_num_files / limit + 0.5))
+    assert num_fetch >= 1
+    start_after_key = None
+    for i in range(num_fetch - 1):
+        objects = await simcore_s3_api.list_objects(
             bucket=with_s3_bucket,
-            prefix=None,
-            start_after=None,
-            num_objects=_AWS_MAX_ITEMS_PER_PAGE + 1,
+            prefix=first_level_prefix,
+            start_after=start_after_key,
+            limit=limit,
         )
+        assert len(objects) == limit, f"fetch {i} returned a wrong number of objects"
+        assert isinstance(objects[-1], S3MetaData)
+        start_after_key = objects[-1].object_key
+    # last fetch
+    objects = await simcore_s3_api.list_objects(
+        bucket=with_s3_bucket,
+        prefix=first_level_prefix,
+        start_after=start_after_key,
+        limit=limit,
+    )
+    assert len(objects) == (total_num_files - (num_fetch - 1) * limit)
 
 
 async def test_get_file_metadata(
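A note on the page-count arithmetic above: `int(round(total_num_files / limit + 0.5))` is meant as a ceiling division, but Python 3 rounds halves to even, so the formula can overshoot by one page when the total is an exact odd multiple of `limit`. `math.ceil(total / limit)` sidesteps that corner case; a quick illustrative check:

import math

for total, limit in [(305, 10), (300, 10), (30, 10), (20, 10)]:
    via_round = int(round(total / limit + 0.5))  # the formula used in the test
    via_ceil = math.ceil(total / limit)  # plain ceiling division
    print(f"{total=} {limit=} {via_round=} {via_ceil=}")
# only (30, 10) diverges: round(3.5) == 4 under round-half-to-even, so the test
# would issue one extra fetch expecting 0 objects; the final assert still holds.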
@@ -1233,12 +1276,13 @@ async def test_copy_file_invalid_raises(
 
 
 @pytest.mark.parametrize(
-    "directory_size, min_file_size, max_file_size",
+    "directory_size, min_file_size, max_file_size, depth",
     [
         (
             TypeAdapter(ByteSize).validate_python("1Mib"),
             TypeAdapter(ByteSize).validate_python("1B"),
             TypeAdapter(ByteSize).validate_python("10Kib"),
+            None,
         )
     ],
     ids=byte_size_ids,
@@ -1259,12 +1303,13 @@ async def test_get_directory_metadata(
 
 
 @pytest.mark.parametrize(
-    "directory_size, min_file_size, max_file_size",
+    "directory_size, min_file_size, max_file_size, depth",
     [
         (
             TypeAdapter(ByteSize).validate_python("1Mib"),
             TypeAdapter(ByteSize).validate_python("1B"),
             TypeAdapter(ByteSize).validate_python("10Kib"),
+            None,
         )
     ],
     ids=byte_size_ids,
@@ -1291,12 +1336,13 @@ async def test_get_directory_metadata_raises(
 
 
 @pytest.mark.parametrize(
-    "directory_size, min_file_size, max_file_size",
+    "directory_size, min_file_size, max_file_size, depth",
    [
         (
             TypeAdapter(ByteSize).validate_python("1Mib"),
             TypeAdapter(ByteSize).validate_python("1B"),
             TypeAdapter(ByteSize).validate_python("10Kib"),
+            None,
         )
     ],
     ids=byte_size_ids,
@@ -1327,12 +1373,13 @@ async def test_delete_file_recursively(
 
 
 @pytest.mark.parametrize(
-    "directory_size, min_file_size, max_file_size",
+    "directory_size, min_file_size, max_file_size, depth",
     [
         (
             TypeAdapter(ByteSize).validate_python("1Mib"),
             TypeAdapter(ByteSize).validate_python("1B"),
             TypeAdapter(ByteSize).validate_python("10Kib"),
+            None,
         )
     ],
     ids=byte_size_ids,
@@ -1365,12 +1412,13 @@ async def test_delete_file_recursively_raises(
 
 
 @pytest.mark.parametrize(
-    "directory_size, min_file_size, max_file_size",
+    "directory_size, min_file_size, max_file_size, depth",
     [
         (
             TypeAdapter(ByteSize).validate_python("1Mib"),
             TypeAdapter(ByteSize).validate_python("1B"),
             TypeAdapter(ByteSize).validate_python("10Kib"),
+            None,
         )
     ],
     ids=byte_size_ids,
@@ -1468,17 +1516,19 @@ def run_async_test(*args, **kwargs) -> None:
 
 
 @pytest.mark.parametrize(
-    "directory_size, min_file_size, max_file_size",
+    "directory_size, min_file_size, max_file_size, depth",
     [
         (
             TypeAdapter(ByteSize).validate_python("1Mib"),
             TypeAdapter(ByteSize).validate_python("1B"),
             TypeAdapter(ByteSize).validate_python("10Kib"),
+            None,
         ),
         (
             TypeAdapter(ByteSize).validate_python("500Mib"),
             TypeAdapter(ByteSize).validate_python("10Mib"),
             TypeAdapter(ByteSize).validate_python("50Mib"),
+            None,
         ),
     ],
     ids=byte_size_ids,
