Skip to content

Commit 7a08545

Browse files
muddyfish and dnanuti authored
Add additional logging for construction and HeadObject requests (#128)
* Add additional logging to verify which constructor is used for Dataset creation * Add logging whenever we run an explicit HeadObject request * Add additional logging for explicit calls to S3Client * Add extra tests which validate logs are performed when S3Client methods are called * Add explicit logging for s3_uri in from_prefix constructors --------- Co-authored-by: Simon Beal <[email protected]> Co-authored-by: Diana Nanuti <[email protected]>
1 parent f8b4082 commit 7a08545

File tree

7 files changed

+109
-13
lines changed

7 files changed

+109
-13
lines changed

s3torchconnector/src/s3torchconnector/_s3client/_s3client.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
22
# // SPDX-License-Identifier: BSD
33

4+
import logging
45
import os
56
from functools import partial
67
from typing import Optional, Any
@@ -23,6 +24,9 @@
2324
"""
2425

2526

27+
log = logging.getLogger(__name__)
28+
29+
2630
def _identity(obj: Any) -> Any:
2731
return obj
2832

@@ -51,6 +55,7 @@ def _client_builder(self) -> MountpointS3Client:
5155
)
5256

5357
def get_object(self, bucket: str, key: str) -> S3Reader:
58+
log.debug(f"GetObject s3://{bucket}/{key}")
5459
return S3Reader(
5560
bucket,
5661
key,
@@ -64,21 +69,25 @@ def _get_object_stream(self, bucket: str, key: str) -> GetObjectStream:
6469
def put_object(
6570
self, bucket: str, key: str, storage_class: Optional[str] = None
6671
) -> S3Writer:
72+
log.debug(f"PutObject s3://{bucket}/{key}")
6773
return S3Writer(self._client.put_object(bucket, key, storage_class))
6874

6975
# TODO: Probably need a ListObjectResult on dataset side
7076
def list_objects(
7177
self, bucket: str, prefix: str = "", delimiter: str = "", max_keys: int = 1000
7278
) -> ListObjectStream:
79+
log.debug(f"ListObjects s3://{bucket}/{prefix}")
7380
return self._client.list_objects(bucket, prefix, delimiter, max_keys)
7481

7582
# TODO: We need ObjectInfo on dataset side
7683
def head_object(self, bucket: str, key: str) -> ObjectInfo:
84+
log.debug(f"HeadObject s3://{bucket}/{key}")
7785
return self._client.head_object(bucket, key)
7886

7987
def from_bucket_and_object_info(
8088
self, bucket: str, object_info: ObjectInfo
8189
) -> S3Reader:
90+
log.debug(f"GetObjectWithInfo s3://{bucket}/{object_info.key}")
8291
return S3Reader(
8392
bucket,
8493
object_info.key,

s3torchconnector/src/s3torchconnector/s3iterable_dataset.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# // SPDX-License-Identifier: BSD
33
from functools import partial
44
from typing import Iterator, Any, Union, Iterable, Callable
5+
import logging
56

67
import torch.utils.data
78

@@ -14,6 +15,8 @@
1415
get_objects_from_prefix,
1516
)
1617

18+
log = logging.getLogger(__name__)
19+
1720

1821
class S3IterableDataset(torch.utils.data.IterableDataset):
1922
"""An IterableStyle dataset created from S3 objects.
@@ -58,6 +61,7 @@ def from_objects(
5861
Raises:
5962
S3Exception: An error occurred accessing S3.
6063
"""
64+
log.info(f"Building {cls.__name__} from_objects")
6165
return cls(
6266
region, partial(get_objects_from_uris, object_uris), transform=transform
6367
)
@@ -83,6 +87,7 @@ def from_prefix(
8387
Raises:
8488
S3Exception: An error occurred accessing S3.
8589
"""
90+
log.info(f"Building {cls.__name__} from_prefix {s3_uri=}")
8691
return cls(
8792
region, partial(get_objects_from_prefix, s3_uri), transform=transform
8893
)

s3torchconnector/src/s3torchconnector/s3map_dataset.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# // SPDX-License-Identifier: BSD
33
from functools import partial
44
from typing import List, Any, Callable, Iterable, Union
5+
import logging
56

67
import torch.utils.data
78
from s3torchconnector._s3bucket_key import S3BucketKey
@@ -15,6 +16,8 @@
1516
identity,
1617
)
1718

19+
log = logging.getLogger(__name__)
20+
1821

1922
class S3MapDataset(torch.utils.data.Dataset):
2023
"""A Map-Style dataset created from S3 objects.
@@ -66,7 +69,7 @@ def from_objects(
6669
Raises:
6770
S3Exception: An error occurred accessing S3.
6871
"""
69-
72+
log.info(f"Building {cls.__name__} from_objects")
7073
return cls(
7174
region, partial(get_objects_from_uris, object_uris), transform=transform
7275
)
@@ -92,6 +95,7 @@ def from_prefix(
9295
Raises:
9396
S3Exception: An error occurred accessing S3.
9497
"""
98+
log.info(f"Building {cls.__name__} from_prefix {s3_uri=}")
9599
return cls(
96100
region, partial(get_objects_from_prefix, s3_uri), transform=transform
97101
)

s3torchconnector/tst/unit/test_checkpointing.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
22
# // SPDX-License-Identifier: BSD
3-
4-
from io import BytesIO
3+
import logging
4+
from io import BytesIO, SEEK_END
55
from operator import eq
66
from typing import Any, Callable
77
from unittest.mock import patch
@@ -147,6 +147,23 @@ def test_general_checkpointing_untyped_storage_loads_no_modern_pytorch_format(
147147
)
148148

149149

150+
def test_checkpoint_seek_logging(caplog):
151+
checkpoint = S3Checkpoint(TEST_REGION)
152+
153+
# Use MockClient instead of actual client.
154+
client = MockS3Client(TEST_REGION, TEST_BUCKET)
155+
checkpoint._client = client
156+
157+
s3_uri = f"s3://{TEST_BUCKET}/{TEST_KEY}"
158+
with checkpoint.writer(s3_uri) as writer:
159+
writer.write(b"test")
160+
161+
with checkpoint.reader(s3_uri) as reader:
162+
with caplog.at_level(logging.DEBUG):
163+
reader.seek(0, SEEK_END)
164+
assert f"HeadObject {s3_uri}" in caplog.messages
165+
166+
150167
def _test_save(
151168
data,
152169
byteorder: str,
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
# // SPDX-License-Identifier: BSD
3+
import logging
4+
5+
import pytest
6+
7+
from s3torchconnector._s3client import S3Client, MockS3Client
8+
from s3torchconnectorclient._mountpoint_s3_client import ObjectInfo
9+
10+
TEST_BUCKET = "test-bucket"
11+
TEST_KEY = "test-key"
12+
TEST_REGION = "us-east-1"
13+
S3_URI = f"s3://{TEST_BUCKET}/{TEST_KEY}"
14+
15+
16+
@pytest.fixture
17+
def s3_client() -> S3Client:
18+
client = MockS3Client(TEST_REGION, TEST_BUCKET)
19+
client.add_object(TEST_KEY, b"data")
20+
return client
21+
22+
23+
def test_get_object_log(s3_client: S3Client, caplog):
24+
with caplog.at_level(logging.DEBUG):
25+
s3_client.get_object(TEST_BUCKET, TEST_KEY)
26+
assert f"GetObject {S3_URI}" in caplog.messages
27+
28+
29+
def test_get_object_info_log(s3_client: S3Client, caplog):
30+
with caplog.at_level(logging.DEBUG):
31+
s3_client.from_bucket_and_object_info(
32+
TEST_BUCKET,
33+
ObjectInfo(TEST_KEY, "", 0, 0, None, None),
34+
)
35+
assert f"GetObjectWithInfo {S3_URI}" in caplog.messages
36+
37+
38+
def test_head_object_log(s3_client: S3Client, caplog):
39+
with caplog.at_level(logging.DEBUG):
40+
s3_client.head_object(TEST_BUCKET, TEST_KEY)
41+
assert f"HeadObject {S3_URI}" in caplog.messages
42+
43+
44+
def test_put_object_log(s3_client: S3Client, caplog):
45+
with caplog.at_level(logging.DEBUG):
46+
s3_client.put_object(TEST_BUCKET, TEST_KEY)
47+
assert f"PutObject {S3_URI}" in caplog.messages
48+
49+
50+
def test_list_objects_log(s3_client: S3Client, caplog):
51+
with caplog.at_level(logging.DEBUG):
52+
s3_client.list_objects(TEST_BUCKET, TEST_KEY)
53+
assert f"ListObjects {S3_URI}" in caplog.messages

s3torchconnector/tst/unit/test_s3iterable_dataset.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
22
# // SPDX-License-Identifier: BSD
3-
3+
import logging
44
from typing import Iterable, Callable, Sequence, Any
55

66
import pytest
@@ -15,16 +15,20 @@
1515
)
1616

1717

18-
def test_dataset_creation_from_prefix_with_region():
19-
dataset = S3IterableDataset.from_prefix(S3_PREFIX, region=TEST_REGION)
18+
def test_dataset_creation_from_prefix_with_region(caplog):
19+
with caplog.at_level(logging.INFO):
20+
dataset = S3IterableDataset.from_prefix(S3_PREFIX, region=TEST_REGION)
2021
assert isinstance(dataset, S3IterableDataset)
2122
assert dataset.region == TEST_REGION
23+
assert "Building S3IterableDataset from_prefix" in caplog.text
2224

2325

24-
def test_dataset_creation_from_objects_with_region():
25-
dataset = S3IterableDataset.from_objects([], region=TEST_REGION)
26+
def test_dataset_creation_from_objects_with_region(caplog):
27+
with caplog.at_level(logging.INFO):
28+
dataset = S3IterableDataset.from_objects([], region=TEST_REGION)
2629
assert isinstance(dataset, S3IterableDataset)
2730
assert dataset.region == TEST_REGION
31+
assert "Building S3IterableDataset from_objects" in caplog.text
2832

2933

3034
@pytest.mark.parametrize(

s3torchconnector/tst/unit/test_s3mapdataset.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
22
# // SPDX-License-Identifier: BSD
3-
3+
import logging
44
from typing import Sequence, Callable, Any
55

66
import pytest
@@ -15,16 +15,20 @@
1515
)
1616

1717

18-
def test_dataset_creation_from_prefix_with_region():
19-
dataset = S3MapDataset.from_prefix(S3_PREFIX, region=TEST_REGION)
18+
def test_dataset_creation_from_prefix_with_region(caplog):
19+
with caplog.at_level(logging.INFO):
20+
dataset = S3MapDataset.from_prefix(S3_PREFIX, region=TEST_REGION)
2021
assert isinstance(dataset, S3MapDataset)
2122
assert dataset.region == TEST_REGION
23+
assert "Building S3MapDataset from_prefix" in caplog.text
2224

2325

24-
def test_dataset_creation_from_objects_with_region():
25-
dataset = S3MapDataset.from_objects([], region=TEST_REGION)
26+
def test_dataset_creation_from_objects_with_region(caplog):
27+
with caplog.at_level(logging.INFO):
28+
dataset = S3MapDataset.from_objects([], region=TEST_REGION)
2629
assert isinstance(dataset, S3MapDataset)
2730
assert dataset.region == TEST_REGION
31+
assert "Building S3MapDataset from_objects" in caplog.text
2832

2933

3034
@pytest.mark.parametrize(

0 commit comments

Comments (0)