Skip to content

Commit 9690542

Browse files
committed
feat(stream_io): Support Boto3's default credential resolution
1 parent 7a762df commit 9690542

File tree

2 files changed

+63
-45
lines changed

2 files changed

+63
-45
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
[Unreleased]
99

10+
### Added
11+
12+
- `stream_io.open_stream()` now respects Boto3's configuration files
13+
and environment variables when searching for object storage credentials
14+
1015
### Fixed
1116

1217
- `stream_io.open_stream()` now uses virtual-hosted-style

tensorizer/stream_io.py

Lines changed: 58 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import boto3
2121
import botocore
22+
import botocore.exceptions
2223
import redis
2324

2425
import tensorizer._version as _version
@@ -903,19 +904,12 @@ def _is_caios(endpoint: str) -> bool:
903904

904905

905906
def _new_s3_client(
906-
s3_access_key_id: str,
907-
s3_secret_access_key: str,
908-
s3_endpoint: str,
907+
s3_access_key_id: Optional[str],
908+
s3_secret_access_key: Optional[str],
909+
s3_endpoint: Optional[str],
909910
s3_region_name: Optional[str] = None,
910911
s3_signature_version: Optional[str] = None,
911912
):
912-
if s3_secret_access_key is None:
913-
raise TypeError("No secret key provided")
914-
if s3_access_key_id is None:
915-
raise TypeError("No access key provided")
916-
if s3_endpoint is None:
917-
raise TypeError("No S3 endpoint provided")
918-
919913
config_args = dict(user_agent=_BOTO_USER_AGENT)
920914
auth_args = {}
921915

@@ -960,9 +954,9 @@ def _parse_s3_uri(uri: str) -> Tuple[str, str]:
960954
def s3_upload(
961955
path: str,
962956
target_uri: str,
963-
s3_access_key_id: str,
964-
s3_secret_access_key: str,
965-
s3_endpoint: str = default_s3_write_endpoint,
957+
s3_access_key_id: Optional[str],
958+
s3_secret_access_key: Optional[str],
959+
s3_endpoint: Optional[str] = default_s3_write_endpoint,
966960
s3_region_name: Optional[str] = None,
967961
s3_signature_version: Optional[str] = None,
968962
):
@@ -979,9 +973,9 @@ def s3_upload(
979973

980974
def _s3_download_url(
981975
path_uri: str,
982-
s3_access_key_id: str,
983-
s3_secret_access_key: str,
984-
s3_endpoint: str = default_s3_read_endpoint,
976+
s3_access_key_id: Optional[str],
977+
s3_secret_access_key: Optional[str],
978+
s3_endpoint: Optional[str] = default_s3_read_endpoint,
985979
s3_region_name: Optional[str] = None,
986980
s3_signature_version: Optional[str] = None,
987981
) -> str:
@@ -1029,19 +1023,40 @@ def _s3_download_url(
10291023
expiry = t - (t % SIG_GRANULARITY) + (SIG_GRANULARITY * 2)
10301024
seconds_to_expiry = expiry - t
10311025

1032-
url = client.generate_presigned_url(
1033-
ClientMethod="get_object",
1034-
Params={"Bucket": bucket, "Key": key},
1035-
ExpiresIn=seconds_to_expiry,
1036-
)
1026+
try:
1027+
# This is the first point at which an error may be raised by boto3
1028+
# for missing credentials
1029+
url = client.generate_presigned_url(
1030+
ClientMethod="get_object",
1031+
Params={"Bucket": bucket, "Key": key},
1032+
ExpiresIn=seconds_to_expiry,
1033+
)
1034+
except botocore.exceptions.NoCredentialsError:
1035+
if s3_access_key_id is None and s3_secret_access_key is None:
1036+
# Credentials may be absent because a public read
1037+
# bucket is being used, so try blank credentials
1038+
try:
1039+
return _s3_download_url(
1040+
path_uri,
1041+
"",
1042+
"",
1043+
s3_endpoint,
1044+
s3_region_name,
1045+
s3_signature_version,
1046+
)
1047+
except botocore.exceptions.NoCredentialsError:
1048+
# If the retry with blank credentials fails with the same error,
1049+
# just ignore it, and raise the original error
1050+
pass
1051+
raise
10371052
return url
10381053

10391054

10401055
def s3_download(
10411056
path_uri: str,
1042-
s3_access_key_id: str,
1043-
s3_secret_access_key: str,
1044-
s3_endpoint: str = default_s3_read_endpoint,
1057+
s3_access_key_id: Optional[str],
1058+
s3_secret_access_key: Optional[str],
1059+
s3_endpoint: Optional[str] = default_s3_read_endpoint,
10451060
s3_region_name: Optional[str] = None,
10461061
s3_signature_version: Optional[str] = None,
10471062
buffer_size: Optional[int] = None,
@@ -1372,27 +1387,25 @@ def open_stream(
13721387
# Not required to have been found,
13731388
# and doesn't overwrite an explicitly specified endpoint.
13741389
s3_endpoint = s3_endpoint or s3.s3_endpoint
1375-
except (ValueError, FileNotFoundError) as e:
1376-
# Uploads always require credentials here, but downloads may not
1377-
if is_s3_upload:
1378-
raise
1379-
else:
1380-
# Credentials may be absent because a public read
1381-
# bucket is being used, so try blank credentials,
1382-
# but provide a descriptive warning for future errors
1383-
# that may occur due to this exception being suppressed.
1384-
# Don't save the whole exception object since it holds
1385-
# a stack trace, which can interfere with garbage collection.
1386-
error_context = (
1387-
"Warning: empty credentials were used for S3."
1388-
f"\nReason: {e}"
1389-
"\nIf the connection failed due to missing permissions"
1390-
" (e.g. HTTP error 403), try providing credentials"
1391-
" directly with the tensorizer.stream_io.open_stream()"
1392-
" function."
1393-
)
1394-
s3_access_key_id = s3_access_key_id or ""
1395-
s3_secret_access_key = s3_access_key_id or ""
1390+
except (ValueError, FileNotFoundError):
1391+
# TODO: Reimplement this logic somewhere in s3_download
1392+
#
1393+
# Credentials may be absent because a public read
1394+
# bucket is being used, so try blank credentials,
1395+
# but provide a descriptive warning for future errors
1396+
# that may occur due to this exception being suppressed.
1397+
# Don't save the whole exception object since it holds
1398+
# a stack trace, which can interfere with garbage collection.
1399+
#
1400+
# error_context = (
1401+
# "Warning: empty credentials were used for S3."
1402+
# f"\nReason: {e}"
1403+
# "\nIf the connection failed due to missing permissions"
1404+
# " (e.g. HTTP error 403), try providing credentials"
1405+
# " directly with the tensorizer.stream_io.open_stream()"
1406+
# " function."
1407+
# )
1408+
pass
13961409

13971410
# Regardless of whether the config needed to be parsed,
13981411
# the endpoint gets a default value based on the operation.

0 commit comments

Comments
 (0)