Skip to content

Commit 2dc6118

Browse files
author
Bo Wang
committed
Fix s3 accesspoint url parsing
1 parent f2c7717 commit 2dc6118

File tree

2 files changed

+15
-4
lines changed

2 files changed

+15
-4
lines changed

fsspec/tests/test_utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,14 @@ def test_infer_options():
209209
# - The bucket is included in path
210210
for protocol in ["s3", "s3a", "gcs", "gs"]:
211211
options = infer_storage_options(f"{protocol}://Bucket-name.com/test.csv")
212+
assert options["host"] == "Bucket-name.com"
212213
assert options["path"] == "Bucket-name.com/test.csv"
213214

215+
for protocol in ["s3", "s3a"]:
216+
options = infer_storage_options(f"{protocol}://arn:aws:s3:us-west-2:1234:accesspoint/abc/test.csv")
217+
assert options["host"] == "arn:aws:s3:us-west-2:1234:accesspoint"
218+
assert options["path"] == "arn:aws:s3:us-west-2:1234:accesspoint/abc/test.csv"
219+
214220
with pytest.raises(KeyError):
215221
infer_storage_options("file:///bucket/file.csv", {"path": "collide"})
216222
with pytest.raises(KeyError):

fsspec/utils.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,12 +96,17 @@ def infer_storage_options(
9696
# https://github.com/dask/dask/issues/1417
9797
options["host"] = parsed_path.netloc.rsplit("@", 1)[-1].rsplit(":", 1)[0]
9898

99+
if protocol in ("s3", "s3a") and parsed_path.netloc.endswith(":accesspoint"):
100+
# When receiving an S3 access point URL like s3://arn:aws:s3:us-west-2:1234:accesspoint/abc,
101+
# the :accesspoint suffix makes port parsing fail with a ValueError because the port is not an integer.
102+
# Ignore the port setting and keep the :accesspoint suffix in options["host"].
103+
options["host"] = parsed_path.netloc.rsplit("@", 1)[-1]
104+
else:
105+
if parsed_path.port:
106+
options["port"] = parsed_path.port
107+
99108
if protocol in ("s3", "s3a", "gcs", "gs"):
100109
options["path"] = options["host"] + options["path"]
101-
else:
102-
options["host"] = options["host"]
103-
if parsed_path.port:
104-
options["port"] = parsed_path.port
105110
if parsed_path.username:
106111
options["username"] = parsed_path.username
107112
if parsed_path.password:

0 commit comments

Comments
 (0)