Skip to content

Commit 5a48eb4

Browse files
authored
s3fs: handle versioned paths in info and exists (#746)
1 parent 22f8ea1 commit 5a48eb4

File tree

2 files changed

+46
-9
lines changed

2 files changed

+46
-9
lines changed

s3fs/core.py

Lines changed: 30 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -967,18 +967,39 @@ async def _ls(self, path, detail=False, refresh=False, versions=False):
967967
return files
968968
return files if detail else sorted([o["name"] for o in files])
969969

970+
def _exists_in_cache(self, path, bucket, key, version_id):
    """Answer an existence query from the listing cache, if it can.

    Parameters
    ----------
    path
        Not read by this helper; the cache lookup key is rebuilt from
        ``bucket`` and ``key``.
    bucket, key
        Joined with ``"/"`` to form the name looked up in the cache.
    version_id
        Requested object version, or ``None`` when no specific version
        was asked for.

    Returns
    -------
    ``False`` when ``_ls_from_cache`` raises ``FileNotFoundError``,
    ``True`` when the cached entries settle the question, and ``None``
    when the cache cannot decide and the caller has to check another way.
    """
    cache_key = "/".join([bucket, key])

    try:
        cached = self._ls_from_cache(cache_key)
    except FileNotFoundError:
        return False

    # No cached listing at all: nothing can be concluded from the cache.
    if cached is None:
        return None

    # Without a specific version to match, any cached result is a hit.
    wants_version = self.version_aware and version_id is not None
    if not wants_version:
        return True

    version_is_cached = any(
        item["name"] == cache_key and item.get("VersionId") == version_id
        for item in cached
    )
    # dircache doesn't support multiple versions, so a miss here does not
    # prove the requested version is absent -- report "unknown" instead.
    return True if version_is_cached else None
991+
970992
async def _exists(self, path):
971993
if path in ["", "/"]:
972994
# the root always exists, even if anon
973995
return True
974996
path = self._strip_protocol(path)
975997
bucket, key, version_id = self.split_path(path)
976998
if key:
977-
try:
978-
if self._ls_from_cache(path):
979-
return True
980-
except FileNotFoundError:
981-
return False
999+
exists_in_cache = self._exists_in_cache(path, bucket, key, version_id)
1000+
if exists_in_cache is not None:
1001+
return exists_in_cache
1002+
9821003
try:
9831004
await self._info(path, bucket, key, version_id=version_id)
9841005
return True
@@ -1216,6 +1237,8 @@ async def _open_file(range: int):
12161237
async def _info(self, path, bucket=None, key=None, refresh=False, version_id=None):
12171238
path = self._strip_protocol(path)
12181239
bucket, key, path_version_id = self.split_path(path)
1240+
fullpath = "/".join((bucket, key))
1241+
12191242
if version_id is not None:
12201243
if not self.version_aware:
12211244
raise ValueError(
@@ -1226,15 +1249,15 @@ async def _info(self, path, bucket=None, key=None, refresh=False, version_id=Non
12261249
return {"name": path, "size": 0, "type": "directory"}
12271250
version_id = _coalesce_version_id(path_version_id, version_id)
12281251
if not refresh:
1229-
out = self._ls_from_cache(path)
1252+
out = self._ls_from_cache(fullpath)
12301253
if out is not None:
12311254
if self.version_aware and version_id is not None:
12321255
# If cached info does not match requested version_id,
12331256
# fallback to calling head_object
12341257
out = [
12351258
o
12361259
for o in out
1237-
if o["name"] == path and version_id == o.get("VersionId")
1260+
if o["name"] == fullpath and version_id == o.get("VersionId")
12381261
]
12391262
if out:
12401263
return out[0]

s3fs/tests/test_s3fs.py

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1508,8 +1508,11 @@ def test_tags(s3):
15081508
assert s3.get_tags(fname) == tagset
15091509

15101510

1511-
def test_versions(s3):
1512-
versioned_file = versioned_bucket_name + "/versioned_file"
1511+
@pytest.mark.parametrize("prefix", ["", "/dir", "/dir/subdir"])
1512+
def test_versions(s3, prefix):
1513+
parent = versioned_bucket_name + prefix
1514+
versioned_file = parent + "/versioned_file"
1515+
15131516
s3 = S3FileSystem(
15141517
anon=False, version_aware=True, client_kwargs={"endpoint_url": endpoint_uri}
15151518
)
@@ -1537,6 +1540,17 @@ def test_versions(s3):
15371540
assert fo.version_id == first_version
15381541
assert fo.read() == b"1"
15391542

1543+
versioned_file_v1 = f"{versioned_file}?versionId={first_version}"
1544+
versioned_file_v2 = f"{versioned_file}?versionId={second_version}"
1545+
1546+
assert s3.ls(parent) == [versioned_file]
1547+
assert set(s3.ls(parent, versions=True)) == {versioned_file_v1, versioned_file_v2}
1548+
1549+
assert s3.exists(versioned_file_v1)
1550+
assert s3.info(versioned_file_v1)
1551+
assert s3.exists(versioned_file_v2)
1552+
assert s3.info(versioned_file_v2)
1553+
15401554

15411555
def test_list_versions_many(s3):
15421556
# moto doesn't actually behave in the same way that s3 does here so this doesn't test

0 commit comments

Comments
 (0)