From e8f64a829ad11b700f661617708d29e34a981c2c Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 7 Oct 2025 13:59:07 -0400 Subject: [PATCH 1/2] Prevent duplicated entries in find() in presence of directory markers --- s3fs/core.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/s3fs/core.py b/s3fs/core.py index d8848db3..d7f29058 100644 --- a/s3fs/core.py +++ b/s3fs/core.py @@ -912,7 +912,10 @@ async def _find( # Explicitly add directories to their parents in the dircache for d in dirs: par = self._parent(d["name"]) - if par in thisdircache: + # extra condition here (in any()) to deal with directory-marking files + if par in thisdircache and not any( + _["name"] == d["name"] for _ in thisdircache[par] + ): thisdircache[par].append(d) if not prefix: From d9dc21171496c59346b9084bec6b204481d5d6a7 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 9 Oct 2025 17:07:37 -0400 Subject: [PATCH 2/2] repro --- s3fs/tests/test_s3fs.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/s3fs/tests/test_s3fs.py b/s3fs/tests/test_s3fs.py index 45609eb6..65e7b44a 100644 --- a/s3fs/tests/test_s3fs.py +++ b/s3fs/tests/test_s3fs.py @@ -2993,3 +2993,28 @@ def test_bucket_info(s3): assert "VersionId" in info assert info["type"] == "directory" assert info["name"] == test_bucket_name + + +def test_find_ls_fail(s3): + # because of https://github.com/fsspec/s3fs/pull/989 + client = get_boto3_client() + files = { + f"{test_bucket_name}/find/a/a": b"data", + f"{test_bucket_name}/find/a/b": b"data", + f"{test_bucket_name}/find/a": b"", # placeholder without "/" + f"{test_bucket_name}/find/b": b"", # empty placeholder without "/" + f"{test_bucket_name}/find/c/c": b"data", # directory with no placeholder + f"{test_bucket_name}/find/d/d": b"data", # dir will acquire placeholder with "/" + } + client.put_object(Bucket=test_bucket_name, Key="find/d/", Body=b"") + s3.pipe(files) + + out0 = s3.ls(f"{test_bucket_name}/find", detail=True) +
s3.find(test_bucket_name, detail=False) + out = s3.ls(f"{test_bucket_name}/find", detail=True) + assert out == out0 + + s3.invalidate_cache() + s3.find(f"{test_bucket_name}/find", detail=False) + out = s3.ls(f"{test_bucket_name}/find", detail=True) + assert out == out0