diff --git a/s3fs/core.py b/s3fs/core.py index d8848db3..d7f29058 100644 --- a/s3fs/core.py +++ b/s3fs/core.py @@ -912,7 +912,10 @@ async def _find( # Explicitly add directories to their parents in the dircache for d in dirs: par = self._parent(d["name"]) - if par in thisdircache: + # extra condition here (in any()) to deal with directory-marking files + if par in thisdircache and not any( + _["name"] == d["name"] for _ in thisdircache[par] + ): thisdircache[par].append(d) if not prefix: diff --git a/s3fs/tests/test_s3fs.py b/s3fs/tests/test_s3fs.py index 45609eb6..65e7b44a 100644 --- a/s3fs/tests/test_s3fs.py +++ b/s3fs/tests/test_s3fs.py @@ -2993,3 +2993,28 @@ def test_bucket_info(s3): assert "VersionId" in info assert info["type"] == "directory" assert info["name"] == test_bucket_name + + +def test_find_ls_fail(s3): + # because of https://github.com/fsspec/s3fs/pull/989 + client = get_boto3_client() + files = { + f"{test_bucket_name}/find/a/a": b"data", + f"{test_bucket_name}/find/a/b": b"data", + f"{test_bucket_name}/find/a": b"", # placeholder without "/" + f"{test_bucket_name}/find/b": b"", # empty placeholder without "/" + f"{test_bucket_name}/find/c/c": b"data", # directory with no placeholder + f"{test_bucket_name}/find/d/d": b"data", # dir will acquire placeholder with "/" + } + client.put_object(Bucket=test_bucket_name, Key="find/d/", Body=b"") + s3.pipe(files) + + out0 = s3.ls(f"{test_bucket_name}/find", detail=True) + s3.find(test_bucket_name, detail=False) + out = s3.ls(f"{test_bucket_name}/find", detail=True) + assert out == out0 + + s3.invalidate_cache() + s3.find(f"{test_bucket_name}/find", detail=False) + out = s3.ls(f"{test_bucket_name}/find", detail=True) + assert out == out0