Skip to content

Commit 179f911

Browse files
committed
format
1 parent c77158e commit 179f911

File tree

3 files changed

+58
-9
lines changed

3 files changed

+58
-9
lines changed

pandas/io/common.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1295,8 +1295,8 @@ def _infer_protocol(path: str) -> str:
12951295
if is_platform_windows() and re.match(r"^[a-zA-Z]:[\\/]", path):
12961296
return "file"
12971297

1298-
parsed = parse_url(path)
1299-
if parsed.scheme in _VALID_URLS:
1298+
if is_fsspec_url(path):
1299+
parsed = parse_url(path)
13001300
return parsed.scheme
13011301
return "file"
13021302

@@ -1396,19 +1396,19 @@ def iterdir(
13961396
# Remote paths (e.g., s3)
13971397
fsspec = import_optional_dependency("fsspec", extra=scheme)
13981398
fs = fsspec.filesystem(scheme)
1399-
if fs.isfile(path):
1400-
path_obj = PurePosixPath(path)
1399+
path_without_scheme = fsspec.core.strip_protocol(path_str)
1400+
if fs.isfile(path_without_scheme):
14011401
if _match_file(
1402-
path_obj,
1402+
path_without_scheme,
14031403
extensions,
14041404
glob,
14051405
):
1406-
yield path_obj
1406+
yield PurePosixPath(path_without_scheme)
14071407
return
1408-
if not fs.isdir(path):
1408+
if not fs.isdir(path_without_scheme):
14091409
raise NotADirectoryError(f"Path {path!r} is neither a file nor a directory.")
14101410

1411-
files = fs.ls(path, detail=True)
1411+
files = fs.ls(path_without_scheme, detail=True)
14121412
for f in files:
14131413
if f["type"] == "file":
14141414
path_obj = PurePosixPath(f["name"])

pandas/tests/io/conftest.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,3 +234,21 @@ def directory_with_dummy_csv(tmp_path):
234234
file_path = tmp_path / f"file_{i}.csv"
235235
file_path.touch()
236236
return tmp_path
237+
238+
239+
@pytest.fixture
def mock_remote_csv_directory(monkeypatch):
    """
    Provide a fake remote directory backed by fsspec's in-memory filesystem.

    The directory ``remote-bucket`` is seeded with two top-level CSV files
    (``a.csv`` and ``b.csv``) plus one CSV inside a ``nested`` subdirectory.
    ``fsspec.filesystem`` is patched so that every lookup, whatever protocol
    or options are requested, returns this in-memory filesystem.

    Yields
    ------
    str
        The ``s3://`` URL of the seeded directory.
    """
    pytest.importorskip("fsspec", reason="fsspec is required for remote tests")

    from fsspec.implementations.memory import MemoryFileSystem

    fs = MemoryFileSystem()
    # Start from a clean slate: the store may still hold entries written by
    # earlier users of the in-memory filesystem.
    fs.store.clear()

    dir_name = "remote-bucket"
    fs.pipe(f"{dir_name}/a.csv", b"a,b,c\n1,2,3\n")
    fs.pipe(f"{dir_name}/b.csv", b"a,b,c\n4,5,6\n")
    fs.pipe(f"{dir_name}/nested/ignored.csv", b"x,y,z\n")

    # Accept any call signature so callers that forward storage options
    # (``fsspec.filesystem(protocol, **storage_options)``) do not hit a
    # TypeError in the stub.
    monkeypatch.setattr("fsspec.filesystem", lambda *args, **kwargs: fs)
    yield f"s3://{dir_name}"

    # Do not leak the fake bucket into tests that run afterwards.
    fs.store.clear()

pandas/tests/io/test_common.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -697,8 +697,39 @@ def test_pyarrow_read_csv_datetime_dtype():
697697
tm.assert_frame_equal(expect, result)
698698

699699

700-
def test_iterdir_local(directory_with_dummy_csv):
    """Check that iterdir yields the dummy CSV files of a local directory."""
    files = list(icom.iterdir(directory_with_dummy_csv))

    # Without this guard the per-file assertions below would pass vacuously
    # if iterdir yielded nothing at all.
    assert files

    for file in files:
        assert file.is_file()
        assert file.name.startswith("file_")
        assert file.suffix == ".csv"
705+
706+
707+
def test_mock_remote_csv_directory_contents(mock_remote_csv_directory):
    """Sanity-check the layout that the mocked remote filesystem exposes."""
    import fsspec
    from fsspec.implementations.memory import MemoryFileSystem

    mocked_fs = fsspec.filesystem("s3")
    assert isinstance(mocked_fs, MemoryFileSystem)

    # The bucket itself must be visible as a directory.
    assert mocked_fs.exists("remote-bucket")
    assert mocked_fs.isdir("remote-bucket")

    # Split the top-level listing into plain files and subdirectories.
    top_level_files = []
    top_level_dirs = []
    for entry in mocked_fs.ls("remote-bucket", detail=True):
        if entry["type"] == "file":
            top_level_files.append(entry["name"])
        elif entry["type"] == "directory":
            top_level_dirs.append(entry["name"])
    top_level_files.sort()

    assert top_level_files == ["/remote-bucket/a.csv", "/remote-bucket/b.csv"]
    assert "/remote-bucket/nested" in top_level_dirs

    # The nested directory holds the single file seeded by the fixture.
    first_nested = mocked_fs.ls("remote-bucket/nested", detail=True)[0]
    assert first_nested["name"] == "/remote-bucket/nested/ignored.csv"
727+
728+
729+
def test_iterdir_remote(mock_remote_csv_directory):
    """Check that iterdir yields the top-level CSV files of a remote directory."""
    import fsspec

    fs = fsspec.filesystem("s3")
    files = list(icom.iterdir(mock_remote_csv_directory))

    # Pin the exact result so the test cannot pass vacuously on an empty
    # iterator; the CSV under "nested/" is expected to be left out.
    assert sorted(file.name for file in files) == ["a.csv", "b.csv"]

    for file in files:
        assert fs.isfile(file)
        assert file.suffix == ".csv"

0 commit comments

Comments
 (0)