Skip to content
Closed
42 changes: 42 additions & 0 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ class IOArgs:
compression: CompressionDict
should_close: bool = False

def __post_init__(self) -> None:
    """Store the ``dir_exists`` result for ``filepath_or_buffer`` as ``parent_exists``."""
    target = self.filepath_or_buffer
    # Evaluated once here; the attribute is not refreshed afterwards.
    self.parent_exists = dir_exists(target)


@dataclasses.dataclass
class IOHandles:
Expand Down Expand Up @@ -630,6 +633,17 @@ def get_handle(
compression_args = dict(ioargs.compression)
compression = compression_args.pop("method")

# If the parent directory doesn't exist, initializing the stream will fail (GH 24306)
if (
is_path
and not ioargs.parent_exists
and _is_writable_mode(mode)
):
os.makedirs(
os.path.dirname(ioargs.filepath_or_buffer),
exist_ok=True,
)

if compression:
# compression libraries do not like an explicit text-mode
ioargs.mode = ioargs.mode.replace("t", "")
Expand Down Expand Up @@ -937,6 +951,26 @@ def file_exists(filepath_or_buffer: FilePathOrBuffer) -> bool:
return exists


def dir_exists(filepath_or_buffer: FilePathOrBuffer) -> bool:
    """
    Test whether the parent directory of a path exists.

    Parameters
    ----------
    filepath_or_buffer : FilePathOrBuffer
        Target to inspect. It is passed through ``stringify_path`` first.

    Returns
    -------
    bool
        ``True`` when the stringified path has no directory component
        (it is relative to the current working directory) or when its
        directory component is an existing directory. ``False`` when the
        input does not stringify to ``str``, or when the directory
        component is missing or is not a directory.
    """
    filepath_or_buffer = stringify_path(filepath_or_buffer)
    if not isinstance(filepath_or_buffer, str):
        # Not a path-like target, so there is no parent directory to check.
        return False

    dirname = os.path.dirname(filepath_or_buffer)
    if not dirname:
        # No directory component: this is the current working directory.
        return True

    try:
        # isdir rather than exists: a regular file sitting at the parent
        # location is not a usable parent directory.
        return os.path.isdir(dirname)
    except (TypeError, ValueError):
        # gh-5874: if the filepath is too long this may raise
        return False


def _is_binary_mode(handle: FilePathOrBuffer, mode: str) -> bool:
"""Whether the handle is opened in binary mode"""
# specified by user
Expand All @@ -951,3 +985,11 @@ def _is_binary_mode(handle: FilePathOrBuffer, mode: str) -> bool:
# classes that expect bytes
binary_classes = (BufferedIOBase, RawIOBase)
return isinstance(handle, binary_classes) or "b" in getattr(handle, "mode", mode)


def _is_writable_mode(mode: str) -> bool:
"""Whether the handle is opened in writable mode"""
writable_prefixes = ('a', 'w', 'r+')
if any(map(mode.startswith, writable_prefixes)):
return True
return False
9 changes: 9 additions & 0 deletions pandas/tests/io/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,3 +533,12 @@ def test_errno_attribute():
with pytest.raises(FileNotFoundError, match="\\[Errno 2\\]") as err:
pd.read_csv("doesnt_exist")
assert err.errno == errno.ENOENT


def test_create_missing_dirs():
    """Check that ``to_csv`` writes to a path with nested subdirectories (GH 24306)."""
    frame = tm.makeDataFrame()
    with tm.ensure_clean_dir() as tmp_dir:
        # The nested directories are never created explicitly by this test.
        target = os.path.join(tmp_dir, "nonexistent/path/to/file.csv")
        frame.to_csv(target)
        assert os.path.exists(target)
2 changes: 0 additions & 2 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,8 +737,6 @@ def test_expand_user(self, df_compat, monkeypatch):
monkeypatch.setenv("USERPROFILE", "TestingUser")
with pytest.raises(OSError, match=r".*TestingUser.*"):
read_parquet("~/file.parquet")
with pytest.raises(OSError, match=r".*TestingUser.*"):
df_compat.to_parquet("~/file.parquet")

def test_partition_cols_supported(self, pa, df_full):
# GH #23283
Expand Down
7 changes: 0 additions & 7 deletions pandas/tests/io/xml/test_to_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,13 +199,6 @@ def test_str_output(datapath, parser):
assert output == from_file_expected


def test_wrong_file_path(parser):
    """Check that ``to_xml`` raises ``FileNotFoundError`` for the fake output path."""
    # The pattern accepts both the English and the Chinese OS error message.
    missing_path_msg = "No such file or directory|没有那个文件或目录"
    with pytest.raises(FileNotFoundError, match=missing_path_msg):
        geom_df.to_xml("/my/fake/path/output.xml", parser=parser)


# INDEX


Expand Down