Skip to content

Commit f8df756

Browse files
committed
TST: Replace ensure_clean utility with temp_file pytest fixture in IO tests
1 parent 5614c7c commit f8df756

File tree

3 files changed: 283 additions and 289 deletions.

pandas/tests/io/test_compression.py

Lines changed: 111 additions & 109 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
import gzip
22
import io
33
import os
4-
from pathlib import Path
54
import subprocess
65
import sys
76
import tarfile
@@ -31,16 +30,16 @@
3130
],
3231
)
3332
@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"])
34-
def test_compression_size(obj, method, compression_only):
33+
def test_compression_size(obj, method, compression_only, temp_file):
3534
if compression_only == "tar":
3635
compression_only = {"method": "tar", "mode": "w:gz"}
3736

38-
with tm.ensure_clean() as path:
39-
getattr(obj, method)(path, compression=compression_only)
40-
compressed_size = os.path.getsize(path)
41-
getattr(obj, method)(path, compression=None)
42-
uncompressed_size = os.path.getsize(path)
43-
assert uncompressed_size > compressed_size
37+
path = temp_file
38+
getattr(obj, method)(path, compression=compression_only)
39+
compressed_size = os.path.getsize(path)
40+
getattr(obj, method)(path, compression=None)
41+
uncompressed_size = os.path.getsize(path)
42+
assert uncompressed_size > compressed_size
4443

4544

4645
@pytest.mark.parametrize(
@@ -54,22 +53,25 @@ def test_compression_size(obj, method, compression_only):
5453
],
5554
)
5655
@pytest.mark.parametrize("method", ["to_csv", "to_json"])
57-
def test_compression_size_fh(obj, method, compression_only):
58-
with tm.ensure_clean() as path:
59-
with icom.get_handle(
60-
path,
61-
"w:gz" if compression_only == "tar" else "w",
62-
compression=compression_only,
63-
) as handles:
64-
getattr(obj, method)(handles.handle)
65-
assert not handles.handle.closed
66-
compressed_size = os.path.getsize(path)
67-
with tm.ensure_clean() as path:
68-
with icom.get_handle(path, "w", compression=None) as handles:
69-
getattr(obj, method)(handles.handle)
70-
assert not handles.handle.closed
71-
uncompressed_size = os.path.getsize(path)
72-
assert uncompressed_size > compressed_size
56+
def test_compression_size_fh(obj, method, compression_only, temp_file):
57+
path = temp_file
58+
with icom.get_handle(
59+
path,
60+
"w:gz" if compression_only == "tar" else "w",
61+
compression=compression_only,
62+
) as handles:
63+
getattr(obj, method)(handles.handle)
64+
assert not handles.handle.closed
65+
compressed_size = os.path.getsize(path)
66+
67+
# Create a new temporary file for uncompressed comparison
68+
path2 = temp_file.parent / f"{temp_file.stem}_uncompressed{temp_file.suffix}"
69+
path2.touch()
70+
with icom.get_handle(path2, "w", compression=None) as handles:
71+
getattr(obj, method)(handles.handle)
72+
assert not handles.handle.closed
73+
uncompressed_size = os.path.getsize(path2)
74+
assert uncompressed_size > compressed_size
7375

7476

7577
@pytest.mark.parametrize(
@@ -81,14 +83,14 @@ def test_compression_size_fh(obj, method, compression_only):
8183
],
8284
)
8385
def test_dataframe_compression_defaults_to_infer(
84-
write_method, write_kwargs, read_method, compression_only, compression_to_extension
86+
write_method, write_kwargs, read_method, compression_only, compression_to_extension, temp_file
8587
):
8688
# GH22004
8789
input = pd.DataFrame([[1.0, 0, -4], [3.4, 5, 2]], columns=["X", "Y", "Z"])
8890
extension = compression_to_extension[compression_only]
89-
with tm.ensure_clean("compressed" + extension) as path:
90-
getattr(input, write_method)(path, **write_kwargs)
91-
output = read_method(path, compression=compression_only)
91+
path = temp_file.parent / f"compressed{extension}"
92+
getattr(input, write_method)(path, **write_kwargs)
93+
output = read_method(path, compression=compression_only)
9294
tm.assert_frame_equal(output, input)
9395

9496

@@ -107,37 +109,38 @@ def test_series_compression_defaults_to_infer(
107109
read_kwargs,
108110
compression_only,
109111
compression_to_extension,
112+
temp_file,
110113
):
111114
# GH22004
112115
input = pd.Series([0, 5, -2, 10], name="X")
113116
extension = compression_to_extension[compression_only]
114-
with tm.ensure_clean("compressed" + extension) as path:
115-
getattr(input, write_method)(path, **write_kwargs)
116-
if "squeeze" in read_kwargs:
117-
kwargs = read_kwargs.copy()
118-
del kwargs["squeeze"]
119-
output = read_method(path, compression=compression_only, **kwargs).squeeze(
120-
"columns"
121-
)
122-
else:
123-
output = read_method(path, compression=compression_only, **read_kwargs)
117+
path = temp_file.parent / f"compressed{extension}"
118+
getattr(input, write_method)(path, **write_kwargs)
119+
if "squeeze" in read_kwargs:
120+
kwargs = read_kwargs.copy()
121+
del kwargs["squeeze"]
122+
output = read_method(path, compression=compression_only, **kwargs).squeeze(
123+
"columns"
124+
)
125+
else:
126+
output = read_method(path, compression=compression_only, **read_kwargs)
124127
tm.assert_series_equal(output, input, check_names=False)
125128

126129

127-
def test_compression_warning(compression_only):
130+
def test_compression_warning(compression_only, temp_file):
128131
# Assert that passing a file object to to_csv while explicitly specifying a
129132
# compression protocol triggers a RuntimeWarning, as per GH21227.
130133
df = pd.DataFrame(
131134
100 * [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
132135
columns=["X", "Y", "Z"],
133136
)
134-
with tm.ensure_clean() as path:
135-
with icom.get_handle(path, "w", compression=compression_only) as handles:
136-
with tm.assert_produces_warning(RuntimeWarning, match="has no effect"):
137-
df.to_csv(handles.handle, compression=compression_only)
137+
path = temp_file
138+
with icom.get_handle(path, "w", compression=compression_only) as handles:
139+
with tm.assert_produces_warning(RuntimeWarning, match="has no effect"):
140+
df.to_csv(handles.handle, compression=compression_only)
138141

139142

140-
def test_compression_binary(compression_only):
143+
def test_compression_binary(compression_only, temp_file):
141144
"""
142145
Binary file handles support compression.
143146
@@ -150,13 +153,13 @@ def test_compression_binary(compression_only):
150153
)
151154

152155
# with a file
153-
with tm.ensure_clean() as path:
154-
with open(path, mode="wb") as file:
155-
df.to_csv(file, mode="wb", compression=compression_only)
156-
file.seek(0) # file shouldn't be closed
157-
tm.assert_frame_equal(
158-
df, pd.read_csv(path, index_col=0, compression=compression_only)
159-
)
156+
path = temp_file
157+
with open(path, mode="wb") as file:
158+
df.to_csv(file, mode="wb", compression=compression_only)
159+
file.seek(0) # file shouldn't be closed
160+
tm.assert_frame_equal(
161+
df, pd.read_csv(path, index_col=0, compression=compression_only)
162+
)
160163

161164
# with BytesIO
162165
file = io.BytesIO()
@@ -167,7 +170,7 @@ def test_compression_binary(compression_only):
167170
)
168171

169172

170-
def test_gzip_reproducibility_file_name():
173+
def test_gzip_reproducibility_file_name(temp_file):
171174
"""
172175
Gzip should create reproducible archives with mtime.
173176
@@ -183,13 +186,12 @@ def test_gzip_reproducibility_file_name():
183186
compression_options = {"method": "gzip", "mtime": 1}
184187

185188
# test for filename
186-
with tm.ensure_clean() as path:
187-
path = Path(path)
188-
df.to_csv(path, compression=compression_options)
189-
time.sleep(0.1)
190-
output = path.read_bytes()
191-
df.to_csv(path, compression=compression_options)
192-
assert output == path.read_bytes()
189+
path = temp_file
190+
df.to_csv(path, compression=compression_options)
191+
time.sleep(0.1)
192+
output = path.read_bytes()
193+
df.to_csv(path, compression=compression_options)
194+
assert output == path.read_bytes()
193195

194196

195197
def test_gzip_reproducibility_file_object():
@@ -259,14 +261,14 @@ def test_with_missing_lzma_runtime():
259261
],
260262
)
261263
@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"])
262-
def test_gzip_compression_level(obj, method):
264+
def test_gzip_compression_level(obj, method, temp_file):
263265
# GH33196
264-
with tm.ensure_clean() as path:
265-
getattr(obj, method)(path, compression="gzip")
266-
compressed_size_default = os.path.getsize(path)
267-
getattr(obj, method)(path, compression={"method": "gzip", "compresslevel": 1})
268-
compressed_size_fast = os.path.getsize(path)
269-
assert compressed_size_default < compressed_size_fast
266+
path = temp_file
267+
getattr(obj, method)(path, compression="gzip")
268+
compressed_size_default = os.path.getsize(path)
269+
getattr(obj, method)(path, compression={"method": "gzip", "compresslevel": 1})
270+
compressed_size_fast = os.path.getsize(path)
271+
assert compressed_size_default < compressed_size_fast
270272

271273

272274
@pytest.mark.parametrize(
@@ -280,15 +282,15 @@ def test_gzip_compression_level(obj, method):
280282
],
281283
)
282284
@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"])
283-
def test_xz_compression_level_read(obj, method):
284-
with tm.ensure_clean() as path:
285-
getattr(obj, method)(path, compression="xz")
286-
compressed_size_default = os.path.getsize(path)
287-
getattr(obj, method)(path, compression={"method": "xz", "preset": 1})
288-
compressed_size_fast = os.path.getsize(path)
289-
assert compressed_size_default < compressed_size_fast
290-
if method == "to_csv":
291-
pd.read_csv(path, compression="xz")
285+
def test_xz_compression_level_read(obj, method, temp_file):
286+
path = temp_file
287+
getattr(obj, method)(path, compression="xz")
288+
compressed_size_default = os.path.getsize(path)
289+
getattr(obj, method)(path, compression={"method": "xz", "preset": 1})
290+
compressed_size_fast = os.path.getsize(path)
291+
assert compressed_size_default < compressed_size_fast
292+
if method == "to_csv":
293+
pd.read_csv(path, compression="xz")
292294

293295

294296
@pytest.mark.parametrize(
@@ -302,13 +304,13 @@ def test_xz_compression_level_read(obj, method):
302304
],
303305
)
304306
@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"])
305-
def test_bzip_compression_level(obj, method):
307+
def test_bzip_compression_level(obj, method, temp_file):
306308
"""GH33196 bzip needs file size > 100k to show a size difference between
307309
compression levels, so here we just check if the call works when
308310
compression is passed as a dict.
309311
"""
310-
with tm.ensure_clean() as path:
311-
getattr(obj, method)(path, compression={"method": "bz2", "compresslevel": 1})
312+
path = temp_file
313+
getattr(obj, method)(path, compression={"method": "bz2", "compresslevel": 1})
312314

313315

314316
@pytest.mark.parametrize(
@@ -318,21 +320,21 @@ def test_bzip_compression_level(obj, method):
318320
(".tar", tarfile.TarFile),
319321
],
320322
)
321-
def test_empty_archive_zip(suffix, archive):
322-
with tm.ensure_clean(filename=suffix) as path:
323-
with archive(path, "w"):
324-
pass
325-
with pytest.raises(ValueError, match="Zero files found"):
326-
pd.read_csv(path)
323+
def test_empty_archive_zip(suffix, archive, temp_file):
324+
path = temp_file.parent / f"archive{suffix}"
325+
with archive(path, "w"):
326+
pass
327+
with pytest.raises(ValueError, match="Zero files found"):
328+
pd.read_csv(path)
327329

328330

329-
def test_ambiguous_archive_zip():
330-
with tm.ensure_clean(filename=".zip") as path:
331-
with zipfile.ZipFile(path, "w") as file:
332-
file.writestr("a.csv", "foo,bar")
333-
file.writestr("b.csv", "foo,bar")
334-
with pytest.raises(ValueError, match="Multiple files found in ZIP file"):
335-
pd.read_csv(path)
331+
def test_ambiguous_archive_zip(temp_file):
332+
path = temp_file.parent / "archive.zip"
333+
with zipfile.ZipFile(path, "w") as file:
334+
file.writestr("a.csv", "foo,bar")
335+
file.writestr("b.csv", "foo,bar")
336+
with pytest.raises(ValueError, match="Multiple files found in ZIP file"):
337+
pd.read_csv(path)
336338

337339

338340
def test_ambiguous_archive_tar(tmp_path):
@@ -352,24 +354,24 @@ def test_ambiguous_archive_tar(tmp_path):
352354
pd.read_csv(tarpath)
353355

354356

355-
def test_tar_gz_to_different_filename():
356-
with tm.ensure_clean(filename=".foo") as file:
357-
pd.DataFrame(
358-
[["1", "2"]],
359-
columns=["foo", "bar"],
360-
).to_csv(file, compression={"method": "tar", "mode": "w:gz"}, index=False)
361-
with gzip.open(file) as uncompressed:
362-
with tarfile.TarFile(fileobj=uncompressed) as archive:
363-
members = archive.getmembers()
364-
assert len(members) == 1
365-
content = archive.extractfile(members[0]).read().decode("utf8")
366-
367-
if is_platform_windows():
368-
expected = "foo,bar\r\n1,2\r\n"
369-
else:
370-
expected = "foo,bar\n1,2\n"
371-
372-
assert content == expected
357+
def test_tar_gz_to_different_filename(temp_file):
358+
file = temp_file.parent / "archive.foo"
359+
pd.DataFrame(
360+
[["1", "2"]],
361+
columns=["foo", "bar"],
362+
).to_csv(file, compression={"method": "tar", "mode": "w:gz"}, index=False)
363+
with gzip.open(file) as uncompressed:
364+
with tarfile.TarFile(fileobj=uncompressed) as archive:
365+
members = archive.getmembers()
366+
assert len(members) == 1
367+
content = archive.extractfile(members[0]).read().decode("utf8")
368+
369+
if is_platform_windows():
370+
expected = "foo,bar\r\n1,2\r\n"
371+
else:
372+
expected = "foo,bar\n1,2\n"
373+
374+
assert content == expected
373375

374376

375377
def test_tar_no_error_on_close():

0 commit comments

Comments (0)