Skip to content

Commit 3622864

Browse files
ritoban23 and Justine Wezenaar
authored and committed
TST: Replace ensure_clean utility with temp_file pytest fixture (3 files) (pandas-dev#62475)
1 parent 289a8f9 commit 3622864

File tree

3 files changed

+292
-299
lines changed

3 files changed

+292
-299
lines changed

pandas/tests/io/test_compression.py

Lines changed: 116 additions & 109 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
import gzip
22
import io
33
import os
4-
from pathlib import Path
54
import subprocess
65
import sys
76
import tarfile
@@ -31,16 +30,16 @@
3130
],
3231
)
3332
@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"])
34-
def test_compression_size(obj, method, compression_only):
33+
def test_compression_size(obj, method, compression_only, temp_file):
3534
if compression_only == "tar":
3635
compression_only = {"method": "tar", "mode": "w:gz"}
3736

38-
with tm.ensure_clean() as path:
39-
getattr(obj, method)(path, compression=compression_only)
40-
compressed_size = os.path.getsize(path)
41-
getattr(obj, method)(path, compression=None)
42-
uncompressed_size = os.path.getsize(path)
43-
assert uncompressed_size > compressed_size
37+
path = temp_file
38+
getattr(obj, method)(path, compression=compression_only)
39+
compressed_size = os.path.getsize(path)
40+
getattr(obj, method)(path, compression=None)
41+
uncompressed_size = os.path.getsize(path)
42+
assert uncompressed_size > compressed_size
4443

4544

4645
@pytest.mark.parametrize(
@@ -54,22 +53,25 @@ def test_compression_size(obj, method, compression_only):
5453
],
5554
)
5655
@pytest.mark.parametrize("method", ["to_csv", "to_json"])
57-
def test_compression_size_fh(obj, method, compression_only):
58-
with tm.ensure_clean() as path:
59-
with icom.get_handle(
60-
path,
61-
"w:gz" if compression_only == "tar" else "w",
62-
compression=compression_only,
63-
) as handles:
64-
getattr(obj, method)(handles.handle)
65-
assert not handles.handle.closed
66-
compressed_size = os.path.getsize(path)
67-
with tm.ensure_clean() as path:
68-
with icom.get_handle(path, "w", compression=None) as handles:
69-
getattr(obj, method)(handles.handle)
70-
assert not handles.handle.closed
71-
uncompressed_size = os.path.getsize(path)
72-
assert uncompressed_size > compressed_size
56+
def test_compression_size_fh(obj, method, compression_only, temp_file):
57+
path = temp_file
58+
with icom.get_handle(
59+
path,
60+
"w:gz" if compression_only == "tar" else "w",
61+
compression=compression_only,
62+
) as handles:
63+
getattr(obj, method)(handles.handle)
64+
assert not handles.handle.closed
65+
compressed_size = os.path.getsize(path)
66+
67+
# Create a new temporary file for uncompressed comparison
68+
path2 = temp_file.parent / f"{temp_file.stem}_uncompressed{temp_file.suffix}"
69+
path2.touch()
70+
with icom.get_handle(path2, "w", compression=None) as handles:
71+
getattr(obj, method)(handles.handle)
72+
assert not handles.handle.closed
73+
uncompressed_size = os.path.getsize(path2)
74+
assert uncompressed_size > compressed_size
7375

7476

7577
@pytest.mark.parametrize(
@@ -81,14 +83,19 @@ def test_compression_size_fh(obj, method, compression_only):
8183
],
8284
)
8385
def test_dataframe_compression_defaults_to_infer(
84-
write_method, write_kwargs, read_method, compression_only, compression_to_extension
86+
write_method,
87+
write_kwargs,
88+
read_method,
89+
compression_only,
90+
compression_to_extension,
91+
temp_file,
8592
):
8693
# GH22004
8794
input = pd.DataFrame([[1.0, 0, -4], [3.4, 5, 2]], columns=["X", "Y", "Z"])
8895
extension = compression_to_extension[compression_only]
89-
with tm.ensure_clean("compressed" + extension) as path:
90-
getattr(input, write_method)(path, **write_kwargs)
91-
output = read_method(path, compression=compression_only)
96+
path = temp_file.parent / f"compressed{extension}"
97+
getattr(input, write_method)(path, **write_kwargs)
98+
output = read_method(path, compression=compression_only)
9299
tm.assert_frame_equal(output, input)
93100

94101

@@ -107,37 +114,38 @@ def test_series_compression_defaults_to_infer(
107114
read_kwargs,
108115
compression_only,
109116
compression_to_extension,
117+
temp_file,
110118
):
111119
# GH22004
112120
input = pd.Series([0, 5, -2, 10], name="X")
113121
extension = compression_to_extension[compression_only]
114-
with tm.ensure_clean("compressed" + extension) as path:
115-
getattr(input, write_method)(path, **write_kwargs)
116-
if "squeeze" in read_kwargs:
117-
kwargs = read_kwargs.copy()
118-
del kwargs["squeeze"]
119-
output = read_method(path, compression=compression_only, **kwargs).squeeze(
120-
"columns"
121-
)
122-
else:
123-
output = read_method(path, compression=compression_only, **read_kwargs)
122+
path = temp_file.parent / f"compressed{extension}"
123+
getattr(input, write_method)(path, **write_kwargs)
124+
if "squeeze" in read_kwargs:
125+
kwargs = read_kwargs.copy()
126+
del kwargs["squeeze"]
127+
output = read_method(path, compression=compression_only, **kwargs).squeeze(
128+
"columns"
129+
)
130+
else:
131+
output = read_method(path, compression=compression_only, **read_kwargs)
124132
tm.assert_series_equal(output, input, check_names=False)
125133

126134

127-
def test_compression_warning(compression_only):
135+
def test_compression_warning(compression_only, temp_file):
128136
# Assert that passing a file object to to_csv while explicitly specifying a
129137
# compression protocol triggers a RuntimeWarning, as per GH21227.
130138
df = pd.DataFrame(
131139
100 * [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
132140
columns=["X", "Y", "Z"],
133141
)
134-
with tm.ensure_clean() as path:
135-
with icom.get_handle(path, "w", compression=compression_only) as handles:
136-
with tm.assert_produces_warning(RuntimeWarning, match="has no effect"):
137-
df.to_csv(handles.handle, compression=compression_only)
142+
path = temp_file
143+
with icom.get_handle(path, "w", compression=compression_only) as handles:
144+
with tm.assert_produces_warning(RuntimeWarning, match="has no effect"):
145+
df.to_csv(handles.handle, compression=compression_only)
138146

139147

140-
def test_compression_binary(compression_only):
148+
def test_compression_binary(compression_only, temp_file):
141149
"""
142150
Binary file handles support compression.
143151
@@ -150,13 +158,13 @@ def test_compression_binary(compression_only):
150158
)
151159

152160
# with a file
153-
with tm.ensure_clean() as path:
154-
with open(path, mode="wb") as file:
155-
df.to_csv(file, mode="wb", compression=compression_only)
156-
file.seek(0) # file shouldn't be closed
157-
tm.assert_frame_equal(
158-
df, pd.read_csv(path, index_col=0, compression=compression_only)
159-
)
161+
path = temp_file
162+
with open(path, mode="wb") as file:
163+
df.to_csv(file, mode="wb", compression=compression_only)
164+
file.seek(0) # file shouldn't be closed
165+
tm.assert_frame_equal(
166+
df, pd.read_csv(path, index_col=0, compression=compression_only)
167+
)
160168

161169
# with BytesIO
162170
file = io.BytesIO()
@@ -167,7 +175,7 @@ def test_compression_binary(compression_only):
167175
)
168176

169177

170-
def test_gzip_reproducibility_file_name():
178+
def test_gzip_reproducibility_file_name(temp_file):
171179
"""
172180
Gzip should create reproducible archives with mtime.
173181
@@ -183,13 +191,12 @@ def test_gzip_reproducibility_file_name():
183191
compression_options = {"method": "gzip", "mtime": 1}
184192

185193
# test for filename
186-
with tm.ensure_clean() as path:
187-
path = Path(path)
188-
df.to_csv(path, compression=compression_options)
189-
time.sleep(0.1)
190-
output = path.read_bytes()
191-
df.to_csv(path, compression=compression_options)
192-
assert output == path.read_bytes()
194+
path = temp_file
195+
df.to_csv(path, compression=compression_options)
196+
time.sleep(0.1)
197+
output = path.read_bytes()
198+
df.to_csv(path, compression=compression_options)
199+
assert output == path.read_bytes()
193200

194201

195202
def test_gzip_reproducibility_file_object():
@@ -259,14 +266,14 @@ def test_with_missing_lzma_runtime():
259266
],
260267
)
261268
@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"])
262-
def test_gzip_compression_level(obj, method):
269+
def test_gzip_compression_level(obj, method, temp_file):
263270
# GH33196
264-
with tm.ensure_clean() as path:
265-
getattr(obj, method)(path, compression="gzip")
266-
compressed_size_default = os.path.getsize(path)
267-
getattr(obj, method)(path, compression={"method": "gzip", "compresslevel": 1})
268-
compressed_size_fast = os.path.getsize(path)
269-
assert compressed_size_default < compressed_size_fast
271+
path = temp_file
272+
getattr(obj, method)(path, compression="gzip")
273+
compressed_size_default = os.path.getsize(path)
274+
getattr(obj, method)(path, compression={"method": "gzip", "compresslevel": 1})
275+
compressed_size_fast = os.path.getsize(path)
276+
assert compressed_size_default < compressed_size_fast
270277

271278

272279
@pytest.mark.parametrize(
@@ -280,15 +287,15 @@ def test_gzip_compression_level(obj, method):
280287
],
281288
)
282289
@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"])
283-
def test_xz_compression_level_read(obj, method):
284-
with tm.ensure_clean() as path:
285-
getattr(obj, method)(path, compression="xz")
286-
compressed_size_default = os.path.getsize(path)
287-
getattr(obj, method)(path, compression={"method": "xz", "preset": 1})
288-
compressed_size_fast = os.path.getsize(path)
289-
assert compressed_size_default < compressed_size_fast
290-
if method == "to_csv":
291-
pd.read_csv(path, compression="xz")
290+
def test_xz_compression_level_read(obj, method, temp_file):
291+
path = temp_file
292+
getattr(obj, method)(path, compression="xz")
293+
compressed_size_default = os.path.getsize(path)
294+
getattr(obj, method)(path, compression={"method": "xz", "preset": 1})
295+
compressed_size_fast = os.path.getsize(path)
296+
assert compressed_size_default < compressed_size_fast
297+
if method == "to_csv":
298+
pd.read_csv(path, compression="xz")
292299

293300

294301
@pytest.mark.parametrize(
@@ -302,13 +309,13 @@ def test_xz_compression_level_read(obj, method):
302309
],
303310
)
304311
@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"])
305-
def test_bzip_compression_level(obj, method):
312+
def test_bzip_compression_level(obj, method, temp_file):
306313
"""GH33196 bzip needs file size > 100k to show a size difference between
307314
compression levels, so here we just check if the call works when
308315
compression is passed as a dict.
309316
"""
310-
with tm.ensure_clean() as path:
311-
getattr(obj, method)(path, compression={"method": "bz2", "compresslevel": 1})
317+
path = temp_file
318+
getattr(obj, method)(path, compression={"method": "bz2", "compresslevel": 1})
312319

313320

314321
@pytest.mark.parametrize(
@@ -318,21 +325,21 @@ def test_bzip_compression_level(obj, method):
318325
(".tar", tarfile.TarFile),
319326
],
320327
)
321-
def test_empty_archive_zip(suffix, archive):
322-
with tm.ensure_clean(filename=suffix) as path:
323-
with archive(path, "w"):
324-
pass
325-
with pytest.raises(ValueError, match="Zero files found"):
326-
pd.read_csv(path)
328+
def test_empty_archive_zip(suffix, archive, temp_file):
329+
path = temp_file.parent / f"archive{suffix}"
330+
with archive(path, "w"):
331+
pass
332+
with pytest.raises(ValueError, match="Zero files found"):
333+
pd.read_csv(path)
327334

328335

329-
def test_ambiguous_archive_zip():
330-
with tm.ensure_clean(filename=".zip") as path:
331-
with zipfile.ZipFile(path, "w") as file:
332-
file.writestr("a.csv", "foo,bar")
333-
file.writestr("b.csv", "foo,bar")
334-
with pytest.raises(ValueError, match="Multiple files found in ZIP file"):
335-
pd.read_csv(path)
336+
def test_ambiguous_archive_zip(temp_file):
337+
path = temp_file.parent / "archive.zip"
338+
with zipfile.ZipFile(path, "w") as file:
339+
file.writestr("a.csv", "foo,bar")
340+
file.writestr("b.csv", "foo,bar")
341+
with pytest.raises(ValueError, match="Multiple files found in ZIP file"):
342+
pd.read_csv(path)
336343

337344

338345
def test_ambiguous_archive_tar(tmp_path):
@@ -352,24 +359,24 @@ def test_ambiguous_archive_tar(tmp_path):
352359
pd.read_csv(tarpath)
353360

354361

355-
def test_tar_gz_to_different_filename():
356-
with tm.ensure_clean(filename=".foo") as file:
357-
pd.DataFrame(
358-
[["1", "2"]],
359-
columns=["foo", "bar"],
360-
).to_csv(file, compression={"method": "tar", "mode": "w:gz"}, index=False)
361-
with gzip.open(file) as uncompressed:
362-
with tarfile.TarFile(fileobj=uncompressed) as archive:
363-
members = archive.getmembers()
364-
assert len(members) == 1
365-
content = archive.extractfile(members[0]).read().decode("utf8")
366-
367-
if is_platform_windows():
368-
expected = "foo,bar\r\n1,2\r\n"
369-
else:
370-
expected = "foo,bar\n1,2\n"
371-
372-
assert content == expected
362+
def test_tar_gz_to_different_filename(temp_file):
363+
file = temp_file.parent / "archive.foo"
364+
pd.DataFrame(
365+
[["1", "2"]],
366+
columns=["foo", "bar"],
367+
).to_csv(file, compression={"method": "tar", "mode": "w:gz"}, index=False)
368+
with gzip.open(file) as uncompressed:
369+
with tarfile.TarFile(fileobj=uncompressed) as archive:
370+
members = archive.getmembers()
371+
assert len(members) == 1
372+
content = archive.extractfile(members[0]).read().decode("utf8")
373+
374+
if is_platform_windows():
375+
expected = "foo,bar\r\n1,2\r\n"
376+
else:
377+
expected = "foo,bar\n1,2\n"
378+
379+
assert content == expected
373380

374381

375382
def test_tar_no_error_on_close():

0 commit comments

Comments (0)