Skip to content

Commit 6cca195

Browse files
TST: Assist Replace ensure_clean utility function with the temp_file pytest fixture #62435 (#62461)
1 parent bd88fa0 commit 6cca195

File tree

3 files changed

+540
-536
lines changed

3 files changed

+540
-536
lines changed

pandas/tests/frame/methods/test_to_csv.py

Lines changed: 138 additions & 139 deletions
Original file line number | Diff line number | Diff line change
@@ -248,6 +248,7 @@ def _return_result_expected(
248248
self,
249249
df,
250250
chunksize,
251+
temp_file,
251252
r_dtype=None,
252253
c_dtype=None,
253254
rnlvl=None,
@@ -260,15 +261,13 @@ def _return_result_expected(
260261
kwargs["index_col"] = list(range(rnlvl))
261262
kwargs["header"] = list(range(cnlvl))
262263

263-
with tm.ensure_clean("__tmp_to_csv_moar__") as path:
264-
df.to_csv(path, encoding="utf8", chunksize=chunksize)
265-
recons = self.read_csv(path, **kwargs)
264+
df.to_csv(temp_file, encoding="utf8", chunksize=chunksize)
265+
recons = self.read_csv(temp_file, **kwargs)
266266
else:
267267
kwargs["header"] = 0
268268

269-
with tm.ensure_clean("__tmp_to_csv_moar__") as path:
270-
df.to_csv(path, encoding="utf8", chunksize=chunksize)
271-
recons = self.read_csv(path, **kwargs)
269+
df.to_csv(temp_file, encoding="utf8", chunksize=chunksize)
270+
recons = self.read_csv(temp_file, **kwargs)
272271

273272
def _to_uni(x):
274273
if not isinstance(x, str):
@@ -353,13 +352,13 @@ def _to_uni(x):
353352
@pytest.mark.parametrize(
354353
"nrows", [2, 10, 99, 100, 101, 102, 198, 199, 200, 201, 202, 249, 250, 251]
355354
)
356-
def test_to_csv_nrows(self, nrows):
355+
def test_to_csv_nrows(self, nrows, temp_file):
357356
df = DataFrame(
358357
np.ones((nrows, 4)),
359358
index=date_range("2020-01-01", periods=nrows),
360359
columns=Index(list("abcd"), dtype=object),
361360
)
362-
result, expected = self._return_result_expected(df, 1000, "dt", "s")
361+
result, expected = self._return_result_expected(df, 1000, temp_file, "dt", "s")
363362
expected.index = expected.index.astype("M8[ns]")
364363
tm.assert_frame_equal(result, expected, check_names=False)
365364

@@ -372,7 +371,7 @@ def test_to_csv_nrows(self, nrows):
372371
)
373372
@pytest.mark.parametrize("ncols", [1, 2, 3, 4])
374373
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
375-
def test_to_csv_idx_types(self, nrows, r_idx_type, c_idx_type, ncols):
374+
def test_to_csv_idx_types(self, nrows, r_idx_type, c_idx_type, ncols, temp_file):
376375
axes = {
377376
"i": lambda n: Index(np.arange(n), dtype=np.int64),
378377
"s": lambda n: Index([f"{i}_{chr(i)}" for i in range(97, 97 + n)]),
@@ -387,6 +386,7 @@ def test_to_csv_idx_types(self, nrows, r_idx_type, c_idx_type, ncols):
387386
result, expected = self._return_result_expected(
388387
df,
389388
1000,
389+
temp_file,
390390
r_idx_type,
391391
c_idx_type,
392392
)
@@ -401,18 +401,18 @@ def test_to_csv_idx_types(self, nrows, r_idx_type, c_idx_type, ncols):
401401
"nrows", [10, 98, 99, 100, 101, 102, 198, 199, 200, 201, 202, 249, 250, 251]
402402
)
403403
@pytest.mark.parametrize("ncols", [1, 2, 3, 4])
404-
def test_to_csv_idx_ncols(self, nrows, ncols):
404+
def test_to_csv_idx_ncols(self, nrows, ncols, temp_file):
405405
df = DataFrame(
406406
np.ones((nrows, ncols)),
407407
index=Index([f"i-{i}" for i in range(nrows)], name="a"),
408408
columns=Index([f"i-{i}" for i in range(ncols)], name="a"),
409409
)
410-
result, expected = self._return_result_expected(df, 1000)
410+
result, expected = self._return_result_expected(df, 1000, temp_file)
411411
tm.assert_frame_equal(result, expected, check_names=False)
412412

413413
@pytest.mark.slow
414414
@pytest.mark.parametrize("nrows", [10, 98, 99, 100, 101, 102])
415-
def test_to_csv_dup_cols(self, nrows):
415+
def test_to_csv_dup_cols(self, nrows, temp_file):
416416
df = DataFrame(
417417
np.ones((nrows, 3)),
418418
index=Index([f"i-{i}" for i in range(nrows)], name="a"),
@@ -427,25 +427,29 @@ def test_to_csv_dup_cols(self, nrows):
427427
ix[-2:] = ["rdupe", "rdupe"]
428428
df.index = ix
429429
df.columns = cols
430-
result, expected = self._return_result_expected(df, 1000, dupe_col=True)
430+
result, expected = self._return_result_expected(
431+
df, 1000, temp_file, dupe_col=True
432+
)
431433
tm.assert_frame_equal(result, expected, check_names=False)
432434

433435
@pytest.mark.slow
434-
def test_to_csv_empty(self):
436+
def test_to_csv_empty(self, temp_file):
435437
df = DataFrame(index=np.arange(10, dtype=np.int64))
436-
result, expected = self._return_result_expected(df, 1000)
438+
result, expected = self._return_result_expected(df, 1000, temp_file)
437439
tm.assert_frame_equal(result, expected, check_column_type=False)
438440

439441
@pytest.mark.slow
440-
def test_to_csv_chunksize(self):
442+
def test_to_csv_chunksize(self, temp_file):
441443
chunksize = 1000
442444
rows = chunksize // 2 + 1
443445
df = DataFrame(
444446
np.ones((rows, 2)),
445447
columns=Index(list("ab")),
446448
index=MultiIndex.from_arrays([range(rows) for _ in range(2)]),
447449
)
448-
result, expected = self._return_result_expected(df, chunksize, rnlvl=2)
450+
result, expected = self._return_result_expected(
451+
df, chunksize, temp_file, rnlvl=2
452+
)
449453
tm.assert_frame_equal(result, expected, check_names=False)
450454

451455
@pytest.mark.slow
@@ -461,7 +465,7 @@ def test_to_csv_chunksize(self):
461465
[{"r_idx_nlevels": 2, "c_idx_nlevels": 2}, {"rnlvl": 2, "cnlvl": 2}],
462466
],
463467
)
464-
def test_to_csv_params(self, nrows, df_params, func_params, ncols):
468+
def test_to_csv_params(self, nrows, df_params, func_params, ncols, temp_file):
465469
if df_params.get("r_idx_nlevels"):
466470
index = MultiIndex.from_arrays(
467471
[f"i-{i}" for i in range(nrows)]
@@ -478,7 +482,9 @@ def test_to_csv_params(self, nrows, df_params, func_params, ncols):
478482
else:
479483
columns = Index([f"i-{i}" for i in range(ncols)])
480484
df = DataFrame(np.ones((nrows, ncols)), index=index, columns=columns)
481-
result, expected = self._return_result_expected(df, 1000, **func_params)
485+
result, expected = self._return_result_expected(
486+
df, 1000, temp_file, **func_params
487+
)
482488
tm.assert_frame_equal(result, expected, check_names=False)
483489

484490
def test_to_csv_from_csv_w_some_infs(self, temp_file, float_frame):
@@ -595,108 +601,104 @@ def test_to_csv_multiindex(self, temp_file, float_frame, datetime_frame):
595601
# needed if setUp becomes class method
596602
datetime_frame.index = old_index
597603

598-
with tm.ensure_clean("__tmp_to_csv_multiindex__") as path:
599-
# GH3571, GH1651, GH3141
600-
601-
def _make_frame(names=None):
602-
if names is True:
603-
names = ["first", "second"]
604-
return DataFrame(
605-
np.random.default_rng(2).integers(0, 10, size=(3, 3)),
606-
columns=MultiIndex.from_tuples(
607-
[("bah", "foo"), ("bah", "bar"), ("ban", "baz")], names=names
608-
),
609-
dtype="int64",
610-
)
611-
612-
# column & index are multi-index
613-
df = DataFrame(
614-
np.ones((5, 3)),
615-
columns=MultiIndex.from_arrays(
616-
[[f"i-{i}" for i in range(3)] for _ in range(4)], names=list("abcd")
617-
),
618-
index=MultiIndex.from_arrays(
619-
[[f"i-{i}" for i in range(5)] for _ in range(2)], names=list("ab")
620-
),
621-
)
622-
df.to_csv(path)
623-
result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1])
624-
tm.assert_frame_equal(df, result)
625-
626-
# column is mi
627-
df = DataFrame(
628-
np.ones((5, 3)),
629-
columns=MultiIndex.from_arrays(
630-
[[f"i-{i}" for i in range(3)] for _ in range(4)], names=list("abcd")
604+
def _make_frame(names=None):
605+
if names is True:
606+
names = ["first", "second"]
607+
return DataFrame(
608+
np.random.default_rng(2).integers(0, 10, size=(3, 3)),
609+
columns=MultiIndex.from_tuples(
610+
[("bah", "foo"), ("bah", "bar"), ("ban", "baz")], names=names
631611
),
612+
dtype="int64",
632613
)
633-
df.to_csv(path)
634-
result = read_csv(path, header=[0, 1, 2, 3], index_col=0)
635-
tm.assert_frame_equal(df, result)
636-
637-
# dup column names?
638-
df = DataFrame(
639-
np.ones((5, 3)),
640-
columns=MultiIndex.from_arrays(
641-
[[f"i-{i}" for i in range(3)] for _ in range(4)], names=list("abcd")
642-
),
643-
index=MultiIndex.from_arrays(
644-
[[f"i-{i}" for i in range(5)] for _ in range(3)], names=list("abc")
645-
),
646-
)
647-
df.to_csv(path)
648-
result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1, 2])
649-
tm.assert_frame_equal(df, result)
650-
651-
# writing with no index
652-
df = _make_frame()
653-
df.to_csv(path, index=False)
654-
result = read_csv(path, header=[0, 1])
655-
tm.assert_frame_equal(df, result)
656-
657-
# we lose the names here
658-
df = _make_frame(True)
659-
df.to_csv(path, index=False)
660-
result = read_csv(path, header=[0, 1])
661-
assert com.all_none(*result.columns.names)
662-
result.columns.names = df.columns.names
663-
tm.assert_frame_equal(df, result)
664-
665-
# whatsnew example
666-
df = _make_frame()
667-
df.to_csv(path)
668-
result = read_csv(path, header=[0, 1], index_col=[0])
669-
tm.assert_frame_equal(df, result)
670-
671-
df = _make_frame(True)
672-
df.to_csv(path)
673-
result = read_csv(path, header=[0, 1], index_col=[0])
674-
tm.assert_frame_equal(df, result)
675-
676-
# invalid options
677-
df = _make_frame(True)
678-
df.to_csv(path)
679-
680-
for i in [6, 7]:
681-
msg = f"len of {i}, but only 5 lines in file"
682-
with pytest.raises(ParserError, match=msg):
683-
read_csv(path, header=list(range(i)), index_col=0)
684-
685-
# write with cols
686-
msg = "cannot specify cols with a MultiIndex"
687-
with pytest.raises(TypeError, match=msg):
688-
df.to_csv(path, columns=["foo", "bar"])
689-
690-
with tm.ensure_clean("__tmp_to_csv_multiindex__") as path:
691-
# empty
692-
tsframe[:0].to_csv(path)
693-
recons = self.read_csv(path)
694-
695-
exp = tsframe[:0]
696-
exp.index = []
697-
698-
tm.assert_index_equal(recons.columns, exp.columns)
699-
assert len(recons) == 0
614+
615+
# column & index are multi-index
616+
df = DataFrame(
617+
np.ones((5, 3)),
618+
columns=MultiIndex.from_arrays(
619+
[[f"i-{i}" for i in range(3)] for _ in range(4)], names=list("abcd")
620+
),
621+
index=MultiIndex.from_arrays(
622+
[[f"i-{i}" for i in range(5)] for _ in range(2)], names=list("ab")
623+
),
624+
)
625+
df.to_csv(temp_file)
626+
result = read_csv(temp_file, header=[0, 1, 2, 3], index_col=[0, 1])
627+
tm.assert_frame_equal(df, result)
628+
629+
# column is mi
630+
df = DataFrame(
631+
np.ones((5, 3)),
632+
columns=MultiIndex.from_arrays(
633+
[[f"i-{i}" for i in range(3)] for _ in range(4)], names=list("abcd")
634+
),
635+
)
636+
df.to_csv(temp_file)
637+
result = read_csv(temp_file, header=[0, 1, 2, 3], index_col=0)
638+
tm.assert_frame_equal(df, result)
639+
640+
# dup column names?
641+
df = DataFrame(
642+
np.ones((5, 3)),
643+
columns=MultiIndex.from_arrays(
644+
[[f"i-{i}" for i in range(3)] for _ in range(4)], names=list("abcd")
645+
),
646+
index=MultiIndex.from_arrays(
647+
[[f"i-{i}" for i in range(5)] for _ in range(3)], names=list("abc")
648+
),
649+
)
650+
df.to_csv(temp_file)
651+
result = read_csv(temp_file, header=[0, 1, 2, 3], index_col=[0, 1, 2])
652+
tm.assert_frame_equal(df, result)
653+
654+
# writing with no index
655+
df = _make_frame()
656+
df.to_csv(temp_file, index=False)
657+
result = read_csv(temp_file, header=[0, 1])
658+
tm.assert_frame_equal(df, result)
659+
660+
# we lose the names here
661+
df = _make_frame(True)
662+
df.to_csv(temp_file, index=False)
663+
result = read_csv(temp_file, header=[0, 1])
664+
assert com.all_none(*result.columns.names)
665+
result.columns.names = df.columns.names
666+
tm.assert_frame_equal(df, result)
667+
668+
# whatsnew example
669+
df = _make_frame()
670+
df.to_csv(temp_file)
671+
result = read_csv(temp_file, header=[0, 1], index_col=[0])
672+
tm.assert_frame_equal(df, result)
673+
674+
df = _make_frame(True)
675+
df.to_csv(temp_file)
676+
result = read_csv(temp_file, header=[0, 1], index_col=[0])
677+
tm.assert_frame_equal(df, result)
678+
679+
# invalid options
680+
df = _make_frame(True)
681+
df.to_csv(temp_file)
682+
683+
for i in [6, 7]:
684+
msg = f"len of {i}, but only 5 lines in file"
685+
with pytest.raises(ParserError, match=msg):
686+
read_csv(temp_file, header=list(range(i)), index_col=0)
687+
688+
# write with cols
689+
msg = "cannot specify cols with a MultiIndex"
690+
with pytest.raises(TypeError, match=msg):
691+
df.to_csv(temp_file, columns=["foo", "bar"])
692+
693+
# empty
694+
tsframe[:0].to_csv(temp_file)
695+
recons = self.read_csv(temp_file)
696+
697+
exp = tsframe[:0]
698+
exp.index = []
699+
700+
tm.assert_index_equal(recons.columns, exp.columns)
701+
assert len(recons) == 0
700702

701703
def test_to_csv_interval_index(self, temp_file, using_infer_string):
702704
# GH 28210
@@ -808,16 +810,15 @@ def test_to_csv_dups_cols(self, temp_file):
808810

809811
df.columns = [0, 1, 2] * 5
810812

811-
with tm.ensure_clean() as filename:
812-
df.to_csv(filename)
813-
result = read_csv(filename, index_col=0)
813+
df.to_csv(temp_file)
814+
result = read_csv(temp_file, index_col=0)
814815

815-
# date cols
816-
for i in ["0.4", "1.4", "2.4"]:
817-
result[i] = to_datetime(result[i])
816+
# date cols
817+
for i in ["0.4", "1.4", "2.4"]:
818+
result[i] = to_datetime(result[i])
818819

819-
result.columns = df.columns
820-
tm.assert_frame_equal(result, df)
820+
result.columns = df.columns
821+
tm.assert_frame_equal(result, df)
821822

822823
def test_to_csv_dups_cols2(self, temp_file):
823824
# GH3457
@@ -1197,18 +1198,16 @@ def test_to_csv_with_dst_transitions_with_pickle(self, start, end, temp_file):
11971198
idx = idx._with_freq(None) # freq does not round-trip
11981199
idx._data._freq = None # otherwise there is trouble on unpickle
11991200
df = DataFrame({"values": 1, "idx": idx}, index=idx)
1200-
with tm.ensure_clean("csv_date_format_with_dst") as path:
1201-
df.to_csv(path, index=True)
1202-
result = read_csv(path, index_col=0)
1203-
result.index = (
1204-
to_datetime(result.index, utc=True)
1205-
.tz_convert("Europe/Paris")
1206-
.as_unit("ns")
1207-
)
1208-
result["idx"] = to_datetime(result["idx"], utc=True).astype(
1209-
"datetime64[ns, Europe/Paris]"
1210-
)
1211-
tm.assert_frame_equal(result, df)
1201+
1202+
df.to_csv(temp_file, index=True)
1203+
result = read_csv(temp_file, index_col=0)
1204+
result.index = (
1205+
to_datetime(result.index, utc=True).tz_convert("Europe/Paris").as_unit("ns")
1206+
)
1207+
result["idx"] = to_datetime(result["idx"], utc=True).astype(
1208+
"datetime64[ns, Europe/Paris]"
1209+
)
1210+
tm.assert_frame_equal(result, df)
12121211

12131212
# assert working
12141213
df.astype(str)

0 commit comments

Comments
 (0)