diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index 5df8c3d27bf84..50d42f15cc0f8 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -9,7 +9,6 @@ ) import os import tempfile -import uuid import numpy as np import pytest @@ -51,7 +50,7 @@ def test_read_csv_unicode(all_parsers): @skip_pyarrow @pytest.mark.parametrize("sep", [",", "\t"]) @pytest.mark.parametrize("encoding", ["utf-16", "utf-16le", "utf-16be"]) -def test_utf16_bom_skiprows(all_parsers, sep, encoding): +def test_utf16_bom_skiprows(all_parsers, sep, encoding, temp_file): # see gh-2298 parser = all_parsers data = """skip this @@ -59,20 +58,18 @@ def test_utf16_bom_skiprows(all_parsers, sep, encoding): A,B,C 1,2,3 4,5,6""".replace(",", sep) - path = f"__{uuid.uuid4()}__.csv" kwargs = {"sep": sep, "skiprows": 2} utf8 = "utf-8" - with tm.ensure_clean(path) as path: - bytes_data = data.encode(encoding) + bytes_data = data.encode(encoding) - with open(path, "wb") as f: - f.write(bytes_data) + with open(temp_file, "wb") as f: + f.write(bytes_data) - with TextIOWrapper(BytesIO(data.encode(utf8)), encoding=utf8) as bytes_buffer: - result = parser.read_csv(path, encoding=encoding, **kwargs) - expected = parser.read_csv(bytes_buffer, encoding=utf8, **kwargs) - tm.assert_frame_equal(result, expected) + with TextIOWrapper(BytesIO(data.encode(utf8)), encoding=utf8) as bytes_buffer: + result = parser.read_csv(temp_file, encoding=encoding, **kwargs) + expected = parser.read_csv(bytes_buffer, encoding=utf8, **kwargs) + tm.assert_frame_equal(result, expected) def test_utf16_example(all_parsers, csv_dir_path): @@ -240,7 +237,7 @@ def test_parse_encoded_special_characters(encoding): @pytest.mark.parametrize("encoding", ["utf-8", None, "utf-16", "cp1255", "latin-1"]) -def test_encoding_memory_map(all_parsers, encoding): +def test_encoding_memory_map(all_parsers, encoding, temp_file): # GH40986 parser = all_parsers expected = DataFrame( @@ -250,20 +247,19 @@ def test_encoding_memory_map(all_parsers, encoding): "weapon": ["sai", "bo staff", "nunchunk", "katana"], } ) - with tm.ensure_clean() as file: - expected.to_csv(file, index=False, encoding=encoding) + expected.to_csv(temp_file, index=False, encoding=encoding) - if parser.engine == "pyarrow": - msg = "The 'memory_map' option is not supported with the 'pyarrow' engine" - with pytest.raises(ValueError, match=msg): - parser.read_csv(file, encoding=encoding, memory_map=True) - return + if parser.engine == "pyarrow": + msg = "The 'memory_map' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(temp_file, encoding=encoding, memory_map=True) + return - df = parser.read_csv(file, encoding=encoding, memory_map=True) + df = parser.read_csv(temp_file, encoding=encoding, memory_map=True) tm.assert_frame_equal(df, expected) -def test_chunk_splits_multibyte_char(all_parsers): +def test_chunk_splits_multibyte_char(all_parsers, temp_file): """ Chunk splits a multibyte character with memory_map=True @@ -276,20 +272,19 @@ def test_chunk_splits_multibyte_char(all_parsers): # Put two-bytes utf-8 encoded character "ą" at the end of chunk # utf-8 encoding of "ą" is b'\xc4\x85' df.iloc[2047] = "a" * 127 + "ą" - with tm.ensure_clean("bug-gh43540.csv") as fname: - df.to_csv(fname, index=False, header=False, encoding="utf-8") + df.to_csv(temp_file, index=False, header=False, encoding="utf-8") - if parser.engine == "pyarrow": - msg = "The 'memory_map' option is not supported with the 'pyarrow' engine" - with pytest.raises(ValueError, match=msg): - parser.read_csv(fname, header=None, memory_map=True) - return + if parser.engine == "pyarrow": + msg = "The 'memory_map' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(temp_file, header=None, memory_map=True) + return - dfr = parser.read_csv(fname, header=None, memory_map=True) + dfr = parser.read_csv(temp_file, header=None, memory_map=True) tm.assert_frame_equal(dfr, df) -def test_readcsv_memmap_utf8(all_parsers): +def test_readcsv_memmap_utf8(all_parsers, temp_file): """ GH 43787 @@ -310,16 +305,15 @@ def test_readcsv_memmap_utf8(all_parsers): lines.append(line) parser = all_parsers df = DataFrame(lines) - with tm.ensure_clean("utf8test.csv") as fname: - df.to_csv(fname, index=False, header=False, encoding="utf-8") + df.to_csv(temp_file, index=False, header=False, encoding="utf-8") - if parser.engine == "pyarrow": - msg = "The 'memory_map' option is not supported with the 'pyarrow' engine" - with pytest.raises(ValueError, match=msg): - parser.read_csv(fname, header=None, memory_map=True, encoding="utf-8") - return + if parser.engine == "pyarrow": + msg = "The 'memory_map' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(temp_file, header=None, memory_map=True, encoding="utf-8") + return - dfr = parser.read_csv(fname, header=None, memory_map=True, encoding="utf-8") + dfr = parser.read_csv(temp_file, header=None, memory_map=True, encoding="utf-8") tm.assert_frame_equal(df, dfr) diff --git a/pandas/tests/io/xml/test_xml_dtypes.py b/pandas/tests/io/xml/test_xml_dtypes.py index 62cd515366bb9..825bc3a7f9f96 100644 --- a/pandas/tests/io/xml/test_xml_dtypes.py +++ b/pandas/tests/io/xml/test_xml_dtypes.py @@ -31,11 +31,10 @@ def iterparse(request): return request.param -def read_xml_iterparse(data, **kwargs): - with tm.ensure_clean() as path: - with open(path, "w", encoding="utf-8") as f: - f.write(data) - return read_xml(path, **kwargs) +def read_xml_iterparse(data, temp_file, **kwargs): + with open(temp_file, "w", encoding="utf-8") as f: + f.write(data) + return read_xml(temp_file, **kwargs) xml_types = """\ @@ -84,13 +83,14 @@ def read_xml_iterparse(data, **kwargs): # DTYPE -def test_dtype_single_str(parser): +def test_dtype_single_str(parser, temp_file): df_result = read_xml(StringIO(xml_types), dtype={"degrees": "str"}, parser=parser) df_iter = read_xml_iterparse( xml_types, parser=parser, dtype={"degrees": "str"}, iterparse={"row": ["shape", "degrees", "sides"]}, + temp_file=temp_file, ) df_expected = DataFrame( @@ -105,13 +105,14 @@ def test_dtype_single_str(parser): tm.assert_frame_equal(df_iter, df_expected) -def test_dtypes_all_str(parser): +def test_dtypes_all_str(parser, temp_file): df_result = read_xml(StringIO(xml_dates), dtype="string", parser=parser) df_iter = read_xml_iterparse( xml_dates, parser=parser, dtype="string", iterparse={"row": ["shape", "degrees", "sides", "date"]}, + temp_file=temp_file, ) df_expected = DataFrame( @@ -128,7 +129,7 @@ def test_dtypes_all_str(parser): tm.assert_frame_equal(df_iter, df_expected) -def test_dtypes_with_names(parser): +def test_dtypes_with_names(parser, temp_file): df_result = read_xml( StringIO(xml_dates), names=["Col1", "Col2", "Col3", "Col4"], @@ -141,6 +142,7 @@ def test_dtypes_with_names(parser): names=["Col1", "Col2", "Col3", "Col4"], dtype={"Col2": "string", "Col3": "Int64", "Col4": "datetime64[ns]"}, iterparse={"row": ["shape", "degrees", "sides", "date"]}, + temp_file=temp_file, ) df_expected = DataFrame( @@ -158,13 +160,14 @@ def test_dtypes_with_names(parser): tm.assert_frame_equal(df_iter, df_expected) -def test_dtype_nullable_int(parser): +def test_dtype_nullable_int(parser, temp_file): df_result = read_xml(StringIO(xml_types), dtype={"sides": "Int64"}, parser=parser) df_iter = read_xml_iterparse( xml_types, parser=parser, dtype={"sides": "Int64"}, iterparse={"row": ["shape", "degrees", "sides"]}, + temp_file=temp_file, ) df_expected = DataFrame( @@ -179,13 +182,14 @@ def test_dtype_nullable_int(parser): tm.assert_frame_equal(df_iter, df_expected) -def test_dtype_float(parser): +def test_dtype_float(parser, temp_file): df_result = read_xml(StringIO(xml_types), dtype={"degrees": "float"}, parser=parser) df_iter = read_xml_iterparse( xml_types, parser=parser, dtype={"degrees": "float"}, iterparse={"row": ["shape", "degrees", "sides"]}, + temp_file=temp_file, ) df_expected = DataFrame( @@ -209,7 +213,7 @@ def test_wrong_dtype(xml_books, parser, iterparse): ) -def test_both_dtype_converters(parser): +def test_both_dtype_converters(parser, temp_file): df_expected = DataFrame( { "shape": ["square", "circle", "triangle"], @@ -231,6 +235,7 @@ def test_both_dtype_converters(parser): converters={"degrees": str}, parser=parser, iterparse={"row": ["shape", "degrees", "sides"]}, + temp_file=temp_file, ) tm.assert_frame_equal(df_result, df_expected) @@ -240,7 +245,7 @@ def test_both_dtype_converters(parser): # CONVERTERS -def test_converters_str(parser): +def test_converters_str(parser, temp_file): df_result = read_xml( StringIO(xml_types), converters={"degrees": str}, parser=parser ) @@ -249,6 +254,7 @@ def test_converters_str(parser): parser=parser, converters={"degrees": str}, iterparse={"row": ["shape", "degrees", "sides"]}, + temp_file=temp_file, ) df_expected = DataFrame( @@ -263,7 +269,7 @@ def test_converters_str(parser): tm.assert_frame_equal(df_iter, df_expected) -def test_converters_date(parser): +def test_converters_date(parser, temp_file): convert_to_datetime = lambda x: to_datetime(x) df_result = read_xml( StringIO(xml_dates), converters={"date": convert_to_datetime}, parser=parser @@ -273,6 +279,7 @@ def test_converters_date(parser): parser=parser, converters={"date": convert_to_datetime}, iterparse={"row": ["shape", "degrees", "sides", "date"]}, + temp_file=temp_file, ) df_expected = DataFrame( @@ -312,13 +319,14 @@ def test_callable_str_converters(xml_books, parser, iterparse): # PARSE DATES -def test_parse_dates_column_name(parser): +def test_parse_dates_column_name(parser, temp_file): df_result = read_xml(StringIO(xml_dates), parse_dates=["date"], parser=parser) df_iter = read_xml_iterparse( xml_dates, parser=parser, parse_dates=["date"], iterparse={"row": ["shape", "degrees", "sides", "date"]}, + temp_file=temp_file, ) df_expected = DataFrame( @@ -334,13 +342,14 @@ def test_parse_dates_column_name(parser): tm.assert_frame_equal(df_iter, df_expected) -def test_parse_dates_column_index(parser): +def test_parse_dates_column_index(parser, temp_file): df_result = read_xml(StringIO(xml_dates), parse_dates=[3], parser=parser) df_iter = read_xml_iterparse( xml_dates, parser=parser, parse_dates=[3], iterparse={"row": ["shape", "degrees", "sides", "date"]}, + temp_file=temp_file, ) df_expected = DataFrame( @@ -356,7 +365,7 @@ def test_parse_dates_column_index(parser): tm.assert_frame_equal(df_iter, df_expected) -def test_parse_dates_true(parser): +def test_parse_dates_true(parser, temp_file): df_result = read_xml(StringIO(xml_dates), parse_dates=True, parser=parser) df_iter = read_xml_iterparse( @@ -364,6 +373,7 @@ def test_parse_dates_true(parser): parser=parser, parse_dates=True, iterparse={"row": ["shape", "degrees", "sides", "date"]}, + temp_file=temp_file, ) df_expected = DataFrame( @@ -379,7 +389,7 @@ def test_parse_dates_true(parser): tm.assert_frame_equal(df_iter, df_expected) -def test_day_first_parse_dates(parser): +def test_day_first_parse_dates(parser, temp_file): xml = """\ @@ -421,6 +431,7 @@ def test_day_first_parse_dates(parser): parse_dates=["date"], parser=parser, iterparse={"row": ["shape", "degrees", "sides", "date"]}, + temp_file=temp_file, ) tm.assert_frame_equal(df_result, df_expected)