# Patch overview (reviewer commentary placed ahead of the first "diff --git" header).
#
# File 1: pandas/_libs/parsers.pyx, hunk inside sanitize_objects(values, na_values)
#   * Adds three lines: a new `elif val in [0, 1, True, False]:` branch that sits
#     after the `val in na_values` check and before the `val in memo` lookup.
#   * The new branch assigns `values[i] = val` instead of `values[i] = memo[val]`;
#     the patch's own comment gives the reason: "Skip memoization, since 1==True
#     and 0==False".
#   * Only part of the function is visible in this hunk (the loop header, the
#     binding of `val`, and the body of the trailing `else:` are outside it).
#     Presumably `val` is `values[i]`, which would make the new assignment a
#     write-back of the same object -- TODO confirm against the full function.
#   * NOTE(review): list membership compares with `==`, so other values equal to
#     0 or 1 (e.g. 0.0 / 1.0) would also take this branch -- confirm that
#     bypassing the memo for them is intended.
#
# File 2: pandas/tests/io/parser/common/test_common_basic.py
#   * Adds `from pandas._libs import parsers as libparsers` to the import block.
#   * Appends test_dtype_conversion_in_sanitization (tagged GH60088): builds an
#     object array [1, True], calls libparsers.sanitize_objects on it with an
#     empty na_values set, then asserts every element still compares equal to,
#     and has the same type as, the matching element of an identically built
#     `expected` array.
#   * NOTE(review): `type(v) == type(e)` is an exact-type comparison; linters
#     usually ask for `type(v) is type(e)` here.
#
# NOTE(review): in this copy the patch's line breaks appear to have been collapsed
# into a single line; the original newlines and indentation need to be restored
# before it can be applied -- confirm against the upstream change.
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 5b94f45490da4..e07e0088536e2 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -2132,6 +2132,9 @@ def sanitize_objects(ndarray[object] values, set na_values) -> int: if val in na_values: values[i] = onan na_count += 1 + elif val in [0, 1, True, False]: + # Skip memoization, since 1==True and 0==False + values[i] = val elif val in memo: values[i] = memo[val] else: diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index 3680273f5e98a..d90e01ca4a40f 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -15,6 +15,7 @@ from pandas._config import using_string_dtype +from pandas._libs import parsers as libparsers from pandas.compat import HAS_PYARROW from pandas.errors import ( EmptyDataError, @@ -830,3 +831,13 @@ def test_read_seek(all_parsers): actual = parser.read_csv(file) expected = parser.read_csv(StringIO(content)) tm.assert_frame_equal(actual, expected) + + +def test_dtype_conversion_in_sanitization(): + # GH60088 + values = np.array([1, True], dtype=object) + expected = np.array([1, True], dtype=object) + libparsers.sanitize_objects(values, na_values=set()) + for v, e in zip(values, expected): + assert v == e + assert type(v) == type(e)