Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,7 @@ Other
- Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`)
- Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
- Bug in :meth:`_clean_na_values` in :class:`TextFileReader` that was not properly handling ``na_values`` when it is a list of strings. (:issue:`59303`)
- Bug in the DataFrame Interchange Protocol implementation that was returning incorrect results for the dtype associated with data buffers, for string and datetime columns (:issue:`54781`)
- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)

Expand Down
2 changes: 1 addition & 1 deletion pandas/io/parsers/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1648,7 +1648,7 @@ def _clean_na_values(na_values, keep_default_na: bool = True, floatify: bool = T
if keep_default_na:
v = set(v) | STR_NA_VALUES

na_values[k] = v
na_values[k] = _stringify_na_values(v, floatify)
na_fvalues = {k: _floatify_na_values(v) for k, v in na_values.items()}
else:
if not is_list_like(na_values):
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/io/parser/test_na_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -812,3 +812,23 @@ def test_bool_and_nan_to_float(all_parsers):
result = parser.read_csv(StringIO(data), dtype="float")
expected = DataFrame.from_dict({"0": [np.nan, 1.0, 0.0]})
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
def test_na_values_dict_without_dtype(all_parsers):
    """Check dict ``na_values`` mixing int and float spellings (GH#59303).

    The column contains both ``-99`` and ``-99.0``; every row is expected to
    be read as NaN whichever order the two sentinels are listed in.
    """
    csv_text = """A
-99
-99
-99.0
-99.0"""
    all_missing = {"A": [np.nan, np.nan, np.nan, np.nan]}

    # Float listed first: this ordering failed before the fix.
    float_first = all_parsers.read_csv(
        StringIO(csv_text), na_values={"A": [-99.0, -99]}
    )
    tm.assert_frame_equal(float_first, DataFrame.from_dict(all_missing))

    # Int listed first: this ordering passed even before the fix.
    int_first = all_parsers.read_csv(
        StringIO(csv_text), na_values={"A": [-99, -99.0]}
    )
    tm.assert_frame_equal(int_first, DataFrame.from_dict(all_missing))