Skip to content

Commit 46b23ba

Browse files
authored
Update fill_nan_with_none to not raise Pandas FutureWarning (#1024)
1 parent b3b721e commit 46b23ba

File tree

2 files changed

+55
-1
lines changed

2 files changed

+55
-1
lines changed

rdt/transformers/utils.py

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -175,6 +175,17 @@ def strings_from_regex(regex, max_repeat=16):
175175
return _from_generators(generators, max_repeat), np.prod(sizes, dtype=np.complex128).real
176176

177177

178+
def _fill_nan_with_none_series(data):
179+
sentinel = object()
180+
dtype = data.dtype
181+
if isinstance(dtype, pd.CategoricalDtype):
182+
data = data.cat.add_categories([sentinel])
183+
data = data.fillna(sentinel).replace({sentinel: None})
184+
return pd.Series(pd.Categorical(data, categories=dtype.categories), index=data.index)
185+
186+
return data.fillna(sentinel).replace({sentinel: None})
187+
188+
178189
def fill_nan_with_none(data):
179190
"""Replace all nan values with None.
180191
@@ -185,7 +196,10 @@ def fill_nan_with_none(data):
185196
data:
186197
Original data with nan values replaced by None.
187198
"""
188-
return data.infer_objects().fillna(np.nan).replace([np.nan], [None])
199+
if isinstance(data, pd.DataFrame):
200+
return data.apply(_fill_nan_with_none_series)
201+
202+
return _fill_nan_with_none_series(data)
189203

190204

191205
def flatten_column_list(column_list):

tests/unit/transformers/test_utils.py

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
_any,
1717
_cast_to_type,
1818
_extract_timezone_from_a_string,
19+
_fill_nan_with_none_series,
1920
_get_utc_offset,
2021
_handle_enforce_uniqueness_and_cardinality_rule,
2122
_max_repeat,
@@ -167,6 +168,45 @@ def test_fill_nan_with_none_no_warning():
167168
pd.testing.assert_series_equal(result, expected)
168169

169170

171+
def test__fill_nan_with_none_series():
    """Test ``_fill_nan_with_none_series`` on an object series and a categorical series."""
    # Setup
    object_input = pd.Series([1.0, 2.0, 3.0, np.nan], dtype='object')
    categorical_input = pd.Series(['a', 'b', 'c', 'd', np.nan], dtype='category')

    # Run
    object_output = _fill_nan_with_none_series(object_input)
    categorical_output = _fill_nan_with_none_series(categorical_input)

    # Assert
    expected_object = pd.Series([1.0, 2.0, 3.0, None], dtype='object')
    expected_values = pd.Categorical(['a', 'b', 'c', 'd', None], categories=['a', 'b', 'c', 'd'])
    expected_categorical = pd.Series(expected_values)
    pd.testing.assert_series_equal(object_output, expected_object)
    pd.testing.assert_series_equal(categorical_output, expected_categorical)
188+
189+
190+
def test_fill_nan_with_none_series():
    """Test ``fill_nan_with_none`` on dataframes together with the series helper."""
    # Setup
    column = pd.Series([1.0, 2.0, 3.0, np.nan], dtype='object')
    single_column_frame = pd.DataFrame({'col1': column})
    two_column_frame = pd.DataFrame({'col1': column, 'col2': ['a', 'b', 'c', np.nan]})

    # Run
    series_output = _fill_nan_with_none_series(column)
    single_column_output = fill_nan_with_none(single_column_frame)
    two_column_output = fill_nan_with_none(two_column_frame)

    # Assert
    expected_column = pd.Series([1.0, 2.0, 3.0, None], dtype='object')
    expected_two_column_frame = pd.DataFrame({
        'col1': expected_column,
        'col2': ['a', 'b', 'c', None],
    })
    expected_single_column_frame = pd.DataFrame({'col1': expected_column})
    pd.testing.assert_series_equal(series_output, expected_column)
    pd.testing.assert_frame_equal(single_column_output, expected_single_column_frame)
    pd.testing.assert_frame_equal(two_column_output, expected_two_column_frame)
208+
209+
170210
def test_check_nan_in_transform():
171211
"""Test ``check_nan_in_transform`` method.
172212

0 commit comments

Comments
 (0)