Skip to content

Commit 4fc1469

Browse files
committed
make release-tag: Merge branch 'main' into stable
2 parents a0bae5f + 8559491 commit 4fc1469

File tree

9 files changed

+101
-13
lines changed

9 files changed

+101
-13
lines changed

HISTORY.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,21 @@
11
# History
22

3+
## v1.18.2 - 2025-10-15
4+
5+
### Bugs Fixed
6+
7+
* Error when fitting using nullable integer data with categorical transformer - Issue [#1036](https://github.com/sdv-dev/RDT/issues/1036) by @R-Palazzo
8+
9+
### Maintenance
10+
11+
* Remove the exclusion of Faker v37.11.0 once the fix for Python 3.9 has been released - Issue [#1043](https://github.com/sdv-dev/RDT/issues/1043) by @R-Palazzo
12+
13+
## v1.18.1 - 2025-09-11
14+
15+
### Bugs Fixed
16+
17+
* fill_nan_with_none produces pandas FutureWarning - Issue [#1027](https://github.com/sdv-dev/RDT/issues/1027) by @R-Palazzo
18+
319
## v1.18.0 - 2025-08-14
420

521
### New Features

latest_requirements.txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
Faker==37.5.3
1+
Faker==37.8.0
22
copulas==0.12.3
3-
numpy==2.3.2
4-
pandas==2.3.1
5-
scikit-learn==1.7.1
6-
scipy==1.16.1
3+
numpy==2.3.3
4+
pandas==2.3.3
5+
scikit-learn==1.7.2
6+
scipy==1.16.2

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ dependencies = [
3939
"scikit-learn>=1.1.3;python_version>='3.11' and python_version<'3.12'",
4040
"scikit-learn>=1.3.1;python_version>='3.12' and python_version<'3.13'",
4141
"scikit-learn>=1.5.2;python_version>='3.13'",
42-
'Faker>=17',
42+
'Faker>=17, !=37.11.0',
4343
'python-dateutil>=2.9',
4444
]
4545

@@ -144,7 +144,7 @@ collect_ignore = ['pyproject.toml']
144144
exclude_lines = ['NotImplementedError()']
145145

146146
[tool.bumpversion]
147-
current_version = "1.18.0"
147+
current_version = "1.18.2.dev1"
148148
parse = '(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
149149
serialize = [
150150
'{major}.{minor}.{patch}.{release}{candidate}',

rdt/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
__author__ = 'DataCebo, Inc.'
66
__email__ = 'info@sdv.dev'
7-
__version__ = '1.18.0'
7+
__version__ = '1.18.2.dev1'
88

99

1010
import sys

rdt/transformers/categorical.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,12 @@ def _order_categories(self, unique_data):
5959
nans = pd.isna(unique_data)
6060
if self.order_by == 'alphabetical':
6161
# pylint: disable=invalid-unary-operand-type
62-
if any(map(lambda item: not isinstance(item, str), unique_data[~nans])): # noqa: C417
62+
if any(not isinstance(item, str) for item in unique_data[~nans]):
6363
raise TransformerInputError(
6464
"The data must be of type string if order_by is 'alphabetical'."
6565
)
6666
elif self.order_by == 'numerical_value':
67-
if not np.issubdtype(unique_data.dtype.type, np.number):
67+
if any(not np.issubdtype(type(item), np.number) for item in unique_data[~nans]):
6868
raise TransformerInputError(
6969
"The data must be numerical if order_by is 'numerical_value'."
7070
)

rdt/transformers/utils.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,17 @@ def strings_from_regex(regex, max_repeat=16):
175175
return _from_generators(generators, max_repeat), np.prod(sizes, dtype=np.complex128).real
176176

177177

178+
def _fill_nan_with_none_series(data):
179+
dtype = data.dtype
180+
if isinstance(dtype, pd.CategoricalDtype):
181+
sentinel = object()
182+
data = data.cat.add_categories([sentinel])
183+
data = data.fillna(sentinel).replace({sentinel: None})
184+
return pd.Series(pd.Categorical(data, categories=dtype.categories), index=data.index)
185+
186+
return data.astype('object').where(~data.isna(), None)
187+
188+
178189
def fill_nan_with_none(data):
179190
"""Replace all nan values with None.
180191
@@ -185,7 +196,10 @@ def fill_nan_with_none(data):
185196
data:
186197
Original data with nan values replaced by None.
187198
"""
188-
return data.infer_objects().fillna(np.nan).replace([np.nan], [None])
199+
if isinstance(data, pd.DataFrame):
200+
return data.apply(_fill_nan_with_none_series)
201+
202+
return _fill_nan_with_none_series(data)
189203

190204

191205
def flatten_column_list(column_list):

static_code_analysis.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
Run started:2025-08-14 20:37:52.987737
1+
Run started:2025-10-15 16:09:17.276277
22

33
Test results:
44
No issues identified.
55

66
Code scanned:
7-
Total lines of code: 6323
7+
Total lines of code: 6333
88
Total lines skipped (#nosec): 0
99
Total potential issues skipped due to specifically being disabled (e.g., #nosec BXXX): 0
1010

tests/unit/transformers/test_categorical.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,24 @@ def test__fit(self):
198198
assert transformer.frequencies == expected_frequencies
199199
assert transformer.intervals == expected_intervals
200200

201+
def test_fit_with_nullable_integer_dtype(self):
202+
"""Test that the ``fit`` method works with nullable integer columns."""
203+
# Setup
204+
data = pd.DataFrame({'example': [1, 2, 3, None]}, dtype='Int64')
205+
transformer = UniformEncoder()
206+
207+
# Run
208+
transformer.fit(data=data, column='example')
209+
210+
# Assert
211+
expected_frequencies = {
212+
1: 0.25,
213+
2: 0.25,
214+
3: 0.25,
215+
None: 0.25,
216+
}
217+
assert transformer.frequencies == expected_frequencies
218+
201219
def test__set_fitted_parameters(self):
202220
"""Test the ``_set_fitted_parameters`` method."""
203221
# Setup

tests/unit/transformers/test_utils.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
_any,
1717
_cast_to_type,
1818
_extract_timezone_from_a_string,
19+
_fill_nan_with_none_series,
1920
_get_utc_offset,
2021
_handle_enforce_uniqueness_and_cardinality_rule,
2122
_max_repeat,
@@ -167,6 +168,45 @@ def test_fill_nan_with_none_no_warning():
167168
pd.testing.assert_series_equal(result, expected)
168169

169170

171+
def test__fill_nan_with_none_series():
172+
"""Test the ``_fill_nan_with_none_series`` method."""
173+
# Setup
174+
series = pd.Series([1.0, 2.0, 3.0, np.nan], dtype='object')
175+
categorical_serie = pd.Series(['a', 'b', 'c', 'd', np.nan], dtype='category')
176+
177+
# Run
178+
result = _fill_nan_with_none_series(series)
179+
result_categorical = _fill_nan_with_none_series(categorical_serie)
180+
181+
# Assert
182+
expected_result = pd.Series([1.0, 2.0, 3.0, None], dtype='object')
183+
pd.testing.assert_series_equal(result, expected_result)
184+
expected_result_categorical = pd.Series(
185+
pd.Categorical(['a', 'b', 'c', 'd', None], categories=['a', 'b', 'c', 'd'])
186+
)
187+
pd.testing.assert_series_equal(result_categorical, expected_result_categorical)
188+
189+
190+
def test_fill_nan_with_none_series():
191+
"""Test the `fill_nan_with_none_series` function."""
192+
# Setup
193+
series = pd.Series([1.0, 2.0, 3.0, np.nan], dtype='object')
194+
data = pd.DataFrame({'col1': series})
195+
data_2 = pd.DataFrame({'col1': series, 'col2': ['a', 'b', 'c', np.nan]})
196+
197+
# Run
198+
result_series = _fill_nan_with_none_series(series)
199+
result_data = fill_nan_with_none(data)
200+
result_data_2 = fill_nan_with_none(data_2)
201+
202+
# Assert
203+
expected_result = pd.Series([1.0, 2.0, 3.0, None], dtype='object')
204+
expected_result_data_2 = pd.DataFrame({'col1': expected_result, 'col2': ['a', 'b', 'c', None]})
205+
pd.testing.assert_series_equal(result_series, expected_result)
206+
pd.testing.assert_frame_equal(result_data, pd.DataFrame({'col1': expected_result}))
207+
pd.testing.assert_frame_equal(result_data_2, expected_result_data_2)
208+
209+
170210
def test_check_nan_in_transform():
171211
"""Test ``check_nan_in_transform`` method.
172212

0 commit comments

Comments
 (0)