Skip to content

Commit 52663bf

Browse files
Merge pull request #62 from dataiku/fix-tzlocalize-tzconvert
Fix tz conversion
2 parents 0dda388 + 7af8bc3 commit 52663bf

File tree

5 files changed

+11
-4
lines changed

5 files changed

+11
-4
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# Changelog
22

3+
## Version 2.1.2 - Bugfix release - 2025-05
4+
### Resampling recipe
5+
- :bug: Fix Python 3.8/3.11 support
36

47
## Version 2.1.1 - Bugfix release - 2025-04
58
### Resampling recipe

custom-recipes/timeseries-preparation-resampling/recipe.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,12 @@
3434
output_df = resampler.transform(df, datetime_column, groupby_columns=groupby_columns)
3535

3636
# int columns must be resampled into int values
37-
columns_to_round = [
37+
dss_expected_integer_columns = [
3838
column["name"]
3939
for column in schema
4040
if column["type"] in ["tinyint", "smallint", "int", "bigint"]
4141
]
42+
columns_to_round = [c for c in df.select_dtypes(include="inexact").columns if c in dss_expected_integer_columns]
4243
output_df[columns_to_round] = output_df[columns_to_round].round()
4344

4445

plugin.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"id": "timeseries-preparation",
3-
"version": "2.1.1",
3+
"version": "2.1.2",
44
"meta": {
55
"supportLevel": "SUPPORTED",
66
"label": "Time Series Preparation",

python-lib/dku_timeseries/resampling.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,10 @@ def _fill_in_category_values(self, df, category_columns):
240240
for col in category_columns:
241241
# only perform conversion if the column has a timezone
242242
if pd.api.types.is_datetime64_any_dtype(category_filled_df[col]) and category_filled_df[col].dt.tz is not None:
243-
most_frequent_categoricals[col] = most_frequent_categoricals[col].tz_localize("UTC")
243+
if most_frequent_categoricals[col].tzinfo is None: # tz-naive timestamp -> localize
244+
most_frequent_categoricals[col] = most_frequent_categoricals[col].tz_localize("UTC")
245+
else: # tz_convert
246+
most_frequent_categoricals[col] = most_frequent_categoricals[col].tz_convert("UTC")
244247

245248
category_filled_df.loc[:, category_columns] = category_filled_df.loc[:, category_columns].fillna(most_frequent_categoricals)
246249
return category_filled_df

tests/python/unit/dku_timeseries/resampling/test_category_methods.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ def test_df_multiple_dates(self, df_multiple_dates, config,columns):
289289
params = get_resampling_params(config)
290290
resampler = Resampler(params)
291291
output_df = resampler.transform(df_multiple_dates, columns.date)
292-
assert pd.isnull(output_df.loc[1, "date2"])
292+
np.testing.assert_array_equal(pd.to_datetime(output_df['date2']).map(lambda s: s.strftime('%Y-%m-%d')), np.array(["2013-01-01", "2013-01-01", "2013-01-01", "2013-01-01", "2013-01-03", "2013-01-03", "2013-01-04", "2013-01-04", "2013-01-05", "2013-01-05", "2013-01-06"]))
293293

294294
def test_bool_column(self, bool_df, config,columns):
295295
config["category_imputation_method"] = "previous"

0 commit comments

Comments
 (0)