Skip to content

Commit 58dc347

Browse files
authored
Pandas 2 assign_in_place() fix with categoricals (#948)
* update cat columns in df when new cat exists
* blacken
* sort two categoricals before union
1 parent 1027026 commit 58dc347

File tree

4 files changed

+27
-4
lines changed

4 files changed

+27
-4
lines changed

activitysim/abm/models/util/mode.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,8 @@ def mode_choice_simulate(
8888
choices[mode_column_name] = choices[mode_column_name].map(
8989
dict(list(zip(list(range(len(alts))), alts)))
9090
)
91-
cat_type = pd.api.types.CategoricalDtype([""] + alts.tolist(), ordered=True)
91+
# note: do we want to order the mode categories?
92+
cat_type = pd.api.types.CategoricalDtype([""] + alts.tolist())
9293
choices[mode_column_name] = choices[mode_column_name].astype(cat_type)
9394

9495
return choices

activitysim/abm/models/util/school_escort_tours_trips.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -767,7 +767,9 @@ def merge_school_escort_trips_into_pipeline(state: workflow.State):
767767
if isinstance(school_escort_trips[c].dtype, pd.api.types.CategoricalDtype):
768768
from pandas.api.types import union_categoricals
769769

770-
uc = union_categoricals([trips[c], school_escort_trips[c]])
770+
uc = union_categoricals(
771+
[trips[c], school_escort_trips[c]], sort_categories=True
772+
)
771773
trips[c] = pd.Categorical(trips[c], categories=uc.categories)
772774
school_escort_trips[c] = pd.Categorical(
773775
school_escort_trips[c], categories=uc.categories

activitysim/core/util.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,24 @@ def assign_in_place(df, df2, downcast_int=False, downcast_float=False):
342342
common_columns = df2.columns.intersection(df.columns)
343343
if len(common_columns) > 0:
344344
old_dtypes = [df[c].dtype for c in common_columns]
345+
# in pandas 2.x, updating a categorical column with any new categories raises a TypeError
346+
# so we need to add the new categories first
347+
# this is a workaround for pandas 2.x, see discussion in
348+
# https://github.com/ActivitySim/activitysim/discussions/946
349+
for c in common_columns:
350+
if isinstance(df[c].dtype, pd.CategoricalDtype):
351+
if not isinstance(df2[c].dtype, pd.CategoricalDtype):
352+
# if df column is categorical, but df2 column is not
353+
# convert df2 column to categorical then union categories
354+
df2[c] = df2[c].astype("category")
355+
356+
# when df and df2 column are both categorical, union categories
357+
from pandas.api.types import union_categoricals
358+
359+
uc = union_categoricals([df[c], df2[c]], sort_categories=True)
360+
df[c] = pd.Categorical(df[c], categories=uc.categories)
361+
df2[c] = pd.Categorical(df2[c], categories=uc.categories)
362+
345363
df.update(df2)
346364

347365
# avoid needlessly changing int columns to float
@@ -428,7 +446,7 @@ def auto_opt_pd_dtypes(
428446
else:
429447
df[col] = pd.to_numeric(df[col], downcast="float")
430448
# Skip if the column is already categorical
431-
if pd.api.types.is_categorical_dtype(dtype):
449+
if isinstance(dtype, pd.CategoricalDtype):
432450
continue
433451
# Handle integer types
434452
if pd.api.types.is_integer_dtype(dtype):

activitysim/core/workflow/state.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1099,7 +1099,9 @@ def extend_table(self, table_name, df, axis=0):
10991099
if isinstance(df[c].dtype, pd.api.types.CategoricalDtype):
11001100
from pandas.api.types import union_categoricals
11011101

1102-
uc = union_categoricals([table_df[c], df[c]])
1102+
uc = union_categoricals(
1103+
[table_df[c], df[c]], sort_categories=True
1104+
)
11031105
table_df[c] = pd.Categorical(
11041106
table_df[c], categories=uc.categories
11051107
)

0 commit comments

Comments
 (0)