Commit 1bae9ad

still fixing groups

1 parent 7e8df85, commit 1bae9ad

File tree

2 files changed (+20, -14 lines)

climada/trajectories/risk_trajectory.py

Lines changed: 5 additions & 4 deletions
@@ -249,6 +249,7 @@ def _npv_group(group, disc):
             dropna=False,
             as_index=False,
             group_keys=False,
+            observed=False,
         )["risk"].transform(_npv_group, risk_disc)
         df = df.reset_index()
         return df
@@ -509,9 +510,9 @@ def identify_continuous_periods(group, time_unit):

         df_sorted = df.sort_values(by=cls._grouper + ["date"])
         # Apply the function to identify continuous periods
-        df_periods = df_sorted.groupby(grouper, dropna=False, group_keys=False).apply(
-            identify_continuous_periods, time_unit
-        )
+        df_periods = df_sorted.groupby(
+            grouper, dropna=False, group_keys=False, observed=False
+        ).apply(identify_continuous_periods, time_unit)

         if isinstance(colname, str):
             colname = [colname]
@@ -524,7 +525,7 @@ def identify_continuous_periods(group, time_unit):
             agg_dict[col] = pd.NamedAgg(column=col, aggfunc="sum")
         # Group by the identified periods and calculate start and end dates
         df_periods = (
-            df_periods.groupby(grouper + ["period_id"], dropna=False)
+            df_periods.groupby(grouper + ["period_id"], dropna=False, observed=False)
             .agg(**agg_dict)
             .reset_index()
         )
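
The common thread in these edits is the pandas `observed` keyword: once the "group" column is categorical, `groupby` needs `observed=False` to keep returning one result row per declared category, and passing it explicitly likely also avoids the deprecation warning about the default changing in future pandas versions. A minimal, self-contained sketch of the behavioural difference, using toy data rather than CLIMADA objects:

# Toy illustration (not CLIMADA code) of the `observed` keyword added above.
import pandas as pd

df = pd.DataFrame(
    {
        "group": pd.Categorical(["a", "a", "b"], categories=["a", "b", "c"]),
        "risk": [1.0, 2.0, 3.0],
    }
)

# observed=False keeps every declared category, including the unobserved "c"
# (an empty sum is 0), so downstream merges and interpolation always see the
# same set of groups.
print(df.groupby("group", observed=False)["risk"].sum())

# observed=True drops "c" entirely, which is the future pandas default.
print(df.groupby("group", observed=True)["risk"].sum())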

climada/trajectories/riskperiod.py

Lines changed: 15 additions & 10 deletions
@@ -147,6 +147,9 @@ def __init__(
             if "group_id" in self.snapshot1.exposure.gdf.columns
             else np.array([])
         )
+        self._groups_id = np.unique(
+            np.concatenate([self._group_id_E0, self._group_id_E1])
+        )

     def _reset_impact_data(self):
         for fut in list(itertools.product([0, 1], repeat=3)):
@@ -690,7 +693,7 @@ def calc_eai_gdf(self):
         eai_gdf["coord_id"] = eai_gdf.index
         eai_gdf = eai_gdf.merge(df, on="coord_id")
         eai_gdf = eai_gdf.rename(columns={"group_id": "group"})
-        eai_gdf["group"] = eai_gdf["group"].astype("category")
+        eai_gdf["group"] = pd.Categorical(eai_gdf["group"], categories=self._groups_id)
         eai_gdf["metric"] = "eai"
         eai_gdf["measure"] = self.measure.name if self.measure else "no_measure"
         return eai_gdf
@@ -699,7 +702,9 @@ def calc_aai_metric(self):
         aai_df = pd.DataFrame(
             index=self.date_idx, columns=["risk"], data=self.per_date_aai
         )
-        aai_df["group"] = pd.NA
+        aai_df["group"] = pd.Categorical(
+            [pd.NA] * len(aai_df), categories=self._groups_id
+        )
         aai_df["metric"] = "aai"
         aai_df["measure"] = self.measure.name if self.measure else "no_measure"
         aai_df.reset_index(inplace=True)
@@ -713,19 +718,17 @@ def calc_aai_per_group_metric(self):
             return pd.DataFrame()

         eai_pres_groups = self.eai_gdf[["date", "coord_id", "group", "risk"]].copy()
-        aai_per_group_df = eai_pres_groups.groupby(["date", "group"], as_index=False)[
-            "risk"
-        ].sum()
+        aai_per_group_df = eai_pres_groups.groupby(
+            ["date", "group"], as_index=False, observed=False
+        )["risk"].sum()
         if not np.array_equal(self._group_id_E0, self._group_id_E1):
             LOGGER.warning(
                 "Group id are changing between present and future snapshot. Per group AAI will be linearly interpolated."
             )
             eai_fut_groups = self.eai_gdf.copy()
             eai_fut_groups["group"] = pd.Categorical(
                 np.tile(self._group_id_E1, len(self.date_idx)),
-                categories=np.unique(
-                    np.concatenate([self._group_id_E0, self._group_id_E1])
-                ),
+                categories=self._groups_id,
             )
             aai_fut_groups = eai_fut_groups.groupby(["date", "group"], as_index=False)[
                 "risk"
@@ -761,7 +764,9 @@ def calc_return_periods_metric(self, return_periods):
             index=self.date_idx, columns=return_periods, data=per_date_rp
         ).melt(value_name="risk", var_name="rp", ignore_index=False)
         rp_df.reset_index(inplace=True)
-        rp_df["group"] = pd.NA
+        rp_df["group"] = pd.Categorical(
+            [pd.NA] * len(rp_df), categories=self._groups_id
+        )
         rp_df["metric"] = "rp_" + rp_df["rp"].astype(str)
         rp_df["measure"] = self.measure.name if self.measure else "no_measure"
         return rp_df
@@ -806,7 +811,7 @@ def calc_risk_components_metric(self):
             ignore_index=False,
         )
         df.reset_index(inplace=True)
-        df["group"] = pd.NA
+        df["group"] = pd.Categorical([pd.NA] * len(df), categories=self._groups_id)
         df["measure"] = self.measure.name if self.measure else "no_measure"
         return df

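
The riskperiod.py side of the fix gives every metric DataFrame a "group" column built with `pd.Categorical(..., categories=self._groups_id)`, where `self._groups_id` (computed once in `__init__`) is the union of the group ids of both snapshots. Declaring the categories up front, instead of letting `astype("category")` infer them or leaving the column as plain `pd.NA`, means all metric tables share one dtype and one category set, so concatenation and categorical groupbys behave consistently even when a group is missing from one snapshot. A rough sketch of the idea with toy data (illustrative names, not CLIMADA code):

# Toy illustration of sharing one category set across several metric tables.
import numpy as np
import pandas as pd

group_id_e0 = np.array([1, 2])   # groups in the present snapshot
group_id_e1 = np.array([2, 3])   # groups in the future snapshot
groups_id = np.unique(np.concatenate([group_id_e0, group_id_e1]))  # [1 2 3]

# Per-group metric: categories are declared explicitly, not inferred from data.
eai = pd.DataFrame(
    {"group": pd.Categorical([1, 2], categories=groups_id), "risk": [0.5, 1.5]}
)

# Aggregate metric: the group is NA, but the column still carries all categories.
aai = pd.DataFrame(
    {"group": pd.Categorical([pd.NA], categories=groups_id), "risk": [2.0]}
)

combined = pd.concat([eai, aai], ignore_index=True)
print(combined["group"].dtype)  # stays categorical because both dtypes match

# With observed=False (and dropna=False) the groupby reports every declared
# group, including 3, which has no rows here, plus the NA "no group" row.
print(combined.groupby("group", observed=False, dropna=False)["risk"].sum())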
