diff --git a/pipelines/assets/baseline.py b/pipelines/assets/baseline.py index 875a14d..5e01efb 100644 --- a/pipelines/assets/baseline.py +++ b/pipelines/assets/baseline.py @@ -88,7 +88,17 @@ def get_wealth_group_dataframe( wealth_group_df = wealth_group_df.loc[:, ~wealth_group_df.columns.duplicated()] except ValueError: pass - + # Check whether there are any unrecognized wealth group categories at this point and report them + wealth_group_missing_category_df = wealth_group_df[ + wealth_group_df["wealth_group_category"].isnull() + & wealth_group_df["wealth_group_category_original"].notnull() + ] + if not wealth_group_missing_category_df.empty: + unique_values = set(wealth_group_missing_category_df["wealth_group_category_original"].unique()) + raise ValueError( + "%s has unrecognized wealth group category in %s:\n%s" + % (partition_key, worksheet_name, "\n ".join(unique_values)), + ) # Lookup the Community instances community_lookup = CommunityLookup() wealth_group_df["livelihood_zone_baseline"] = livelihood_zone_baseline.id # required parent for lookup