Skip to content

Commit 16661c0

Browse files
committed
Refactor wealth_characteristic_instances for clarity - see HEA-572
1 parent 1296645 commit 16661c0

File tree

1 file changed

+24
-14
lines changed

1 file changed

+24
-14
lines changed

pipelines/assets/wealth_characteristic.py

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@
8585
import django
8686
import pandas as pd
8787
from dagster import AssetExecutionContext, MetadataValue, Output, asset
88-
from openpyxl.utils import get_column_letter
8988

9089
from ..configs import BSSMetadataConfig
9190
from ..partitions import bss_instances_partitions_def
@@ -197,7 +196,9 @@ def wealth_characteristic_instances(
197196
context.log.info("Loaded %d Wealth Characteristic Labels", len(label_map))
198197

199198
# Get a dataframe of the Wealth Groups for each column
200-
wealth_group_df = get_wealth_group_dataframe(df, livelihood_zone_baseline, "WB", partition_key)
199+
wealth_group_df = get_wealth_group_dataframe(df, livelihood_zone_baseline, "WB", partition_key).set_index(
200+
"bss_column", drop=False
201+
)
201202

202203
# Prepare the label column for matching against the label_map
203204
prepared_labels = prepare_lookup(df["A"])
@@ -257,21 +258,21 @@ def wealth_characteristic_instances(
257258
# Iterate over the value columns, from Column C to the Summary Column.
258259
# We don't iterate over the last two columns because they contain the min_value and max_value that are
259260
# part of the Summary Wealth Characteristic Value rather than a separate Wealth Characteristic Value.
260-
for i, value in enumerate(df.loc[row, "C" : df.columns[-3]]):
261-
# Store the column to aid trouble-shooting.
262-
# We need col_index + 1 to get the letter, and the enumerate is already starting from col C
263-
column = get_column_letter(i + 3)
261+
for column in df.columns[2:-2]:
262+
value = df.loc[row, column]
264263
try:
265264
# Find the reference_type:
266265
# Wealth Group (Form 4) values will have a full name and a wealth group category from Row 3
267-
if wealth_group_df.iloc[i]["full_name"] and wealth_group_df.iloc[i]["wealth_group_category"]:
266+
if (
267+
wealth_group_df.loc[column, "full_name"]
268+
and wealth_group_df.loc[column, "wealth_group_category"]
269+
):
268270
reference_type = WealthGroupCharacteristicValue.CharacteristicReference.WEALTH_GROUP
269271
# Community (Form 3) values will have a full name from Rows 4 and 5, but no wealth group category
270-
elif wealth_group_df.iloc[i]["full_name"]:
272+
elif wealth_group_df.loc[column, "full_name"]:
271273
reference_type = WealthGroupCharacteristicValue.CharacteristicReference.COMMUNITY
272274
# Summary values will not have full name or a wealth category, and will be in the last 3 columns
273-
# Check for len(df.columns) -5 because the Summary col is 3rd from end, and i starts at Column C.
274-
elif i == len(df.columns) - 5:
275+
elif column == df.columns[-3]:
275276
reference_type = WealthGroupCharacteristicValue.CharacteristicReference.SUMMARY
276277
# There is no full name, and this isn't the summary, so we can ignore this column. This happens
277278
# because there are typically blank columns in BSS between each wealth group category. For example,
@@ -290,8 +291,8 @@ def wealth_characteristic_instances(
290291
value != ""
291292
and reference_type
292293
and (
293-
not wealth_group_df.iloc[i]["wealth_group_category"]
294-
or wealth_group_df.iloc[i]["wealth_group_category"] == wealth_group_category
294+
not wealth_group_df.loc[column, "wealth_group_category"]
295+
or wealth_group_df.loc[column, "wealth_group_category"] == wealth_group_category
295296
)
296297
):
297298
wealth_group_characteristic_value = attributes.copy()
@@ -304,7 +305,11 @@ def wealth_characteristic_instances(
304305
wealth_group_category,
305306
# Note that we need to use the actual name from the instance, not the one calculated from
306307
# the BSS, which might have been matched using an alias.
307-
wealth_group_df.iloc[i]["community"][2] if wealth_group_df.iloc[i]["community"] else "",
308+
(
309+
wealth_group_df.loc[column, "community"][2]
310+
if wealth_group_df.loc[column, "community"]
311+
else ""
312+
),
308313
)
309314

310315
wealth_group_characteristic_value["reference_type"] = reference_type
@@ -354,7 +359,12 @@ def wealth_characteristic_instances(
354359
[
355360
wealth_group_df,
356361
wealth_group_df[wealth_group_df["community"] == wealth_group_df.iloc[0]["community"]][
357-
["wealth_group_category_original", "wealth_group_category", "livelihood_zone_baseline", "community"]
362+
[
363+
"wealth_group_category_original",
364+
"wealth_group_category",
365+
"livelihood_zone_baseline",
366+
"community",
367+
]
358368
].assign(community=None),
359369
]
360370
)

0 commit comments

Comments
 (0)