8585import django
8686import pandas as pd
8787from dagster import AssetExecutionContext , MetadataValue , Output , asset
88- from openpyxl .utils import get_column_letter
8988
9089from ..configs import BSSMetadataConfig
9190from ..partitions import bss_instances_partitions_def
@@ -197,7 +196,9 @@ def wealth_characteristic_instances(
197196 context .log .info ("Loaded %d Wealth Characteristic Labels" , len (label_map ))
198197
199198 # Get a dataframe of the Wealth Groups for each column
200- wealth_group_df = get_wealth_group_dataframe (df , livelihood_zone_baseline , "WB" , partition_key )
199+ wealth_group_df = get_wealth_group_dataframe (df , livelihood_zone_baseline , "WB" , partition_key ).set_index (
200+ "bss_column" , drop = False
201+ )
201202
202203 # Prepare the label column for matching against the label_map
203204 prepared_labels = prepare_lookup (df ["A" ])
@@ -257,21 +258,21 @@ def wealth_characteristic_instances(
257258 # Iterate over the value columns, from Column C to the Summary Column.
258259 # We don't iterate over the last two columns because they contain the min_value and max_value that are
259260 # part of the Summary Wealth Characteristic Value rather than a separate Wealth Characteristic Value.
260- for i , value in enumerate (df .loc [row , "C" : df .columns [- 3 ]]):
261- # Store the column to aid trouble-shooting.
262- # We need col_index + 1 to get the letter, and the enumerate is already starting from col C
263- column = get_column_letter (i + 3 )
261+ for column in df .columns [2 :- 2 ]:
262+ value = df .loc [row , column ]
264263 try :
265264 # Find the reference_type:
266265 # Wealth Group (Form 4) values will have a full name and a wealth group category from Row 3
267- if wealth_group_df .iloc [i ]["full_name" ] and wealth_group_df .iloc [i ]["wealth_group_category" ]:
266+ if (
267+ wealth_group_df .loc [column , "full_name" ]
268+ and wealth_group_df .loc [column , "wealth_group_category" ]
269+ ):
268270 reference_type = WealthGroupCharacteristicValue .CharacteristicReference .WEALTH_GROUP
269271 # Community (Form 3) values will have a full name from Rows 4 and 5, but no wealth group category
270- elif wealth_group_df .iloc [ i ][ "full_name" ]:
272+ elif wealth_group_df .loc [ column , "full_name" ]:
271273 reference_type = WealthGroupCharacteristicValue .CharacteristicReference .COMMUNITY
272274 # Summary values will not have full name or a wealth category, and will be in the last 3 columns
273- # Check for len(df.columns) -5 because the Summary col is 3rd from end, and i starts at Column C.
274- elif i == len (df .columns ) - 5 :
275+ elif column == df .columns [- 3 ]:
275276 reference_type = WealthGroupCharacteristicValue .CharacteristicReference .SUMMARY
276277 # There is no full name, and this isn't the summary, so we can ignore this column. This happens
277278 # because there are typically blank columns in BSS between each wealth group category. For example,
@@ -290,8 +291,8 @@ def wealth_characteristic_instances(
290291 value != ""
291292 and reference_type
292293 and (
293- not wealth_group_df .iloc [ i ][ "wealth_group_category" ]
294- or wealth_group_df .iloc [ i ][ "wealth_group_category" ] == wealth_group_category
294+ not wealth_group_df .loc [ column , "wealth_group_category" ]
295+ or wealth_group_df .loc [ column , "wealth_group_category" ] == wealth_group_category
295296 )
296297 ):
297298 wealth_group_characteristic_value = attributes .copy ()
@@ -304,7 +305,11 @@ def wealth_characteristic_instances(
304305 wealth_group_category ,
305306 # Note that we need to use the actual name from the instance, not the one calculated from
306307 # the BSS, which might have been matched using an alias.
307- wealth_group_df .iloc [i ]["community" ][2 ] if wealth_group_df .iloc [i ]["community" ] else "" ,
308+ (
309+ wealth_group_df .loc [column , "community" ][2 ]
310+ if wealth_group_df .loc [column , "community" ]
311+ else ""
312+ ),
308313 )
309314
310315 wealth_group_characteristic_value ["reference_type" ] = reference_type
@@ -354,7 +359,12 @@ def wealth_characteristic_instances(
354359 [
355360 wealth_group_df ,
356361 wealth_group_df [wealth_group_df ["community" ] == wealth_group_df .iloc [0 ]["community" ]][
357- ["wealth_group_category_original" , "wealth_group_category" , "livelihood_zone_baseline" , "community" ]
362+ [
363+ "wealth_group_category_original" ,
364+ "wealth_group_category" ,
365+ "livelihood_zone_baseline" ,
366+ "community" ,
367+ ]
358368 ].assign (community = None ),
359369 ]
360370 )
0 commit comments