Skip to content

Commit 310aad2

Browse files
committed
Follow-up fix for the explosion and re-aggregation of problematic features
1 parent 8c4b6a3 commit 310aad2

File tree

1 file changed

+39
-10
lines changed

1 file changed

+39
-10
lines changed

tools/code/runAnalysis.py

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,8 @@ def has_nested_multiparts(gdf):
270270
print(f"Warning: Found problematic nested multipart geometries in ADM{adm_level} boundaries.")
271271
print(f"Converting multipart geometries to single parts for rasterstats compatibility...")
272272
n_original = len(adm_data)
273+
# Store original index to preserve grouping information
274+
adm_data['_original_idx'] = adm_data.index
273275
adm_data = adm_data.explode(index_parts=False).reset_index(drop=True)
274276
n_exploded = len(adm_data)
275277
print(f"Converted {n_original} features to {n_exploded} single-part features")
@@ -313,7 +315,11 @@ def has_nested_multiparts(gdf):
313315
exp_per_ADM = list(it.chain(*stats_parallel))
314316

315317
# Creating the results pandas dataframe
316-
result_df = adm_data.loc[:, all_adm_codes + all_adm_names + ["geometry"]]
318+
columns_to_include = all_adm_codes + all_adm_names + ["geometry"]
319+
# Include _original_idx if it exists (from exploded geometries)
320+
if '_original_idx' in adm_data.columns:
321+
columns_to_include.append('_original_idx')
322+
result_df = adm_data.loc[:, columns_to_include]
317323
result_df[f"ADM{adm_level}_{exp_cat}"] = [x['sum'] for x in exp_per_ADM]
318324

319325
# Cleaning-up memory
@@ -380,17 +386,35 @@ def has_nested_multiparts(gdf):
380386
# Aggregate results if multipart geometries were exploded
381387
if has_multipart:
382388
print(f"Aggregating results from exploded geometries back to original administrative units...")
383-
# Group by the code field and aggregate
389+
390+
# Verify grouping column exists and has valid values
391+
if '_original_idx' not in result_df.columns:
392+
raise ValueError("Original index not found in result_df. Cannot aggregate exploded geometries.")
393+
394+
# Check for NaN values in the grouping column
395+
grouping_col = '_original_idx'
396+
if result_df[grouping_col].isna().any():
397+
print(f"Warning: Found {result_df[grouping_col].isna().sum()} NaN values in grouping column")
398+
result_df = result_df.dropna(subset=[grouping_col])
399+
400+
# Get numeric columns for aggregation
384401
numeric_cols = result_df.select_dtypes(include=[np.number]).columns.tolist()
402+
# Remove the grouping index from numeric columns if present
403+
numeric_cols = [col for col in numeric_cols if col != '_original_idx']
385404

386-
# Aggregate numeric columns by sum, keep first value for name fields
405+
# Aggregate numeric columns by sum; keep the first value for non-numeric name and code fields (numeric ones are still summed)
387406
agg_dict = {col: 'sum' for col in numeric_cols}
388-
for name_col in all_adm_names:
389-
if name_col in result_df.columns:
407+
for name_col in all_adm_names + all_adm_codes:
408+
if name_col in result_df.columns and name_col not in numeric_cols:
390409
agg_dict[name_col] = 'first'
391410

392-
# Use dissolve to merge geometries back to multipart
393-
result_df = result_df.dissolve(by=code_field, aggfunc=agg_dict).reset_index()
411+
# Use dissolve to merge geometries back to multipart based on original index
412+
result_df = result_df.dissolve(by=grouping_col, aggfunc=agg_dict).reset_index(drop=True)
413+
414+
# Remove the temporary grouping column
415+
if '_original_idx' in result_df.columns:
416+
result_df = result_df.drop('_original_idx', axis=1)
417+
394418
print(f"Aggregated to {len(result_df)} administrative units")
395419

396420
# Write output csv table and geopackages
@@ -499,16 +523,21 @@ def result_df_reorder_columns(result_df, RPs, analysis_type, exp_cat, adm_level,
499523
Reorders the columns of result_df.
500524
"""
501525
# Re-ordering and dropping selected columns for better presentation of the results
502-
526+
503527
if analysis_type != "Function":
504528
return result_df
505-
529+
506530
adm_column = f"ADM{adm_level}_{exp_cat}"
507-
531+
508532
all_RPs = ["RP" + str(rp) for rp in RPs]
509533
all_exp = [x + f"_{exp_cat}_exp" for x in all_RPs]
510534
all_imp = [x + f"_{exp_cat}_imp" for x in all_RPs]
511535
col_order = all_adm_codes + all_adm_names + [adm_column] + all_exp + all_imp + ["geometry"]
536+
537+
# Preserve _original_idx if it exists (from exploded geometries)
538+
if '_original_idx' in result_df.columns:
539+
col_order.append('_original_idx')
540+
512541
result_df = result_df.loc[:, col_order]
513542

514543
return result_df

0 commit comments

Comments
 (0)