Skip to content

Commit 73eace5

Browse files
authored
Merge pull request #179 from FEWS-NET/HEA-572/relax_data_completeness_constraint_4
Hea 572/relax data completeness constraint
2 parents 2c6ce2f + 6e7f151 commit 73eace5

File tree

6 files changed

+229
-232
lines changed

6 files changed

+229
-232
lines changed

pipelines/assets/fixtures.py

Lines changed: 169 additions & 143 deletions
Large diffs are not rendered by default.

pipelines/assets/livelihood_activity.py

Lines changed: 31 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ def get_all_label_attributes(labels: pd.Series, activity_type: str, country_code
387387

388388
def get_instances_from_dataframe(
389389
context: AssetExecutionContext,
390+
config: BSSMetadataConfig,
390391
df: pd.DataFrame,
391392
livelihood_zone_baseline: LivelihoodZoneBaseline,
392393
activity_type: str,
@@ -727,8 +728,9 @@ def get_instances_from_dataframe(
727728
for i, livelihood_activity in enumerate(livelihood_activities_for_strategy):
728729
livelihood_activity["livelihood_strategy"] = livelihood_zone_baseline_key + [
729730
livelihood_strategy["strategy_type"],
730-
livelihood_strategy["season"] if livelihood_strategy["season"] else "",
731-
livelihood_strategy["product_id"] if livelihood_strategy["product_id"] else "",
731+
livelihood_strategy["season"] or "", # Natural key components must be "" rather than None
732+
livelihood_strategy["product_id"]
733+
or "", # Natural key components must be "" rather than None
732734
livelihood_strategy["additional_identifier"],
733735
]
734736

@@ -1149,13 +1151,6 @@ def get_instances_from_dataframe(
11491151
% (partition_key, worksheet_name, row, label)
11501152
) from e
11511153

1152-
raise_errors = True
1153-
if errors and raise_errors:
1154-
errors = "\n".join(errors)
1155-
raise RuntimeError(
1156-
"Missing or inconsistent metadata in BSS %s worksheet '%s':\n%s" % (partition_key, worksheet_name, errors)
1157-
)
1158-
11591154
result = {
11601155
"LivelihoodStrategy": livelihood_strategies,
11611156
"LivelihoodActivity": livelihood_activities,
@@ -1177,6 +1172,19 @@ def get_instances_from_dataframe(
11771172
if not unrecognized_labels.empty:
11781173
metadata["unrecognized_labels"] = MetadataValue.md(unrecognized_labels.to_markdown(index=False))
11791174

1175+
if errors:
1176+
if config.strict:
1177+
raise RuntimeError(
1178+
"Missing or inconsistent metadata in BSS %s worksheet '%s':\n%s"
1179+
% (partition_key, worksheet_name, "\n".join(errors))
1180+
)
1181+
else:
1182+
context.log.error(
1183+
"Missing or inconsistent metadata in BSS %s worksheet '%s':\n%s"
1184+
% (partition_key, worksheet_name, "\n".join(errors))
1185+
)
1186+
metadata["errors"] = MetadataValue.md(f'```text\n{"\n".join(errors)}\n```')
1187+
11801188
return Output(
11811189
result,
11821190
metadata=metadata,
@@ -1185,6 +1193,7 @@ def get_instances_from_dataframe(
11851193

11861194
def get_annotated_instances_from_dataframe(
11871195
context: AssetExecutionContext,
1196+
config: BSSMetadataConfig,
11881197
livelihood_activity_dataframe: pd.DataFrame,
11891198
livelihood_summary_dataframe: pd.DataFrame,
11901199
activity_type: str,
@@ -1203,6 +1212,7 @@ def get_annotated_instances_from_dataframe(
12031212
# Get the detail LivelihoodStrategy and LivelihoodActivity instances
12041213
output = get_instances_from_dataframe(
12051214
context,
1215+
config,
12061216
livelihood_activity_dataframe,
12071217
livelihood_zone_baseline,
12081218
activity_type,
@@ -1214,6 +1224,7 @@ def get_annotated_instances_from_dataframe(
12141224
# Get the summary instances
12151225
reported_summary_output = get_instances_from_dataframe(
12161226
context,
1227+
config,
12171228
livelihood_summary_dataframe,
12181229
livelihood_zone_baseline,
12191230
ActivityLabel.LivelihoodActivityType.LIVELIHOOD_SUMMARY,
@@ -1325,7 +1336,9 @@ def get_annotated_instances_from_dataframe(
13251336
summary_df.replace(pd.NA, None).to_markdown(floatfmt=",.0f")
13261337
)
13271338

1328-
# Move the preview and metadata item to the end of the dict
1339+
# Move the errors and preview metadata items to the end of the dict
1340+
if "errors" in output.metadata:
1341+
output.metadata["errors"] = output.metadata.pop("errors")
13291342
output.metadata["preview"] = output.metadata.pop("preview")
13301343

13311344
return output
@@ -1334,26 +1347,27 @@ def get_annotated_instances_from_dataframe(
13341347
@asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
13351348
def livelihood_activity_instances(
13361349
context: AssetExecutionContext,
1350+
config: BSSMetadataConfig,
13371351
livelihood_activity_dataframe: pd.DataFrame,
13381352
livelihood_summary_dataframe: pd.DataFrame,
13391353
) -> Output[dict]:
13401354
"""
13411355
LivelihoodStrategy and LivelihoodActivity instances extracted from the BSS.
13421356
"""
1343-
output = get_annotated_instances_from_dataframe(
1357+
return get_annotated_instances_from_dataframe(
13441358
context,
1359+
config,
13451360
livelihood_activity_dataframe,
13461361
livelihood_summary_dataframe,
1347-
ActivityLabel.LivelihoodActivityType.LIVELIHOOD_SUMMARY,
1362+
ActivityLabel.LivelihoodActivityType.LIVELIHOOD_ACTIVITY,
13481363
len(HEADER_ROWS),
13491364
)
13501365

1351-
return output
1352-
13531366

13541367
@asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
13551368
def livelihood_activity_valid_instances(
13561369
context: AssetExecutionContext,
1370+
config: BSSMetadataConfig,
13571371
livelihood_activity_instances: dict,
13581372
wealth_characteristic_instances: dict,
13591373
) -> Output[dict]:
@@ -1369,16 +1383,7 @@ def livelihood_activity_valid_instances(
13691383
**{"WealthGroup": wealth_characteristic_instances["WealthGroup"]},
13701384
**livelihood_activity_instances,
13711385
}
1372-
valid_instances, metadata = validate_instances(context, livelihood_activity_instances, partition_key)
1373-
metadata = {f"num_{key.lower()}": len(value) for key, value in valid_instances.items()}
1374-
metadata["total_instances"] = sum(len(value) for value in valid_instances.values())
1375-
metadata["preview"] = MetadataValue.md(
1376-
f"```json\n{json.dumps(valid_instances, indent=4, ensure_ascii=False)}\n```"
1377-
)
1378-
return Output(
1379-
valid_instances,
1380-
metadata=metadata,
1381-
)
1386+
return validate_instances(context, config, livelihood_activity_instances, partition_key)
13821387

13831388

13841389
@asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
@@ -1390,11 +1395,7 @@ def livelihood_activity_fixture(
13901395
"""
13911396
Django fixture for the Livelihood Activities from a BSS.
13921397
"""
1393-
fixture, metadata = get_fixture_from_instances(livelihood_activity_valid_instances)
1394-
return Output(
1395-
fixture,
1396-
metadata=metadata,
1397-
)
1398+
return get_fixture_from_instances(livelihood_activity_valid_instances)
13981399

13991400

14001401
@asset(partitions_def=bss_instances_partitions_def)
@@ -1405,8 +1406,4 @@ def imported_livelihood_activities(
14051406
"""
14061407
Imported Django fixtures for a BSS, added to the Django database.
14071408
"""
1408-
metadata = import_fixture(livelihood_activity_fixture)
1409-
return Output(
1410-
None,
1411-
metadata=metadata,
1412-
)
1409+
return import_fixture(livelihood_activity_fixture)

pipelines/assets/other_cash_income.py

Lines changed: 9 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,11 @@
3838
| 32 | income | | | | | | | | | |
3939
""" # NOQA: E501
4040

41-
import json
4241
import os
4342

4443
import django
4544
import pandas as pd
46-
from dagster import AssetExecutionContext, MetadataValue, Output, asset
45+
from dagster import AssetExecutionContext, Output, asset
4746

4847
from ..configs import BSSMetadataConfig
4948
from ..partitions import bss_instances_partitions_def
@@ -127,29 +126,30 @@ def summary_other_cash_income_labels_dataframe(
127126
@asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
128127
def other_cash_income_instances(
129128
context: AssetExecutionContext,
129+
config: BSSMetadataConfig,
130130
other_cash_income_dataframe: pd.DataFrame,
131131
livelihood_summary_dataframe: pd.DataFrame,
132132
) -> Output[dict]:
133133
"""
134134
LivelihoodStrategy and LivelihoodActivity instances extracted from the BSS.
135135
"""
136136
if other_cash_income_dataframe.empty:
137-
output = {}
137+
return Output({}, metadata={"message": "No Data2 worksheet found in this BSS"})
138138

139-
output = get_annotated_instances_from_dataframe(
139+
return get_annotated_instances_from_dataframe(
140140
context,
141+
config,
141142
other_cash_income_dataframe,
142143
livelihood_summary_dataframe,
143144
ActivityLabel.LivelihoodActivityType.OTHER_CASH_INCOME,
144145
len(HEADER_ROWS),
145146
)
146147

147-
return output
148-
149148

150149
@asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
151150
def other_cash_income_valid_instances(
152151
context: AssetExecutionContext,
152+
config: BSSMetadataConfig,
153153
other_cash_income_instances: dict,
154154
wealth_characteristic_instances: dict,
155155
) -> Output[dict]:
@@ -165,16 +165,7 @@ def other_cash_income_valid_instances(
165165
**{"WealthGroup": wealth_characteristic_instances["WealthGroup"]},
166166
**other_cash_income_instances,
167167
}
168-
valid_instances, metadata = validate_instances(context, other_cash_income_instances, partition_key)
169-
metadata = {f"num_{key.lower()}": len(value) for key, value in valid_instances.items()}
170-
metadata["total_instances"] = sum(len(value) for value in valid_instances.values())
171-
metadata["preview"] = MetadataValue.md(
172-
f"```json\n{json.dumps(valid_instances, indent=4, ensure_ascii=False)}\n```"
173-
)
174-
return Output(
175-
valid_instances,
176-
metadata=metadata,
177-
)
168+
return validate_instances(context, config, other_cash_income_instances, partition_key)
178169

179170

180171
@asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
@@ -186,11 +177,7 @@ def other_cash_income_fixture(
186177
"""
187178
Django fixture for the Livelihood Activities from a BSS.
188179
"""
189-
fixture, metadata = get_fixture_from_instances(other_cash_income_valid_instances)
190-
return Output(
191-
fixture,
192-
metadata=metadata,
193-
)
180+
return get_fixture_from_instances(other_cash_income_valid_instances)
194181

195182

196183
@asset(partitions_def=bss_instances_partitions_def)
@@ -201,8 +188,4 @@ def imported_other_cash_income_activities(
201188
"""
202189
Imported Django fixtures for a BSS, added to the Django database.
203190
"""
204-
metadata = import_fixture(other_cash_income_fixture)
205-
return Output(
206-
None,
207-
metadata=metadata,
208-
)
191+
return import_fixture(other_cash_income_fixture)

pipelines/assets/wealth_characteristic.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -524,13 +524,14 @@ def wealth_characteristic_instances(
524524
@asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
525525
def wealth_characteristic_valid_instances(
526526
context: AssetExecutionContext,
527+
config: BSSMetadataConfig,
527528
wealth_characteristic_instances,
528529
) -> Output[dict]:
529530
"""
530531
Valid WealthGroup and WealthGroupCharacteristicValue instances from a BSS, ready to be loaded via a Django fixture.
531532
"""
532533
partition_key = context.asset_partition_key_for_output()
533-
valid_instances, metadata = validate_instances(context, wealth_characteristic_instances, partition_key)
534+
valid_instances, metadata = validate_instances(context, config, wealth_characteristic_instances, partition_key)
534535
metadata = {f"num_{key.lower()}": len(value) for key, value in valid_instances.items()}
535536
metadata["total_instances"] = sum(len(value) for value in valid_instances.values())
536537
metadata["preview"] = MetadataValue.md(

pipelines/assets/wild_foods.py

Lines changed: 9 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,11 @@
5656
| 85 | TOTAL FISHING KCALS (%) | 0.009088932377 | 0.005577299413 | 0 | 0.009639776763 | 0.01133165595 | 0 | 0 | 0.009708632311 | 0 |
5757
""" # NOQA: E501
5858

59-
import json
6059
import os
6160

6261
import django
6362
import pandas as pd
64-
from dagster import AssetExecutionContext, MetadataValue, Output, asset
63+
from dagster import AssetExecutionContext, Output, asset
6564

6665
from ..configs import BSSMetadataConfig
6766
from ..partitions import bss_instances_partitions_def
@@ -137,29 +136,30 @@ def summary_wild_foods_labels_dataframe(
137136
@asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
138137
def wild_foods_instances(
139138
context: AssetExecutionContext,
139+
config: BSSMetadataConfig,
140140
wild_foods_dataframe: pd.DataFrame,
141141
livelihood_summary_dataframe: pd.DataFrame,
142142
) -> Output[dict]:
143143
"""
144144
LivelihoodStrategy and LivelihoodActivity instances extracted from the BSS.
145145
"""
146146
if wild_foods_dataframe.empty:
147-
output = {}
147+
return Output({}, metadata={"message": "No Data3 worksheet found in this BSS"})
148148

149-
output = get_annotated_instances_from_dataframe(
149+
return get_annotated_instances_from_dataframe(
150150
context,
151+
config,
151152
wild_foods_dataframe,
152153
livelihood_summary_dataframe,
153154
ActivityLabel.LivelihoodActivityType.WILD_FOODS,
154155
len(HEADER_ROWS),
155156
)
156157

157-
return output
158-
159158

160159
@asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
161160
def wild_foods_valid_instances(
162161
context: AssetExecutionContext,
162+
config: BSSMetadataConfig,
163163
wild_foods_instances: dict,
164164
wealth_characteristic_instances: dict,
165165
) -> Output[dict]:
@@ -175,16 +175,7 @@ def wild_foods_valid_instances(
175175
**{"WealthGroup": wealth_characteristic_instances["WealthGroup"]},
176176
**wild_foods_instances,
177177
}
178-
valid_instances, metadata = validate_instances(context, wild_foods_instances, partition_key)
179-
metadata = {f"num_{key.lower()}": len(value) for key, value in valid_instances.items()}
180-
metadata["total_instances"] = sum(len(value) for value in valid_instances.values())
181-
metadata["preview"] = MetadataValue.md(
182-
f"```json\n{json.dumps(valid_instances, indent=4, ensure_ascii=False)}\n```"
183-
)
184-
return Output(
185-
valid_instances,
186-
metadata=metadata,
187-
)
178+
return validate_instances(context, config, wild_foods_instances, partition_key)
188179

189180

190181
@asset(partitions_def=bss_instances_partitions_def, io_manager_key="json_io_manager")
@@ -196,11 +187,7 @@ def wild_foods_fixture(
196187
"""
197188
Django fixture for the Livelihood Activities from a BSS.
198189
"""
199-
fixture, metadata = get_fixture_from_instances(wild_foods_valid_instances)
200-
return Output(
201-
fixture,
202-
metadata=metadata,
203-
)
190+
return get_fixture_from_instances(wild_foods_valid_instances)
204191

205192

206193
@asset(partitions_def=bss_instances_partitions_def)
@@ -211,8 +198,4 @@ def imported_wild_foods_activities(
211198
"""
212199
Imported Django fixtures for a BSS, added to the Django database.
213200
"""
214-
metadata = import_fixture(wild_foods_fixture)
215-
return Output(
216-
None,
217-
metadata=metadata,
218-
)
201+
return import_fixture(wild_foods_fixture)

pyproject.toml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
1+
[project]
2+
name = "hea-database-development"
3+
version = "0.1.0"
4+
description = "The HEA Database manages HEA Baseline data."
5+
readme = "README.md"
6+
requires-python = ">=3.12"
7+
18
[tool.ruff]
29
line-length = 119
3-
target-version = 'py310'
10+
target-version = 'py312'
411
exclude = [
512
'.eggs', # exclude a few common directories in the
613
'.git', # root of the project
@@ -27,7 +34,7 @@ docstring-quotes = "double"
2734

2835
[tool.black]
2936
line-length = 119
30-
target-version = ['py310']
37+
target-version = ['py312']
3138
include = '\.pyi?$'
3239
exclude = '''
3340

0 commit comments

Comments
 (0)