Skip to content

Commit bd9f36f

Browse files
authored
Merge pull request #131 from FEWS-NET/HEA-572/relax_data_completeness_constraint
HEA-572 Add field-level checks to xxx_valid_instances
2 parents 86d11ce + fe6aa6d commit bd9f36f

File tree

4 files changed

+54
-2
lines changed

4 files changed

+54
-2
lines changed
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Generated by Django 5.1.1 on 2024-11-22 03:51
2+
3+
import django.core.validators
4+
from django.db import migrations, models
5+
6+
7+
class Migration(migrations.Migration):
8+
9+
dependencies = [
10+
("baseline", "0016_alter_livelihoodstrategy_additional_identifier_and_more"),
11+
]
12+
13+
operations = [
14+
migrations.AlterField(
15+
model_name="livelihoodactivity",
16+
name="percentage_kcals",
17+
field=models.FloatField(
18+
blank=True,
19+
help_text="Percentage of annual household kcal requirement provided by this livelihood strategy",
20+
null=True,
21+
validators=[django.core.validators.MinValueValidator(0)],
22+
verbose_name="Percentage of required kcals",
23+
),
24+
),
25+
]

apps/baseline/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1125,6 +1125,7 @@ class LivelihoodActivity(common_models.Model):
11251125
percentage_kcals = models.FloatField(
11261126
blank=True,
11271127
null=True,
1128+
validators=[MinValueValidator(0)],
11281129
verbose_name=_("Percentage of required kcals"),
11291130
help_text=_("Percentage of annual household kcal requirement provided by this livelihood strategy"),
11301131
)

pipelines/assets/fixtures.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from io import StringIO
77

88
import django
9+
import numpy as np
910
import pandas as pd
1011
from dagster import AssetExecutionContext, MetadataValue, Output, asset
1112
from django.core.files import File
@@ -174,6 +175,31 @@ def validate_instances(
174175
)
175176
errors.append(error)
176177

178+
# Use the Django model to validate the fields, so we can apply already defined model validations and
179+
# return informative error messages.
180+
fields = [
181+
field
182+
for field in model._meta.concrete_fields
183+
if not isinstance(field, models.ForeignKey) and field.name in df
184+
]
185+
instance = model()
186+
for record in df.replace(np.nan, None).itertuples():
187+
for field in fields:
188+
value = getattr(record, field.name)
189+
if not value and field.null:
190+
# Replace empty strings (and any other falsy value, e.g. 0 or False) with None for optional fields
191+
value = None
192+
try:
193+
field.clean(value, instance)
194+
except Exception as e:
195+
error = (
196+
f'Invalid {field.name} value {value}: "{", ".join(e.error_list[0].messages)}"\nRecord '
197+
f"{record.Index} from cell '{record.bss_sheet}'!{record.bss_column}{record.bss_row} "
198+
f"for {model_name} in record "
199+
f'{str({k: v for k,v in record._asdict().items() if k != "Index"})}.'
200+
)
201+
errors.append(error)
202+
177203
# Check that the kcals/kg matches the values in the ClassifiedProduct model, if it's present in the BSS
178204
if model_name == "LivelihoodActivity" and "product__kcals_per_unit" in df:
179205
df["product"] = df["livelihood_strategy"].apply(lambda x: x[4])
@@ -182,7 +208,7 @@ def validate_instances(
182208
df["reference_unit_of_measure"] = df["product"].apply(lambda x: x.unit_of_measure)
183209
for record in df[df["product__kcals_per_unit"] != df["reference_kcals_per_unit"]].itertuples():
184210
error = (
185-
f"Non-standard value {record.product__kcals_per_unit} in '{record.column}"
211+
f"Non-standard value {record.product__kcals_per_unit} in '{record.column}' "
186212
f"for {model_name} in record "
187213
f'{str({k: v for k,v in record._asdict().items() if k != "Index"})}. '
188214
f"Expected {record.reference_kcals_per_unit}/{record.reference_unit_of_measure} for {record.product}"

pipelines/assets/livelihood_activity.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ def get_livelihood_activity_regexes() -> list:
180180
placeholder_patterns = {
181181
"label_pattern": r"[a-zà-ÿ][a-zà-ÿ',/ \.\>\-\(\)]+?",
182182
"product_pattern": r"(?P<product_id>[a-zà-ÿ][a-zà-ÿ',/ \.\>\-\(\)]+?)",
183-
"season_pattern": r"(?P<season>season [12]|saison [12]|[12][a-z] season||[12][a-zà-ÿ] saison|r[eé]colte principale|gu|deyr+?)", # NOQA: E501
183+
"season_pattern": r"(?P<season>season [12]|saison [12]|[12][a-z] season||[12][a-zà-ÿ] saison|r[eé]colte principale|principale r[eé]colte|gu|deyr+?)", # NOQA: E501
184184
"additional_identifier_pattern": r"\(?(?P<additional_identifier>rainfed|irrigated|pluviale?|irriguée|submersion libre|submersion contrôlée|flottant)\)?",
185185
"unit_of_measure_pattern": r"(?P<unit_of_measure_id>[a-z]+)",
186186
"nbr_pattern": r"(?:n[b|o]r?)\.?",

0 commit comments

Comments
 (0)