Skip to content

Commit a5d30a3

Browse files
committed
Apply field checks in xxx_valid_instances - see HEA-57
1 parent 0293531 commit a5d30a3

File tree

1 file changed

+27
-1
lines changed

1 file changed

+27
-1
lines changed

pipelines/assets/fixtures.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from io import StringIO
77

88
import django
9+
import numpy as np
910
import pandas as pd
1011
from dagster import AssetExecutionContext, MetadataValue, Output, asset
1112
from django.core.files import File
@@ -174,6 +175,31 @@ def validate_instances(
174175
)
175176
errors.append(error)
176177

178+
# Use the Django model to validate the fields, so we can apply already defined model validations and
179+
# return informative error messages.
180+
fields = [
181+
field
182+
for field in model._meta.concrete_fields
183+
if not isinstance(field, models.ForeignKey) and field.name in df
184+
]
185+
instance = model()
186+
for record in df.replace(np.nan, None).itertuples():
187+
for field in fields:
188+
value = getattr(record, field.name)
189+
if not value and field.null:
190+
# Replace empty values with None for optional fields (note: `not value` matches any falsy value, e.g. 0 or False, not only empty strings)
191+
value = None
192+
try:
193+
field.clean(value, instance)
194+
except Exception as e:
195+
error = (
196+
f'Invalid {field.name} value {value}: "{", ".join(e.error_list[0].messages)}"\nRecord '
197+
f"{record.Index} from cell '{record.bss_sheet}'!{record.bss_column}{record.bss_row} "
198+
f"for {model_name} in record "
199+
f'{str({k: v for k,v in record._asdict().items() if k != "Index"})}.'
200+
)
201+
errors.append(error)
202+
177203
# Check that the kcals/kg matches the values in the ClassifiedProduct model, if it's present in the BSS
178204
if model_name == "LivelihoodActivity" and "product__kcals_per_unit" in df:
179205
df["product"] = df["livelihood_strategy"].apply(lambda x: x[4])
@@ -182,7 +208,7 @@ def validate_instances(
182208
df["reference_unit_of_measure"] = df["product"].apply(lambda x: x.unit_of_measure)
183209
for record in df[df["product__kcals_per_unit"] != df["reference_kcals_per_unit"]].itertuples():
184210
error = (
185-
f"Non-standard value {record.product__kcals_per_unit} in '{record.column}"
211+
f"Non-standard value {record.product__kcals_per_unit} in '{record.column}' "
186212
f"for {model_name} in record "
187213
f'{str({k: v for k,v in record._asdict().items() if k != "Index"})}. '
188214
f"Expected {record.reference_kcals_per_unit}/{record.reference_unit_of_measure} for {record.product}"

0 commit comments

Comments
 (0)