Skip to content

Commit 6e57b48

Browse files
authored
Error on invalid field format being sent to Validator (#462)
1 parent 68d498c commit 6e57b48

File tree

3 files changed

+49
-1
lines changed

3 files changed

+49
-1
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "dapla-toolbelt-pseudo"
3-
version = "4.5.0"
3+
version = "5.0.0"
44
description = "Pseudonymization extensions for Dapla"
55
authors = ["Dapla Developers <dapla-platform-developers@ssb.no>"]
66
license = "MIT"

src/dapla_pseudo/v1/validation.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,8 @@ def validate_map_to_stable_id(
110110
Returns:
111111
Result: Containing a result dataframe with associated metadata.
112112
"""
113+
Validator._ensure_field_valid(self._field, self._dataframe)
114+
113115
response: requests.Response = _client()._post_to_sid_endpoint(
114116
"sid/lookup/batch",
115117
self._dataframe[self._field].to_list(),
@@ -142,3 +144,40 @@ def validate_map_to_stable_id(
142144
],
143145
)
144146
)
147+
148+
@staticmethod
def _ensure_field_valid(field: str, dataframe: pl.DataFrame) -> None:
    """Ensure that all values in the identifier field are numeric and valid.

    This is necessary for SID mapping.

    Args:
        field (str): The identifier field.
        dataframe (pl.DataFrame): The dataframe to validate.

    Raises:
        ValueError: If the field does not exist in the dataframe, if the
            field contains null values, or if any value is not a non-empty
            string consisting solely of the ASCII digits 0-9.
    """
    if field not in dataframe.columns:
        raise ValueError(f"Field '{field}' does not exist in the dataframe.")

    # Nulls are rejected up front so the pattern check below never has to
    # deal with a null match result.
    if dataframe.select(pl.col(field)).to_series().has_nulls():
        raise ValueError(
            f"Field '{field}' contains None/NaN values which are invalid for SID mapping."
        )

    # An explicit ASCII class is used instead of `\d`: polars patterns follow
    # the Rust `regex` crate, where `\d` also matches non-ASCII decimal digits.
    # The `+` quantifier makes the empty string invalid as well.
    allowed_pattern = r"^[0-9]+$"  # only ASCII digits, at least one
    invalid_values = (
        dataframe.filter(~pl.col(field).str.contains(allowed_pattern))
        .select(pl.col(field))
        .to_series()
        .to_list()
    )

    if invalid_values:
        raise ValueError(
            f"Field '{field}' contains non-numeric values which are invalid for SID mapping: {invalid_values}"
        )

tests/v1/integration/test_validate.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,12 @@ def test_validate_not_valid(df_personer: pl.DataFrame) -> None:
2828
)
2929
pl_result = result.to_polars()
3030
assert sorted(pl_result["fnr"].to_list()) == sorted(expected_result)
31+
32+
33+
@pytest.mark.usefixtures("setup")
@pytest.mark.parametrize("invalid_input", ["", "abc", None])
@integration_test()
def test_validate_invalid_input(invalid_input: str | None) -> None:
    """Check that an empty, non-numeric or null identifier raises ValueError."""
    df_invalid = pl.DataFrame({"fnr": [invalid_input]})
    # Match on the message fragment shared by the null and the non-numeric
    # errors, so that an unrelated ValueError cannot satisfy this test.
    with pytest.raises(ValueError, match="invalid for SID mapping"):
        Validator.from_polars(df_invalid).on_field("fnr").validate_map_to_stable_id()

0 commit comments

Comments
 (0)