Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cognite/neat/_data_model/models/dms/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
ENUM_VALUE_IDENTIFIER_PATTERN = r"^[_A-Za-z][_0-9A-Za-z]{0,127}$"
DM_VERSION_PATTERN = r"^[a-zA-Z0-9]([.a-zA-Z0-9_-]{0,41}[a-zA-Z0-9])?$"
DATA_MODEL_DESCRIPTION_MAX_LENGTH = 1024
ENUM_VALUES_MAX_COUNT = 32
FORBIDDEN_ENUM_VALUES = frozenset({"true", "false", "null"})
FORBIDDEN_SPACES = frozenset(["space", "cdf", "dms", "pg3", "shared", "system", "node", "edge"])
FORBIDDEN_CONTAINER_AND_VIEW_EXTERNAL_IDS = frozenset(
Expand Down
31 changes: 19 additions & 12 deletions cognite/neat/_data_model/models/dms/_data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from cognite.neat._utils.auxiliary import get_concrete_subclasses
from cognite.neat._utils.useful_types import BaseModelObject

from ._constants import ENUM_VALUE_IDENTIFIER_PATTERN, FORBIDDEN_ENUM_VALUES, INSTANCE_ID_PATTERN
from ._constants import ENUM_VALUE_IDENTIFIER_PATTERN, ENUM_VALUES_MAX_COUNT, FORBIDDEN_ENUM_VALUES, INSTANCE_ID_PATTERN
from ._references import ContainerReference, ViewReference


Expand Down Expand Up @@ -147,26 +147,33 @@ class EnumProperty(PropertyTypeDefinition):
values: dict[str, EnumValue] = Field(
description="A set of all possible values for the enum property.",
min_length=1,
max_length=32,
max_length=ENUM_VALUES_MAX_COUNT,
)

@field_validator("values", mode="after")
def _valid_enum_value(cls, val: dict[str, EnumValue]) -> dict[str, EnumValue]:
errors: list[str] = []
invalid_pattern: list[str] = []
invalid_length: list[str] = []
forbidden: list[str] = []
for key in val.keys():
if not _ENUM_KEY.match(key):
errors.append(
f"Enum value {key!r} is not valid. Enum values must match "
f"the pattern: {ENUM_VALUE_IDENTIFIER_PATTERN}"
)
invalid_pattern.append(key)
if len(key) > 128 or len(key) < 1:
errors.append(f"Enum value {key!r} must be between 1 and 128 characters long.")
invalid_length.append(key)
if key.lower() in FORBIDDEN_ENUM_VALUES:
errors.append(
f"Enum value {key!r} cannot be any of the following reserved values: {FORBIDDEN_ENUM_VALUES}"
)
forbidden.append(key)
errors: list[str] = []
if invalid_pattern:
keys = ", ".join(repr(k) for k in invalid_pattern)
errors.append(f"Enum values {keys} do not match the required pattern: {ENUM_VALUE_IDENTIFIER_PATTERN}")
if invalid_length:
keys = ", ".join(repr(k) for k in invalid_length)
errors.append(f"Enum values {keys} must be between 1 and 128 characters long.")
if forbidden:
keys = ", ".join(repr(k) for k in forbidden)
errors.append(f"Enum values {keys} cannot be any of the following reserved values: {FORBIDDEN_ENUM_VALUES}")
if errors:
raise ValueError(";".join(errors))
raise ValueError("; ".join(errors))
return val


Expand Down
29 changes: 22 additions & 7 deletions cognite/neat/_utils/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

from pydantic_core import ErrorDetails

from cognite.neat._data_model.models.dms._constants import ENUM_VALUES_MAX_COUNT


def as_json_path(loc: tuple[str | int, ...]) -> str:
"""Converts a location tuple to a JSON path.
Expand Down Expand Up @@ -92,7 +94,9 @@ def humanize_validation_error(
f"type {type(error['input']).__name__}."
)
elif type_ == "union_tag_invalid":
msg = error["msg"].replace(", 'direct'", "").replace("found using 'type' ", "").replace("tag", "value")
ctx = error["ctx"]
expected_tags = ctx["expected_tags"].replace(", 'direct'", "")
msg = f"Input value '{ctx['tag']}' does not match any of the expected values: {expected_tags}"
elif type_ == "string_pattern_mismatch":
msg = f"string '{error['input']}' does not match the required pattern: '{error['ctx']['pattern']}'."

Expand All @@ -112,7 +116,7 @@ def humanize_validation_error(

if len(loc) >= 3 and context.field_name == "column" and loc[-3:] == ("type", "enum", "values"):
# Special handling for enum errors in table columns
msg = _enum_message(type_, loc, context)
msg = _enum_message(type_, loc, context, error["msg"])
elif len(loc) > 1 and type_ in {"extra_forbidden", "missing"}:
if context.missing_required_descriptor == "empty" and type_ == "missing":
# This is a table so we modify the error message.
Expand All @@ -138,17 +142,28 @@ def humanize_validation_error(
return msg


def _enum_message(type_: str, loc: tuple[int | str, ...], context: ValidationContext) -> str:
def _enum_message(type_: str, loc: tuple[int | str, ...], context: ValidationContext, raw_msg: str) -> str:
"""Special handling of enum errors in table columns."""

location = context.humanize_location(loc[:-1] if loc[-1] == "values" else loc)

if loc[-1] != "values":
raise RuntimeError("This is a neat bug, report to the team.")
raise RuntimeError(
f"_enum_message called with unexpected loc={loc!r}, type_={type_!r}, raw_msg={raw_msg!r}. "
"This is a bug in NEAT."
)
if type_ == "missing":
return (
f"In {context.humanize_location(loc[:-1])} definition should include "
f"In {location}: definition should include "
"a reference to a collection in the 'Enum' sheet (e.g., collection='MyEnumCollection')."
)
elif type_ == "too_short":
return f"In {context.humanize_location(loc[:-1])} collection is not defined in the 'Enum' sheet"
return f"In {location}: collection is not defined in the 'Enum' sheet"
elif type_ == "too_long":
return f"In {location}: collection has too many possible values (max {ENUM_VALUES_MAX_COUNT} allowed)"
elif type_ == "value_error":
detail = raw_msg.removeprefix("Value error, ")
return f"In {location}: {detail}"
else:
raise RuntimeError("This is a neat bug, report to the team.")
detail = raw_msg.removeprefix("Value error, ")
return f"In {location}: {detail}"
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,8 @@ def test_validate_data_types(self, data_type: dict[str, Any]) -> None:
pytest.param(
{"type": "enum", "values": {"validValue1": {}, "invalid-value": {}}},
{
"In enum.values enum value 'invalid-value' is not valid. Enum values must "
"match the pattern: ^[_A-Za-z][_0-9A-Za-z]{0,127}$."
"In enum.values enum values 'invalid-value' do not match the required "
"pattern: ^[_A-Za-z][_0-9A-Za-z]{0,127}$."
},
id="Enum with invalid value key",
),
Expand Down
12 changes: 10 additions & 2 deletions tests/tests_unit/test_utils/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class TestHumanizeValidationError:
missing_required_descriptor="empty",
),
(
"In table 'Properties' row 277 column 'Value Type' -> enum"
"In table 'Properties' row 277 column 'Value Type' -> enum:"
" definition should include a reference to a collection in the 'Enum' sheet"
" (e.g., collection='MyEnumCollection')."
),
Expand All @@ -94,7 +94,7 @@ class TestHumanizeValidationError:
missing_required_descriptor="empty",
),
(
"In table 'Properties' row 277 column 'Value Type' -> enum"
"In table 'Properties' row 277 column 'Value Type' -> enum:"
" collection is not defined in the 'Enum' sheet."
),
id="Missing enum collection",
Expand All @@ -110,6 +110,14 @@ class TestHumanizeValidationError:
"'json', 'timeseries', 'file', 'sequence', 'enum', 'direct'"
),
"input": {"maxListSize": None, "list": False, "type": "primitive"},
"ctx": {
"discriminator": "'type'",
"tag": "primitive",
"expected_tags": (
"'text', 'float32', 'float64', 'boolean', 'int32', 'int64', 'timestamp', 'date', "
"'json', 'timeseries', 'file', 'sequence', 'enum', 'direct'"
),
},
}
),
ValidationContext(
Expand Down