diff --git a/cognite/neat/_data_model/models/dms/_constants.py b/cognite/neat/_data_model/models/dms/_constants.py index 8202c7fbc..2c785a00c 100644 --- a/cognite/neat/_data_model/models/dms/_constants.py +++ b/cognite/neat/_data_model/models/dms/_constants.py @@ -5,6 +5,7 @@ ENUM_VALUE_IDENTIFIER_PATTERN = r"^[_A-Za-z][_0-9A-Za-z]{0,127}$" DM_VERSION_PATTERN = r"^[a-zA-Z0-9]([.a-zA-Z0-9_-]{0,41}[a-zA-Z0-9])?$" DATA_MODEL_DESCRIPTION_MAX_LENGTH = 1024 +ENUM_VALUES_MAX_COUNT = 32 FORBIDDEN_ENUM_VALUES = frozenset({"true", "false", "null"}) FORBIDDEN_SPACES = frozenset(["space", "cdf", "dms", "pg3", "shared", "system", "node", "edge"]) FORBIDDEN_CONTAINER_AND_VIEW_EXTERNAL_IDS = frozenset( diff --git a/cognite/neat/_data_model/models/dms/_data_types.py b/cognite/neat/_data_model/models/dms/_data_types.py index 2e28edf24..6fe5cd578 100644 --- a/cognite/neat/_data_model/models/dms/_data_types.py +++ b/cognite/neat/_data_model/models/dms/_data_types.py @@ -8,7 +8,7 @@ from cognite.neat._utils.auxiliary import get_concrete_subclasses from cognite.neat._utils.useful_types import BaseModelObject -from ._constants import ENUM_VALUE_IDENTIFIER_PATTERN, FORBIDDEN_ENUM_VALUES, INSTANCE_ID_PATTERN +from ._constants import ENUM_VALUE_IDENTIFIER_PATTERN, ENUM_VALUES_MAX_COUNT, FORBIDDEN_ENUM_VALUES, INSTANCE_ID_PATTERN from ._references import ContainerReference, ViewReference @@ -147,26 +147,33 @@ class EnumProperty(PropertyTypeDefinition): values: dict[str, EnumValue] = Field( description="A set of all possible values for the enum property.", min_length=1, - max_length=32, + max_length=ENUM_VALUES_MAX_COUNT, ) @field_validator("values", mode="after") def _valid_enum_value(cls, val: dict[str, EnumValue]) -> dict[str, EnumValue]: - errors: list[str] = [] + invalid_pattern: list[str] = [] + invalid_length: list[str] = [] + forbidden: list[str] = [] for key in val.keys(): if not _ENUM_KEY.match(key): - errors.append( - f"Enum value {key!r} is not valid. Enum values must match " - f"the pattern: {ENUM_VALUE_IDENTIFIER_PATTERN}" - ) + invalid_pattern.append(key) if len(key) > 128 or len(key) < 1: - errors.append(f"Enum value {key!r} must be between 1 and 128 characters long.") + invalid_length.append(key) if key.lower() in FORBIDDEN_ENUM_VALUES: - errors.append( - f"Enum value {key!r} cannot be any of the following reserved values: {FORBIDDEN_ENUM_VALUES}" - ) + forbidden.append(key) + errors: list[str] = [] + if invalid_pattern: + keys = ", ".join(repr(k) for k in invalid_pattern) + errors.append(f"Enum values {keys} do not match the required pattern: {ENUM_VALUE_IDENTIFIER_PATTERN}") + if invalid_length: + keys = ", ".join(repr(k) for k in invalid_length) + errors.append(f"Enum values {keys} must be between 1 and 128 characters long.") + if forbidden: + keys = ", ".join(repr(k) for k in forbidden) + errors.append(f"Enum values {keys} cannot be any of the following reserved values: {FORBIDDEN_ENUM_VALUES}") if errors: - raise ValueError(";".join(errors)) + raise ValueError("; ".join(errors)) return val diff --git a/cognite/neat/_utils/validation.py b/cognite/neat/_utils/validation.py index 013b173f0..5d9611c3c 100644 --- a/cognite/neat/_utils/validation.py +++ b/cognite/neat/_utils/validation.py @@ -4,6 +4,8 @@ from pydantic_core import ErrorDetails +from cognite.neat._data_model.models.dms._constants import ENUM_VALUES_MAX_COUNT + def as_json_path(loc: tuple[str | int, ...]) -> str: """Converts a location tuple to a JSON path. @@ -92,7 +94,9 @@ def humanize_validation_error( f"type {type(error['input']).__name__}." ) elif type_ == "union_tag_invalid": - msg = error["msg"].replace(", 'direct'", "").replace("found using 'type' ", "").replace("tag", "value") + ctx = error["ctx"] + expected_tags = ctx["expected_tags"].replace(", 'direct'", "") + msg = f"Input value '{ctx['tag']}' does not match any of the expected values: {expected_tags}" elif type_ == "string_pattern_mismatch": msg = f"string '{error['input']}' does not match the required pattern: '{error['ctx']['pattern']}'." @@ -112,7 +116,7 @@ def humanize_validation_error( if len(loc) >= 3 and context.field_name == "column" and loc[-3:] == ("type", "enum", "values"): # Special handling for enum errors in table columns - msg = _enum_message(type_, loc, context) + msg = _enum_message(type_, loc, context, error["msg"]) elif len(loc) > 1 and type_ in {"extra_forbidden", "missing"}: if context.missing_required_descriptor == "empty" and type_ == "missing": # This is a table so we modify the error message. @@ -138,17 +142,28 @@ def humanize_validation_error( return msg -def _enum_message(type_: str, loc: tuple[int | str, ...], context: ValidationContext) -> str: +def _enum_message(type_: str, loc: tuple[int | str, ...], context: ValidationContext, raw_msg: str) -> str: """Special handling of enum errors in table columns.""" + location = context.humanize_location(loc[:-1] if loc[-1] == "values" else loc) + if loc[-1] != "values": - raise RuntimeError("This is a neat bug, report to the team.") + raise RuntimeError( + f"_enum_message called with unexpected loc={loc!r}, type_={type_!r}, raw_msg={raw_msg!r}. " + "This is a bug in NEAT." + ) if type_ == "missing": return ( - f"In {context.humanize_location(loc[:-1])} definition should include " + f"In {location}: definition should include " "a reference to a collection in the 'Enum' sheet (e.g., collection='MyEnumCollection')." ) elif type_ == "too_short": - return f"In {context.humanize_location(loc[:-1])} collection is not defined in the 'Enum' sheet" + return f"In {location}: collection is not defined in the 'Enum' sheet" + elif type_ == "too_long": + return f"In {location}: collection has too many possible values (max {ENUM_VALUES_MAX_COUNT} allowed)" + elif type_ == "value_error": + detail = raw_msg.removeprefix("Value error, ") + return f"In {location}: {detail}" else: - raise RuntimeError("This is a neat bug, report to the team.") + detail = raw_msg.removeprefix("Value error, ") + return f"In {location}: {detail}" diff --git a/tests/tests_unit/test_data_model/test_models/test_dms/test_containers.py b/tests/tests_unit/test_data_model/test_models/test_dms/test_containers.py index 370573053..475e2a40d 100644 --- a/tests/tests_unit/test_data_model/test_models/test_dms/test_containers.py +++ b/tests/tests_unit/test_data_model/test_models/test_dms/test_containers.py @@ -147,8 +147,8 @@ def test_validate_data_types(self, data_type: dict[str, Any]) -> None: pytest.param( {"type": "enum", "values": {"validValue1": {}, "invalid-value": {}}}, { - "In enum.values enum value 'invalid-value' is not valid. Enum values must " - "match the pattern: ^[_A-Za-z][_0-9A-Za-z]{0,127}$." + "In enum.values enum values 'invalid-value' do not match the required " + "pattern: ^[_A-Za-z][_0-9A-Za-z]{0,127}$." }, id="Enum with invalid value key", ), diff --git a/tests/tests_unit/test_utils/test_validation.py b/tests/tests_unit/test_utils/test_validation.py index 54019d757..f8743807f 100644 --- a/tests/tests_unit/test_utils/test_validation.py +++ b/tests/tests_unit/test_utils/test_validation.py @@ -70,7 +70,7 @@ class TestHumanizeValidationError: missing_required_descriptor="empty", ), ( - "In table 'Properties' row 277 column 'Value Type' -> enum" + "In table 'Properties' row 277 column 'Value Type' -> enum:" " definition should include a reference to a collection in the 'Enum' sheet" " (e.g., collection='MyEnumCollection')." ), @@ -94,7 +94,7 @@ class TestHumanizeValidationError: missing_required_descriptor="empty", ), ( - "In table 'Properties' row 277 column 'Value Type' -> enum" + "In table 'Properties' row 277 column 'Value Type' -> enum:" " collection is not defined in the 'Enum' sheet." ), id="Missing enum collection", @@ -110,6 +110,14 @@ class TestHumanizeValidationError: "'json', 'timeseries', 'file', 'sequence', 'enum', 'direct'" ), "input": {"maxListSize": None, "list": False, "type": "primitive"}, + "ctx": { + "discriminator": "'type'", + "tag": "primitive", + "expected_tags": ( + "'text', 'float32', 'float64', 'boolean', 'int32', 'int64', 'timestamp', 'date', " + "'json', 'timeseries', 'file', 'sequence', 'enum', 'direct'" + ), + }, } ), ValidationContext(