From 1ff38136589ab775e15df4602746d32f50852435 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20Schj=C3=B8lberg?= Date: Thu, 26 Feb 2026 15:14:24 +0100 Subject: [PATCH 1/6] Improve ModelSyntaxError validation --- .../importers/_table_importer/reader.py | 31 +++++++++++++++- .../neat/_data_model/models/dms/_constants.py | 1 + .../_data_model/models/dms/_data_types.py | 35 ++++++++++++------- cognite/neat/_utils/validation.py | 26 ++++++++++---- .../test_models/test_dms/test_containers.py | 4 +-- .../tests_unit/test_utils/test_validation.py | 12 +++++-- 6 files changed, 85 insertions(+), 24 deletions(-) diff --git a/cognite/neat/_data_model/importers/_table_importer/reader.py b/cognite/neat/_data_model/importers/_table_importer/reader.py index 2332d6cd73..5e28b28be9 100644 --- a/cognite/neat/_data_model/importers/_table_importer/reader.py +++ b/cognite/neat/_data_model/importers/_table_importer/reader.py @@ -1,4 +1,5 @@ import json +import re from collections import defaultdict from dataclasses import dataclass, field from typing import Any, Literal, TypeVar, cast, overload @@ -23,7 +24,11 @@ ViewRequestProperty, ViewRequestPropertyAdapter, ) -from cognite.neat._data_model.models.dms._constants import DATA_MODEL_DESCRIPTION_MAX_LENGTH +from cognite.neat._data_model.models.dms._constants import ( + CONTAINER_AND_VIEW_PROPERTIES_IDENTIFIER_PATTERN, + DATA_MODEL_DESCRIPTION_MAX_LENGTH, + FORBIDDEN_CONTAINER_AND_VIEW_PROPERTIES_IDENTIFIER, +) from cognite.neat._data_model.models.entities import ParsedEntity, parse_entity from cognite.neat._exceptions import DataModelImportException from cognite.neat._issues import ModelSyntaxError @@ -46,6 +51,7 @@ from .source import TableSource T_BaseModel = TypeVar("T_BaseModel", bound=BaseModel) +_PROPERTY_ID_PATTERN = re.compile(CONTAINER_AND_VIEW_PROPERTIES_IDENTIFIER_PATTERN) @dataclass @@ -428,6 +434,29 @@ def _process_view_property( view_entities: set[ParsedEntity], ) -> None: loc = (self.Sheets.properties, row_no) + if not _PROPERTY_ID_PATTERN.match(prop.view_property): + self.errors.append( + ModelSyntaxError( + message=( + f"In {self.source.location(loc)} column {self.PropertyColumns.view_property!r}" + f" value {prop.view_property!r} does not match the required" + f" pattern: {CONTAINER_AND_VIEW_PROPERTIES_IDENTIFIER_PATTERN}" + ) + ) + ) + return None + if prop.view_property in FORBIDDEN_CONTAINER_AND_VIEW_PROPERTIES_IDENTIFIER: + self.errors.append( + ModelSyntaxError( + message=( + f"In {self.source.location(loc)} column {self.PropertyColumns.view_property!r}" + f" value {prop.view_property!r} is a reserved property identifier." + f" Reserved identifiers are: " + f"{humanize_collection(FORBIDDEN_CONTAINER_AND_VIEW_PROPERTIES_IDENTIFIER)}" + ) + ) + ) + return None data = self.read_view_property(prop, loc) view_prop = self._validate_adapter(ViewRequestPropertyAdapter, data, loc) if view_prop is None: diff --git a/cognite/neat/_data_model/models/dms/_constants.py b/cognite/neat/_data_model/models/dms/_constants.py index 8202c7fbcf..2c785a00cf 100644 --- a/cognite/neat/_data_model/models/dms/_constants.py +++ b/cognite/neat/_data_model/models/dms/_constants.py @@ -5,6 +5,7 @@ ENUM_VALUE_IDENTIFIER_PATTERN = r"^[_A-Za-z][_0-9A-Za-z]{0,127}$" DM_VERSION_PATTERN = r"^[a-zA-Z0-9]([.a-zA-Z0-9_-]{0,41}[a-zA-Z0-9])?$" DATA_MODEL_DESCRIPTION_MAX_LENGTH = 1024 +ENUM_VALUES_MAX_COUNT = 32 FORBIDDEN_ENUM_VALUES = frozenset({"true", "false", "null"}) FORBIDDEN_SPACES = frozenset(["space", "cdf", "dms", "pg3", "shared", "system", "node", "edge"]) FORBIDDEN_CONTAINER_AND_VIEW_EXTERNAL_IDS = frozenset( diff --git a/cognite/neat/_data_model/models/dms/_data_types.py b/cognite/neat/_data_model/models/dms/_data_types.py index 2e28edf24e..da6e42a260 100644 --- a/cognite/neat/_data_model/models/dms/_data_types.py +++ b/cognite/neat/_data_model/models/dms/_data_types.py @@ -8,7 +8,7 @@ from cognite.neat._utils.auxiliary import get_concrete_subclasses from cognite.neat._utils.useful_types import BaseModelObject -from ._constants import ENUM_VALUE_IDENTIFIER_PATTERN, FORBIDDEN_ENUM_VALUES, INSTANCE_ID_PATTERN +from ._constants import ENUM_VALUE_IDENTIFIER_PATTERN, ENUM_VALUES_MAX_COUNT, FORBIDDEN_ENUM_VALUES, INSTANCE_ID_PATTERN from ._references import ContainerReference, ViewReference @@ -147,26 +147,37 @@ class EnumProperty(PropertyTypeDefinition): values: dict[str, EnumValue] = Field( description="A set of all possible values for the enum property.", min_length=1, - max_length=32, + max_length=ENUM_VALUES_MAX_COUNT, ) @field_validator("values", mode="after") def _valid_enum_value(cls, val: dict[str, EnumValue]) -> dict[str, EnumValue]: - errors: list[str] = [] + invalid_pattern: list[str] = [] + invalid_length: list[str] = [] + forbidden: list[str] = [] for key in val.keys(): if not _ENUM_KEY.match(key): - errors.append( - f"Enum value {key!r} is not valid. Enum values must match " - f"the pattern: {ENUM_VALUE_IDENTIFIER_PATTERN}" - ) + invalid_pattern.append(key) if len(key) > 128 or len(key) < 1: - errors.append(f"Enum value {key!r} must be between 1 and 128 characters long.") + invalid_length.append(key) if key.lower() in FORBIDDEN_ENUM_VALUES: - errors.append( - f"Enum value {key!r} cannot be any of the following reserved values: {FORBIDDEN_ENUM_VALUES}" - ) + forbidden.append(key) + errors: list[str] = [] + if invalid_pattern: + keys = ", ".join(repr(k) for k in invalid_pattern) + errors.append( + f"Enum values {keys} do not match the required pattern: {ENUM_VALUE_IDENTIFIER_PATTERN}" + ) + if invalid_length: + keys = ", ".join(repr(k) for k in invalid_length) + errors.append(f"Enum values {keys} must be between 1 and 128 characters long.") + if forbidden: + keys = ", ".join(repr(k) for k in forbidden) + errors.append( + f"Enum values {keys} cannot be any of the following reserved values: {FORBIDDEN_ENUM_VALUES}" + ) if errors: - raise ValueError(";".join(errors)) + raise ValueError("; ".join(errors)) return val diff --git a/cognite/neat/_utils/validation.py b/cognite/neat/_utils/validation.py index 013b173f03..f4eb5628b7 100644 --- a/cognite/neat/_utils/validation.py +++ b/cognite/neat/_utils/validation.py @@ -4,6 +4,8 @@ from pydantic_core import ErrorDetails +from cognite.neat._data_model.models.dms._constants import ENUM_VALUES_MAX_COUNT + def as_json_path(loc: tuple[str | int, ...]) -> str: """Converts a location tuple to a JSON path. @@ -92,7 +94,9 @@ def humanize_validation_error( f"type {type(error['input']).__name__}." ) elif type_ == "union_tag_invalid": - msg = error["msg"].replace(", 'direct'", "").replace("found using 'type' ", "").replace("tag", "value") + ctx = error["ctx"] + expected_tags = ctx["expected_tags"].replace(", 'direct'", "") + msg = f"Input value '{ctx['tag']}' does not match any of the expected values: {expected_tags}" elif type_ == "string_pattern_mismatch": msg = f"string '{error['input']}' does not match the required pattern: '{error['ctx']['pattern']}'." @@ -112,7 +116,7 @@ def humanize_validation_error( if len(loc) >= 3 and context.field_name == "column" and loc[-3:] == ("type", "enum", "values"): # Special handling for enum errors in table columns - msg = _enum_message(type_, loc, context) + msg = _enum_message(type_, loc, context, error["msg"]) elif len(loc) > 1 and type_ in {"extra_forbidden", "missing"}: if context.missing_required_descriptor == "empty" and type_ == "missing": # This is a table so we modify the error message. @@ -138,17 +142,25 @@ def humanize_validation_error( return msg -def _enum_message(type_: str, loc: tuple[int | str, ...], context: ValidationContext) -> str: +def _enum_message(type_: str, loc: tuple[int | str, ...], context: ValidationContext, raw_msg: str) -> str: """Special handling of enum errors in table columns.""" + location = context.humanize_location(loc[:-1] if loc[-1] == "values" else loc) + if loc[-1] != "values": - raise RuntimeError("This is a neat bug, report to the team.") + return f"In {location}: unexpected enum validation error with message '{raw_msg}'" if type_ == "missing": return ( - f"In {context.humanize_location(loc[:-1])} definition should include " + f"In {location}: definition should include " "a reference to a collection in the 'Enum' sheet (e.g., collection='MyEnumCollection')." ) elif type_ == "too_short": - return f"In {context.humanize_location(loc[:-1])} collection is not defined in the 'Enum' sheet" + return f"In {location}: collection is not defined in the 'Enum' sheet" + elif type_ == "too_long": + return f"In {location}: collection has too many possible values (max {ENUM_VALUES_MAX_COUNT} allowed)" + elif type_ == "value_error": + detail = raw_msg.removeprefix("Value error, ") + return f"In {location}: {detail}" else: - raise RuntimeError("This is a neat bug, report to the team.") + detail = raw_msg.removeprefix("Value error, ") + return f"In {location}: {detail}" diff --git a/tests/tests_unit/test_data_model/test_models/test_dms/test_containers.py b/tests/tests_unit/test_data_model/test_models/test_dms/test_containers.py index 3705730532..475e2a40d2 100644 --- a/tests/tests_unit/test_data_model/test_models/test_dms/test_containers.py +++ b/tests/tests_unit/test_data_model/test_models/test_dms/test_containers.py @@ -147,8 +147,8 @@ def test_validate_data_types(self, data_type: dict[str, Any]) -> None: pytest.param( {"type": "enum", "values": {"validValue1": {}, "invalid-value": {}}}, { - "In enum.values enum value 'invalid-value' is not valid. Enum values must " - "match the pattern: ^[_A-Za-z][_0-9A-Za-z]{0,127}$." + "In enum.values enum values 'invalid-value' do not match the required " + "pattern: ^[_A-Za-z][_0-9A-Za-z]{0,127}$." }, id="Enum with invalid value key", ), diff --git a/tests/tests_unit/test_utils/test_validation.py b/tests/tests_unit/test_utils/test_validation.py index 54019d757d..f8743807ff 100644 --- a/tests/tests_unit/test_utils/test_validation.py +++ b/tests/tests_unit/test_utils/test_validation.py @@ -70,7 +70,7 @@ class TestHumanizeValidationError: missing_required_descriptor="empty", ), ( - "In table 'Properties' row 277 column 'Value Type' -> enum" + "In table 'Properties' row 277 column 'Value Type' -> enum:" " definition should include a reference to a collection in the 'Enum' sheet" " (e.g., collection='MyEnumCollection')." ), @@ -94,7 +94,7 @@ class TestHumanizeValidationError: missing_required_descriptor="empty", ), ( - "In table 'Properties' row 277 column 'Value Type' -> enum" + "In table 'Properties' row 277 column 'Value Type' -> enum:" " collection is not defined in the 'Enum' sheet." ), id="Missing enum collection", @@ -110,6 +110,14 @@ class TestHumanizeValidationError: "'json', 'timeseries', 'file', 'sequence', 'enum', 'direct'" ), "input": {"maxListSize": None, "list": False, "type": "primitive"}, + "ctx": { + "discriminator": "'type'", + "tag": "primitive", + "expected_tags": ( + "'text', 'float32', 'float64', 'boolean', 'int32', 'int64', 'timestamp', 'date', " + "'json', 'timeseries', 'file', 'sequence', 'enum', 'direct'" + ), + }, } ), ValidationContext( From 59aac83ca0491d31b2fa6a40ffde56be13a05a9b Mon Sep 17 00:00:00 2001 From: Magssch <37403247+Magssch@users.noreply.github.com> Date: Thu, 26 Feb 2026 14:20:15 +0000 Subject: [PATCH 2/6] Linting and static code checks --- cognite/neat/_data_model/models/dms/_data_types.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/cognite/neat/_data_model/models/dms/_data_types.py b/cognite/neat/_data_model/models/dms/_data_types.py index da6e42a260..6fe5cd578e 100644 --- a/cognite/neat/_data_model/models/dms/_data_types.py +++ b/cognite/neat/_data_model/models/dms/_data_types.py @@ -165,17 +165,13 @@ def _valid_enum_value(cls, val: dict[str, EnumValue]) -> dict[str, EnumValue]: errors: list[str] = [] if invalid_pattern: keys = ", ".join(repr(k) for k in invalid_pattern) - errors.append( - f"Enum values {keys} do not match the required pattern: {ENUM_VALUE_IDENTIFIER_PATTERN}" - ) + errors.append(f"Enum values {keys} do not match the required pattern: {ENUM_VALUE_IDENTIFIER_PATTERN}") if invalid_length: keys = ", ".join(repr(k) for k in invalid_length) errors.append(f"Enum values {keys} must be between 1 and 128 characters long.") if forbidden: keys = ", ".join(repr(k) for k in forbidden) - errors.append( - f"Enum values {keys} cannot be any of the following reserved values: {FORBIDDEN_ENUM_VALUES}" - ) + errors.append(f"Enum values {keys} cannot be any of the following reserved values: {FORBIDDEN_ENUM_VALUES}") if errors: raise ValueError("; ".join(errors)) return val From c1b738bd556f51d40b75f470d214668b5a31f83d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20Schj=C3=B8lberg?= Date: Thu, 26 Feb 2026 15:34:49 +0100 Subject: [PATCH 3/6] Retrigger CI --- cognite/neat/_data_model/importers/_table_importer/reader.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cognite/neat/_data_model/importers/_table_importer/reader.py b/cognite/neat/_data_model/importers/_table_importer/reader.py index 5e28b28be9..301c1bb72e 100644 --- a/cognite/neat/_data_model/importers/_table_importer/reader.py +++ b/cognite/neat/_data_model/importers/_table_importer/reader.py @@ -1,5 +1,4 @@ import json -import re from collections import defaultdict from dataclasses import dataclass, field from typing import Any, Literal, TypeVar, cast, overload From 32dabb56b4353aacfb03ddd94b2d6181313d9985 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20Schj=C3=B8lberg?= Date: Thu, 26 Feb 2026 15:41:14 +0100 Subject: [PATCH 4/6] Retrigger CI --- cognite/neat/_data_model/importers/_table_importer/reader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cognite/neat/_data_model/importers/_table_importer/reader.py b/cognite/neat/_data_model/importers/_table_importer/reader.py index 301c1bb72e..5e28b28be9 100644 --- a/cognite/neat/_data_model/importers/_table_importer/reader.py +++ b/cognite/neat/_data_model/importers/_table_importer/reader.py @@ -1,4 +1,5 @@ import json +import re from collections import defaultdict from dataclasses import dataclass, field from typing import Any, Literal, TypeVar, cast, overload From 474c1ed7908a5f1383229a8daa6a78333bf2a3c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20Schj=C3=B8lberg?= Date: Thu, 26 Feb 2026 15:54:45 +0100 Subject: [PATCH 5/6] Revert unintended invariant change --- cognite/neat/_utils/validation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cognite/neat/_utils/validation.py b/cognite/neat/_utils/validation.py index f4eb5628b7..5d9611c3c5 100644 --- a/cognite/neat/_utils/validation.py +++ b/cognite/neat/_utils/validation.py @@ -148,7 +148,10 @@ def _enum_message(type_: str, loc: tuple[int | str, ...], context: ValidationCon location = context.humanize_location(loc[:-1] if loc[-1] == "values" else loc) if loc[-1] != "values": - return f"In {location}: unexpected enum validation error with message '{raw_msg}'" + raise RuntimeError( + f"_enum_message called with unexpected loc={loc!r}, type_={type_!r}, raw_msg={raw_msg!r}. " + "This is a bug in NEAT." + ) if type_ == "missing": return ( f"In {location}: definition should include " From 15f6a63848c29532987b50ebab22edad9fd81ebd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20Schj=C3=B8lberg?= Date: Fri, 27 Feb 2026 11:45:50 +0100 Subject: [PATCH 6/6] Revert change --- .../importers/_table_importer/reader.py | 31 +------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/cognite/neat/_data_model/importers/_table_importer/reader.py b/cognite/neat/_data_model/importers/_table_importer/reader.py index 5e28b28be9..2332d6cd73 100644 --- a/cognite/neat/_data_model/importers/_table_importer/reader.py +++ b/cognite/neat/_data_model/importers/_table_importer/reader.py @@ -1,5 +1,4 @@ import json -import re from collections import defaultdict from dataclasses import dataclass, field from typing import Any, Literal, TypeVar, cast, overload @@ -24,11 +23,7 @@ ViewRequestProperty, ViewRequestPropertyAdapter, ) -from cognite.neat._data_model.models.dms._constants import ( - CONTAINER_AND_VIEW_PROPERTIES_IDENTIFIER_PATTERN, - DATA_MODEL_DESCRIPTION_MAX_LENGTH, - FORBIDDEN_CONTAINER_AND_VIEW_PROPERTIES_IDENTIFIER, -) +from cognite.neat._data_model.models.dms._constants import DATA_MODEL_DESCRIPTION_MAX_LENGTH from cognite.neat._data_model.models.entities import ParsedEntity, parse_entity from cognite.neat._exceptions import DataModelImportException from cognite.neat._issues import ModelSyntaxError @@ -51,7 +46,6 @@ from .source import TableSource T_BaseModel = TypeVar("T_BaseModel", bound=BaseModel) -_PROPERTY_ID_PATTERN = re.compile(CONTAINER_AND_VIEW_PROPERTIES_IDENTIFIER_PATTERN) @dataclass @@ -434,29 +428,6 @@ def _process_view_property( view_entities: set[ParsedEntity], ) -> None: loc = (self.Sheets.properties, row_no) - if not _PROPERTY_ID_PATTERN.match(prop.view_property): - self.errors.append( - ModelSyntaxError( - message=( - f"In {self.source.location(loc)} column {self.PropertyColumns.view_property!r}" - f" value {prop.view_property!r} does not match the required" - f" pattern: {CONTAINER_AND_VIEW_PROPERTIES_IDENTIFIER_PATTERN}" - ) - ) - ) - return None - if prop.view_property in FORBIDDEN_CONTAINER_AND_VIEW_PROPERTIES_IDENTIFIER: - self.errors.append( - ModelSyntaxError( - message=( - f"In {self.source.location(loc)} column {self.PropertyColumns.view_property!r}" - f" value {prop.view_property!r} is a reserved property identifier." - f" Reserved identifiers are: " - f"{humanize_collection(FORBIDDEN_CONTAINER_AND_VIEW_PROPERTIES_IDENTIFIER)}" - ) - ) - ) - return None data = self.read_view_property(prop, loc) view_prop = self._validate_adapter(ViewRequestPropertyAdapter, data, loc) if view_prop is None: