From cc249d905ac0128ef4c2e258b6dab3e3ef9993d7 Mon Sep 17 00:00:00 2001 From: raphaelgazzotti Date: Tue, 13 Jan 2026 11:39:25 +0100 Subject: [PATCH 1/4] Add validation for @id naming based on abbreviation/name convention. --- openMINDS_validation/validation.py | 37 ++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/openMINDS_validation/validation.py b/openMINDS_validation/validation.py index 42c9021..99c4311 100644 --- a/openMINDS_validation/validation.py +++ b/openMINDS_validation/validation.py @@ -1,6 +1,7 @@ import re import logging import json +import unicodedata import urllib.request import urllib.error from pathlib import Path, PurePath @@ -128,6 +129,7 @@ def __init__(self, absolute_path): self.namespaces = Versions("./versions.json").versions[self.version]['namespaces'] self.vocab = VocabManager("./types.json", "./properties.json") self.instance = load_json(absolute_path) + self._id_instance_name = self.instance['@id'].split('/')[-1] self._type_schema_name = None self._id_schema_name = None @@ -139,6 +141,33 @@ def _nested_instance(self, value, function, instance_type): for item in value: self._nested_instance(item, function, instance_type) + def _check_atid_naming(self): + """ + Validates @id entity against openMINDS naming convention derived from the abbreviation otname property. + The abbreviation property is used when present, otherwise, the name property is used. + """ + name = self.instance['abbreviation'] if 'abbreviation' in self.instance else self.instance['name'] if 'name' in self.instance else None + if not name: + return logging.warning(f'Property abbreviation/name is missing.') + + sanitized = ( + name + .translate(str.maketrans("", "", "()'")) + .replace("/", "_") + .replace("&", "And") + ) + normalized = unicodedata.normalize("NFKD", sanitized) + normalized = normalized.encode("ascii", "ignore").decode("ascii") + + words = normalized.split() + if not words: + return logging.warning(f'Property name is empty.') + + first = words[0] if words[0].isupper() or (all(w[:1].isupper() for w in words if w[0].isalpha() and w.lower() not in {"and", "by"})) else words[0].lower() + expected = first + ''.join(w if w.isupper() else w.title() for w in words[1:]) + if self._id_instance_name != expected: + logging.warning(f'Unexpected @id entity: "{self._id_instance_name}" (full @id: {self.instance["@id"]}), (expected: {expected}).') + def check_atid_convention(self): """ Validates against: @@ -152,12 +181,10 @@ def _check_instance_id_convention(instance): if instance['@id'].count('/') != 5: logging.error(f'Unexpected number of "/" for @id: "{instance["@id"]}".') - # TODO use a dictionary of abbreviations and Upper case name - _id_instance_name = self.instance['@id'].split('/')[-1] - # TODO instead of using filename (abbreviations and other properties could be used) - if _id_instance_name != self.file_name: - logging.error(f'Mismatch between @id entity "{_id_instance_name}" and file name "{self.file_name}".') + if self._id_instance_name != self.file_name: + logging.error(f'Mismatch between @id entity "{self._id_instance_name}" and file name "{self.file_name}".') _check_instance_id_convention(self.instance) + self._check_atid_naming() for property in self.instance: if self.instance[property] is not None and type(self.instance[property]) is dict and '@id' in self.instance[property]: From f4b610b6e0ffee5ab29c9198184e3d58db521916 Mon Sep 17 00:00:00 2001 From: raphaelgazzotti Date: Fri, 20 Feb 2026 17:54:46 +0100 Subject: [PATCH 2/4] Handle dot character and capitalize after \' and \" characters. --- openMINDS_validation/validation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openMINDS_validation/validation.py b/openMINDS_validation/validation.py index 99c4311..af51392 100644 --- a/openMINDS_validation/validation.py +++ b/openMINDS_validation/validation.py @@ -152,7 +152,8 @@ def _check_atid_naming(self): sanitized = ( name - .translate(str.maketrans("", "", "()'")) + .translate(str.maketrans("", " ", "(),'\"")) + .replace(".", "Dot ") .replace("/", "_") .replace("&", "And") ) From d71e1be06b356ee3363f8f4f8bfb12a814240d6e Mon Sep 17 00:00:00 2001 From: raphaelgazzotti Date: Mon, 23 Mar 2026 10:23:47 +0100 Subject: [PATCH 3/4] Refactoring of check_atid_convention to include @id naming convention checking --- openMINDS_validation/validation.py | 55 +++++++++++++++++------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/openMINDS_validation/validation.py b/openMINDS_validation/validation.py index af51392..f72efd7 100644 --- a/openMINDS_validation/validation.py +++ b/openMINDS_validation/validation.py @@ -1,10 +1,10 @@ import re import logging -import json import unicodedata import urllib.request import urllib.error from pathlib import Path, PurePath +from typing import Optional from openMINDS_validation.utils import VocabManager, Versions, load_json, get_latest_version_commit, version_key, \ find_openminds_class, clone_central, expand_jsonld, fetch_remote_schema_extends @@ -133,23 +133,7 @@ def __init__(self, absolute_path): self._type_schema_name = None self._id_schema_name = None - def _nested_instance(self, value, function, instance_type): - if isinstance(value, dict): - function(value, instance_type) - - elif isinstance(value, list): - for item in value: - self._nested_instance(item, function, instance_type) - - def _check_atid_naming(self): - """ - Validates @id entity against openMINDS naming convention derived from the abbreviation otname property. - The abbreviation property is used when present, otherwise, the name property is used. - """ - name = self.instance['abbreviation'] if 'abbreviation' in self.instance else self.instance['name'] if 'name' in self.instance else None - if not name: - return logging.warning(f'Property abbreviation/name is missing.') - + def _generate_expected_atid_name(name: str) -> Optional[str]: sanitized = ( name .translate(str.maketrans("", " ", "(),'\"")) @@ -162,18 +146,28 @@ def _check_atid_naming(self): words = normalized.split() if not words: - return logging.warning(f'Property name is empty.') + return None - first = words[0] if words[0].isupper() or (all(w[:1].isupper() for w in words if w[0].isalpha() and w.lower() not in {"and", "by"})) else words[0].lower() - expected = first + ''.join(w if w.isupper() else w.title() for w in words[1:]) - if self._id_instance_name != expected: - logging.warning(f'Unexpected @id entity: "{self._id_instance_name}" (full @id: {self.instance["@id"]}), (expected: {expected}).') + first = words[0] if words[0].isupper() or ( + all(w[:1].isupper() for w in words if w[0].isalpha() and w.lower() not in {"and", "by"}) + ) else words[0].lower() + + return first + ''.join(w if w.isupper() else w.title() for w in words[1:]) + + def _nested_instance(self, value, function, instance_type): + if isinstance(value, dict): + function(value, instance_type) + + elif isinstance(value, list): + for item in value: + self._nested_instance(item, function, instance_type) def check_atid_convention(self): """ Validates against: - White space in @id and embedded @id. - Differences between file name and @id. + - @id naming convention, using abbreviation if present, otherwise name. """ def _check_instance_id_convention(instance): if instance is not None and '@id' in instance: @@ -182,11 +176,24 @@ def _check_instance_id_convention(instance): if instance['@id'].count('/') != 5: logging.error(f'Unexpected number of "/" for @id: "{instance["@id"]}".') + # Differences between file name and @id if self._id_instance_name != self.file_name: logging.error(f'Mismatch between @id entity "{self._id_instance_name}" and file name "{self.file_name}".') _check_instance_id_convention(self.instance) - self._check_atid_naming() + # @id naming convention + name = self.instance.get('abbreviation') or self.instance.get('name') + if not name: + logging.warning('Property abbreviation/name is missing.') + else: + expected = self._generate_expected_atid_name(name) + if expected is None: + logging.warning('Property name is empty.') + elif self._id_instance_name != expected: + logging.warning(f'Unexpected @id entity: "{self._id_instance_name}" ' + f'(full @id: {self.instance["@id"]}), (expected: {expected}).') + + # White space in @id and embedded @id for property in self.instance: if self.instance[property] is not None and type(self.instance[property]) is dict and '@id' in self.instance[property]: _check_instance_id_convention(self.instance[property]) From a7c0f6288634e9541df2bd139ccd49880d0afaac Mon Sep 17 00:00:00 2001 From: raphaelgazzotti Date: Mon, 23 Mar 2026 10:26:38 +0100 Subject: [PATCH 4/4] Add TODO indication --- openMINDS_validation/validation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/openMINDS_validation/validation.py b/openMINDS_validation/validation.py index f72efd7..97a6028 100644 --- a/openMINDS_validation/validation.py +++ b/openMINDS_validation/validation.py @@ -181,6 +181,7 @@ def _check_instance_id_convention(instance): logging.error(f'Mismatch between @id entity "{self._id_instance_name}" and file name "{self.file_name}".') _check_instance_id_convention(self.instance) + # TODO add convention for the Accessibility library # @id naming convention name = self.instance.get('abbreviation') or self.instance.get('name') if not name: