diff --git a/openMINDS_validation/validation.py b/openMINDS_validation/validation.py index 42c9021..97a6028 100644 --- a/openMINDS_validation/validation.py +++ b/openMINDS_validation/validation.py @@ -1,9 +1,10 @@ import re import logging -import json +import unicodedata import urllib.request import urllib.error from pathlib import Path, PurePath +from typing import Optional from openMINDS_validation.utils import VocabManager, Versions, load_json, get_latest_version_commit, version_key, \ find_openminds_class, clone_central, expand_jsonld, fetch_remote_schema_extends @@ -128,9 +129,31 @@ def __init__(self, absolute_path): self.namespaces = Versions("./versions.json").versions[self.version]['namespaces'] self.vocab = VocabManager("./types.json", "./properties.json") self.instance = load_json(absolute_path) + self._id_instance_name = self.instance['@id'].split('/')[-1] self._type_schema_name = None self._id_schema_name = None + def _generate_expected_atid_name(name: str) -> Optional[str]: + sanitized = ( + name + .translate(str.maketrans("", " ", "(),'\"")) + .replace(".", "Dot ") + .replace("/", "_") + .replace("&", "And") + ) + normalized = unicodedata.normalize("NFKD", sanitized) + normalized = normalized.encode("ascii", "ignore").decode("ascii") + + words = normalized.split() + if not words: + return None + + first = words[0] if words[0].isupper() or ( + all(w[:1].isupper() for w in words if w[0].isalpha() and w.lower() not in {"and", "by"}) + ) else words[0].lower() + + return first + ''.join(w if w.isupper() else w.title() for w in words[1:]) + def _nested_instance(self, value, function, instance_type): if isinstance(value, dict): function(value, instance_type) @@ -144,6 +167,7 @@ def check_atid_convention(self): Validates against: - White space in @id and embedded @id. - Differences between file name and @id. + - @id naming convention, using abbreviation if present, otherwise name. """ def _check_instance_id_convention(instance): if instance is not None and '@id' in instance: @@ -152,13 +176,25 @@ def _check_instance_id_convention(instance): if instance['@id'].count('/') != 5: logging.error(f'Unexpected number of "/" for @id: "{instance["@id"]}".') - # TODO use a dictionary of abbreviations and Upper case name - _id_instance_name = self.instance['@id'].split('/')[-1] - # TODO instead of using filename (abbreviations and other properties could be used) - if _id_instance_name != self.file_name: - logging.error(f'Mismatch between @id entity "{_id_instance_name}" and file name "{self.file_name}".') + # Differences between file name and @id + if self._id_instance_name != self.file_name: + logging.error(f'Mismatch between @id entity "{self._id_instance_name}" and file name "{self.file_name}".') _check_instance_id_convention(self.instance) + # TODO add convention for the Accessibility library + # @id naming convention + name = self.instance.get('abbreviation') or self.instance.get('name') + if not name: + logging.warning('Property abbreviation/name is missing.') + else: + expected = self._generate_expected_atid_name(name) + if expected is None: + logging.warning('Property name is empty.') + elif self._id_instance_name != expected: + logging.warning(f'Unexpected @id entity: "{self._id_instance_name}" ' + f'(full @id: {self.instance["@id"]}), (expected: {expected}).') + + # White space in @id and embedded @id for property in self.instance: if self.instance[property] is not None and type(self.instance[property]) is dict and '@id' in self.instance[property]: _check_instance_id_convention(self.instance[property])