Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 42 additions & 6 deletions openMINDS_validation/validation.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import re
import logging
import json
import unicodedata
import urllib.request
import urllib.error
from pathlib import Path, PurePath
from typing import Optional

from openMINDS_validation.utils import VocabManager, Versions, load_json, get_latest_version_commit, version_key, \
find_openminds_class, clone_central, expand_jsonld, fetch_remote_schema_extends
Expand Down Expand Up @@ -128,9 +129,31 @@ def __init__(self, absolute_path):
self.namespaces = Versions("./versions.json").versions[self.version]['namespaces']
self.vocab = VocabManager("./types.json", "./properties.json")
self.instance = load_json(absolute_path)
self._id_instance_name = self.instance['@id'].split('/')[-1]
self._type_schema_name = None
self._id_schema_name = None

def _generate_expected_atid_name(name: str) -> Optional[str]:
sanitized = (
name
.translate(str.maketrans("", " ", "(),'\""))
.replace(".", "Dot ")
.replace("/", "_")
.replace("&", "And")
)
normalized = unicodedata.normalize("NFKD", sanitized)
normalized = normalized.encode("ascii", "ignore").decode("ascii")

words = normalized.split()
if not words:
return None

first = words[0] if words[0].isupper() or (
all(w[:1].isupper() for w in words if w[0].isalpha() and w.lower() not in {"and", "by"})
) else words[0].lower()

return first + ''.join(w if w.isupper() else w.title() for w in words[1:])

def _nested_instance(self, value, function, instance_type):
if isinstance(value, dict):
function(value, instance_type)
Expand All @@ -144,6 +167,7 @@ def check_atid_convention(self):
Validates against:
- White space in @id and embedded @id.
- Differences between file name and @id.
- @id naming convention, using abbreviation if present, otherwise name.
"""
def _check_instance_id_convention(instance):
if instance is not None and '@id' in instance:
Expand All @@ -152,13 +176,25 @@ def _check_instance_id_convention(instance):
if instance['@id'].count('/') != 5:
logging.error(f'Unexpected number of "/" for @id: "{instance["@id"]}".')

# TODO use a dictionary of abbreviations and Upper case name
_id_instance_name = self.instance['@id'].split('/')[-1]
# TODO instead of using filename (abbreviations and other properties could be used)
if _id_instance_name != self.file_name:
logging.error(f'Mismatch between @id entity "{_id_instance_name}" and file name "{self.file_name}".')
# Differences between file name and @id
if self._id_instance_name != self.file_name:
logging.error(f'Mismatch between @id entity "{self._id_instance_name}" and file name "{self.file_name}".')
_check_instance_id_convention(self.instance)

# TODO add convention for the Accessibility library
# @id naming convention
name = self.instance.get('abbreviation') or self.instance.get('name')
if not name:
logging.warning('Property abbreviation/name is missing.')
else:
expected = self._generate_expected_atid_name(name)
if expected is None:
logging.warning('Property name is empty.')
elif self._id_instance_name != expected:
logging.warning(f'Unexpected @id entity: "{self._id_instance_name}" '
f'(full @id: {self.instance["@id"]}), (expected: {expected}).')

# White space in @id and embedded @id
for property in self.instance:
if self.instance[property] is not None and type(self.instance[property]) is dict and '@id' in self.instance[property]:
_check_instance_id_convention(self.instance[property])
Expand Down