Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 65 additions & 23 deletions src/sssom/validators.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Validators."""

import logging
from typing import List
from typing import Dict, List

from jsonschema import ValidationError

Expand All @@ -18,7 +18,45 @@
from .constants import SCHEMA_YAML, SchemaValidationType


def validate(msdf: MappingSetDataFrame, validation_types: List[SchemaValidationType]) -> None:
class ValidationResult:
    """An individual validation result from a validation system.

    Only ``category`` is supplied at construction time; ``message``, ``check``
    and ``data`` start out empty and are filled in by the code that creates
    the result.
    """

    def __init__(self, category: str) -> None:
        """Initialise the validation result.

        :param category: Name of the category this validation result belongs to.
        """
        self.message: str = ""
        self.category: str = category
        self.check: str = ""
        # A fresh dict per instance, so results never share payload state.
        self.data: Dict = {}

    def __repr__(self) -> str:
        """Return a debugging representation showing all four attributes."""
        return (
            f"{type(self).__name__}(category={self.category!r}, check={self.check!r}, "
            f"message={self.message!r}, data={self.data!r})"
        )


class ValidationReport:
    """A validation report that contains the results of various validation methods."""

    def __init__(self, name: str) -> None:
        """Initialise the validation report.

        :param name: Name of the category this validation report corresponds to.
        """
        self.name: str = name
        self.results: List["ValidationResult"] = []

    def __repr__(self) -> str:
        """Return a debugging representation with the name and the result count."""
        return f"{type(self).__name__}(name={self.name!r}, n_results={len(self.results)})"

    def add_result(self, result: "ValidationResult") -> None:
        """Add result to the validation report.

        :param result: The result to append; insertion order is preserved.
        """
        self.results.append(result)

    def print_report(self) -> None:
        """Print all results of the report to standard output.

        A header line with the report name comes first, followed by one line
        per result showing its category and message.
        """
        print(f"Validation Report: {self.name}")
        for result in self.results:
            print(f"Category: {result.category}, Message: {result.message}")


def validate(
msdf: MappingSetDataFrame, validation_types: List[SchemaValidationType]
) -> List[ValidationReport]:
"""Validate SSSOM files against `sssom-schema` using linkML's validator function.

:param msdf: MappingSetDataFrame.
Expand All @@ -29,21 +67,35 @@ def validate(msdf: MappingSetDataFrame, validation_types: List[SchemaValidationT
SchemaValidationType.Shacl: validate_shacl,
SchemaValidationType.PrefixMapCompleteness: check_all_prefixes_in_curie_map,
}
results = []
for vt in validation_types:
validation_methods[vt](msdf)
result = validation_methods[vt](msdf)
results.append(result)
return results


def validate_json_schema(msdf: MappingSetDataFrame, fail_hard: bool = False) -> ValidationReport:
    """Validate a mapping set with linkml's ReferenceValidator and collect the findings.

    :param msdf: MappingSetDataFrame to be validated.
    :param fail_hard: If true, raise as soon as the validator reports any
        result instead of returning them in a report.
    :raises ValidationError: If ``fail_hard`` is true and the validator
        reported at least one result.
    :return: A report containing one ValidationResult per validator finding.
    """
    validator = ReferenceValidator(SchemaView(SCHEMA_YAML))
    mapping_set = to_mapping_set_document(msdf).mapping_set
    results = validator.validate(mapping_set, MappingSet)
    validation_results = ValidationReport("sssom_linkml_reference_validator")
    # Test the list of findings, not the object wrapping it: a plain object is
    # always truthy, so checking ``results`` itself would raise even when the
    # validator found nothing.
    # NOTE(review): assumes ``results.results`` is a sized collection - confirm
    # against the linkml version in use.
    findings = results.results
    if fail_hard and findings:
        raise ValidationError(f"Mapping set has validation errors: {results}.")
    for res in findings:
        r = ValidationResult("sssom_linkml_reference_validator")
        r.check = "TBD"
        r.data = res
        r.message = str(res)
        validation_results.add_result(r)
    return validation_results


def validate_shacl(msdf: MappingSetDataFrame) -> ValidationReport:
"""Validate SCHACL file.

:param msdf: TODO: https://github.com/linkml/linkml/issues/850 .
Expand All @@ -52,23 +104,11 @@ def validate_shacl(msdf: MappingSetDataFrame) -> None:
raise NotImplementedError


def validate_sparql(msdf: MappingSetDataFrame) -> None:
    """Placeholder for SPARQL-based validation of a mapping set.

    :param msdf: MappingSetDataFrame
    :raises NotImplementedError: Always; the validator has not been written yet.
    """
    # TODO: Complete this function. Draft left in place by the original author:
    #   queries = {}
    #   validator = SparqlDataValidator(SCHEMA_YAML, queries=queries)
    #   mapping_set = to_mapping_set_document(msdf).mapping_set
    raise NotImplementedError


def check_all_prefixes_in_curie_map(msdf: MappingSetDataFrame) -> None:
def check_all_prefixes_in_curie_map(msdf: MappingSetDataFrame, fail_hard=True) -> ValidationReport:
"""Check all `EntityReference` slots are mentioned in 'curie_map'.

:param msdf: MappingSetDataFrame
:param fail_hard: If true, validation will fail on first error
:raises ValidationError: If all prefixes not in curie_map.
"""
prefixes = get_all_prefixes(msdf)
Expand All @@ -84,5 +124,7 @@ def check_all_prefixes_in_curie_map(msdf: MappingSetDataFrame) -> None:
for pref in prefixes:
if pref != "" and pref not in list(msdf.prefix_map.keys()):
missing_prefixes.append(pref)
if missing_prefixes:
if missing_prefixes and fail_hard:
raise ValidationError(f"The prefixes in {missing_prefixes} are missing from 'curie_map'.")
report = ValidationReport("sssom_missing_prefixes")
return report
3 changes: 2 additions & 1 deletion tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ def test_validate_json(self):
Validate of the incoming file (basic.tsv) abides
by the rules set by `sssom-schema`.
"""
self.assertIsNone(validate(self.correct_msdf1, self.validation_types))
results = validate(self.correct_msdf1, self.validation_types)
self.assertEqual(len(results), 2)

def test_validate_json_fail(self):
"""
Expand Down