diff --git a/src/sssom/validators.py b/src/sssom/validators.py index 02bf6e38..6de1684a 100644 --- a/src/sssom/validators.py +++ b/src/sssom/validators.py @@ -1,7 +1,7 @@ """Validators.""" import logging -from typing import List +from typing import Dict, List from jsonschema import ValidationError @@ -18,7 +18,45 @@ from .constants import SCHEMA_YAML, SchemaValidationType -def validate(msdf: MappingSetDataFrame, validation_types: List[SchemaValidationType]) -> None: +class ValidationResult: + """An individual validation result from a validation system.""" + + def __init__(self, category: str): + """Initialise the validation result. + + :param category: Name of the category this validation result belongs to. + """ + self.message = "" + self.category = category + self.check = "" + self.data: Dict = {} + + +class ValidationReport: + """A validation report that contains the results of various validation methods.""" + + def __init__(self, name: str): + """Initialise the validation report. + + :param name: Name of the category this validation report corresponds to. + """ + self.name = name + self.results: List[ValidationResult] = [] + + def add_result(self, result: ValidationResult): + """Add result to the validation report.""" + self.results.append(result) + + def print_report(self): + """Print all the results of the report.""" + print(f"Validation Report: {self.name}") + for result in self.results: + print(f"Category: {result.category}, Message: {result.message}") + + +def validate( + msdf: MappingSetDataFrame, validation_types: List[SchemaValidationType] +) -> List[ValidationReport]: """Validate SSSOM files against `sssom-schema` using linkML's validator function. :param msdf: MappingSetDataFrame. 
@@ -29,21 +67,35 @@ def validate(msdf: MappingSetDataFrame, validation_types: List[SchemaValidationT SchemaValidationType.Shacl: validate_shacl, SchemaValidationType.PrefixMapCompleteness: check_all_prefixes_in_curie_map, } + results = [] for vt in validation_types: - validation_methods[vt](msdf) + result = validation_methods[vt](msdf) + results.append(result) + return results -def validate_json_schema(msdf: MappingSetDataFrame) -> None: +def validate_json_schema(msdf: MappingSetDataFrame, fail_hard=False) -> ValidationReport: """Validate JSON Schema using linkml's JsonSchemaDataValidator. :param msdf: MappingSetDataFrame to eb validated. + :param fail_hard: If true, raise a ValidationError when the validator returns results. """ validator = ReferenceValidator(SchemaView(SCHEMA_YAML)) mapping_set = to_mapping_set_document(msdf).mapping_set - validator.validate(mapping_set, MappingSet) - - -def validate_shacl(msdf: MappingSetDataFrame) -> None: + results = validator.validate(mapping_set, MappingSet) + validation_results = ValidationReport("sssom_linkml_reference_validator") + if fail_hard and results: + raise ValidationError(f"Mapping set has validation errors: {results}.") + for res in results.results: + r = ValidationResult("sssom_linkml_reference_validator") + r.check = "TBD" + r.data = res + r.message = str(res) + validation_results.add_result(r) + return validation_results + + +def validate_shacl(msdf: MappingSetDataFrame) -> ValidationReport: """Validate SCHACL file. :param msdf: TODO: https://github.com/linkml/linkml/issues/850 . @@ -52,23 +104,11 @@ def validate_shacl(msdf: MappingSetDataFrame) -> None: raise NotImplementedError -def validate_sparql(msdf: MappingSetDataFrame) -> None: - """Validate SPARQL file. - - :param msdf: MappingSetDataFrame - :raises NotImplementedError: Not yet implemented. 
- """ - # queries = {} - # validator = SparqlDataValidator(SCHEMA_YAML,queries=queries) - # mapping_set = to_mapping_set_document(msdf).mapping_set - # TODO: Complete this function - raise NotImplementedError - - -def check_all_prefixes_in_curie_map(msdf: MappingSetDataFrame) -> None: +def check_all_prefixes_in_curie_map(msdf: MappingSetDataFrame, fail_hard=True) -> ValidationReport: """Check all `EntityReference` slots are mentioned in 'curie_map'. :param msdf: MappingSetDataFrame + :param fail_hard: If true, raise a ValidationError when any prefixes are missing :raises ValidationError: If all prefixes not in curie_map. """ prefixes = get_all_prefixes(msdf) @@ -84,5 +124,7 @@ def check_all_prefixes_in_curie_map(msdf: MappingSetDataFrame) -> None: for pref in prefixes: if pref != "" and pref not in list(msdf.prefix_map.keys()): missing_prefixes.append(pref) - if missing_prefixes: + if missing_prefixes and fail_hard: raise ValidationError(f"The prefixes in {missing_prefixes} are missing from 'curie_map'.") + report = ValidationReport("sssom_missing_prefixes") + return report diff --git a/tests/test_validate.py b/tests/test_validate.py index e267586a..254dca3d 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -27,7 +27,8 @@ def test_validate_json(self): Validate of the incoming file (basic.tsv) abides by the rules set by `sssom-schema`. """ - self.assertIsNone(validate(self.correct_msdf1, self.validation_types)) + results = validate(self.correct_msdf1, self.validation_types) + self.assertEqual(len(results), 2) def test_validate_json_fail(self): """