Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions src/sssom/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import yaml
from curies import Converter
from deprecation import deprecated
from linkml.validator import ValidationReport

from sssom.validators import validate

Expand Down Expand Up @@ -98,17 +99,23 @@ def parse_file(
write_table(doc, output, embedded_mode)


def validate_file(
    input_path: str,
    validation_types: Optional[List[SchemaValidationType]] = None,
    fail_on_error: bool = True,
) -> dict[SchemaValidationType, ValidationReport]:
    """Validate the incoming SSSOM TSV according to the SSSOM specification.

    The file is parsed with ``parse_sssom_table`` and the resulting mapping set
    data frame is handed to :func:`sssom.validators.validate`.

    :param input_path: The path to the input SSSOM table file.
    :param validation_types: A list of validation types to run. ``None`` is
        passed through unchanged to ``validate``.
    :param fail_on_error: Should an exception be raised on error of _any_ validator?
    :returns: A dictionary from validation types to validation reports
    """
    # Two things to check:
    # 1. All prefixes in the DataFrame are defined in prefix_map
    # 2. All columns in the DataFrame abide by sssom-schema.
    msdf = parse_sssom_table(file_path=input_path)
    return validate(msdf=msdf, validation_types=validation_types, fail_on_error=fail_on_error)


def split_file(input_path: str, output_directory: Union[str, Path]) -> None:
Expand Down
38 changes: 26 additions & 12 deletions src/sssom/validators.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Validators."""

import logging
from typing import Callable, List, Mapping
from typing import Callable, List, Mapping, Optional

from jsonschema import ValidationError
from linkml.validator import ValidationReport, Validator
Expand All @@ -12,22 +12,29 @@
from sssom.parsers import to_mapping_set_document
from sssom.util import MappingSetDataFrame, get_all_prefixes

from .constants import SCHEMA_YAML, SchemaValidationType, _get_sssom_schema_object
from .constants import (
DEFAULT_VALIDATION_TYPES,
SCHEMA_YAML,
SchemaValidationType,
_get_sssom_schema_object,
)


def validate(
    msdf: MappingSetDataFrame,
    validation_types: Optional[List[SchemaValidationType]] = None,
    fail_on_error: bool = True,
) -> dict[SchemaValidationType, ValidationReport]:
    """Validate SSSOM files against `sssom-schema` using linkML's validator function.

    :param msdf: MappingSetDataFrame.
    :param validation_types: The validation types to run. When ``None``,
        ``DEFAULT_VALIDATION_TYPES`` is used.
    :param fail_on_error: If true, throw an error when execution of a method has failed
    :returns: A dictionary from validation types to validation reports
    """
    if validation_types is None:
        validation_types = DEFAULT_VALIDATION_TYPES
    # Each validation type is dispatched to its registered method; the report
    # each method returns is collected under the type that produced it.
    return {vt: VALIDATION_METHODS[vt](msdf, fail_on_error) for vt in validation_types}


def print_linkml_report(report: ValidationReport, fail_on_error: bool = True):
Expand Down Expand Up @@ -88,7 +95,7 @@ def _clean_dict(d):
return cleaned_dict


def validate_json_schema(msdf: MappingSetDataFrame, fail_on_error: bool = True) -> None:
def validate_json_schema(msdf: MappingSetDataFrame, fail_on_error: bool = True) -> ValidationReport:
"""Validate JSON Schema using linkml's JsonSchemaDataValidator.

:param msdf: MappingSetDataFrame to be validated.
Expand All @@ -106,9 +113,10 @@ def validate_json_schema(msdf: MappingSetDataFrame, fail_on_error: bool = True)

report = validator.validate(mapping_set_dict, "mapping set")
print_linkml_report(report, fail_on_error)
return report


def validate_shacl(msdf: MappingSetDataFrame, fail_on_error: bool = True) -> None:
def validate_shacl(msdf: MappingSetDataFrame, fail_on_error: bool = True) -> ValidationReport:
"""Validate SCHACL file.

:param msdf: TODO: https://github.com/linkml/linkml/issues/850 .
Expand All @@ -118,7 +126,7 @@ def validate_shacl(msdf: MappingSetDataFrame, fail_on_error: bool = True) -> Non
raise NotImplementedError


def validate_sparql(msdf: MappingSetDataFrame, fail_on_error: bool = True) -> None:
def validate_sparql(msdf: MappingSetDataFrame, fail_on_error: bool = True) -> ValidationReport:
"""Validate SPARQL file.

:param msdf: MappingSetDataFrame
Expand All @@ -132,7 +140,9 @@ def validate_sparql(msdf: MappingSetDataFrame, fail_on_error: bool = True) -> No
raise NotImplementedError


def check_all_prefixes_in_curie_map(msdf: MappingSetDataFrame, fail_on_error: bool = True) -> None:
def check_all_prefixes_in_curie_map(
msdf: MappingSetDataFrame, fail_on_error: bool = True
) -> ValidationReport:
"""Check all `EntityReference` slots are mentioned in 'curie_map'.

:param msdf: MappingSetDataFrame
Expand All @@ -154,9 +164,12 @@ def check_all_prefixes_in_curie_map(msdf: MappingSetDataFrame, fail_on_error: bo
)
report = ValidationReport(results=validation_results)
print_linkml_report(report, fail_on_error)
return report


def check_strict_curie_format(msdf: MappingSetDataFrame, fail_on_error: bool = True) -> None:
def check_strict_curie_format(
msdf: MappingSetDataFrame, fail_on_error: bool = True
) -> ValidationReport:
"""Check all `EntityReference` slots are formatted as unambiguous curies.

Implemented rules:
Expand Down Expand Up @@ -194,9 +207,10 @@ def check_strict_curie_format(msdf: MappingSetDataFrame, fail_on_error: bool = T

report = ValidationReport(results=validation_results)
print_linkml_report(report, fail_on_error)
return report


VALIDATION_METHODS: Mapping[SchemaValidationType, Callable] = {
VALIDATION_METHODS: Mapping[SchemaValidationType, Callable[..., ValidationReport]] = {
SchemaValidationType.JsonSchema: validate_json_schema,
SchemaValidationType.Shacl: validate_shacl,
SchemaValidationType.Sparql: validate_sparql,
Expand Down
6 changes: 5 additions & 1 deletion tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@ def test_validate_json(self):
Validate of the incoming file (basic.tsv) abides
by the rules set by `sssom-schema`.
"""
self.assertIsNone(validate(self.correct_msdf1, self.validation_types))
rv = validate(self.correct_msdf1, self.validation_types)
self.assertIsNotNone(rv)
self.assertIn(SchemaValidationType.JsonSchema, rv)
json_validation = rv[SchemaValidationType.JsonSchema]
self.assertEqual([], json_validation.results)

@unittest.skip(
reason="""\
Expand Down
Loading