Skip to content

Commit 45e7fd9

Browse files
authored
Return validation reports (#579)
1 parent 4b4fb20 commit 45e7fd9

File tree

3 files changed

+40
-15
lines changed

3 files changed

+40
-15
lines changed

src/sssom/io.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import yaml
1414
from curies import Converter
1515
from deprecation import deprecated
16+
from linkml.validator import ValidationReport
1617

1718
from sssom.validators import validate
1819

@@ -98,17 +99,23 @@ def parse_file(
9899
write_table(doc, output, embedded_mode)
99100

100101

101-
def validate_file(input_path: str, validation_types: List[SchemaValidationType]) -> None:
102+
def validate_file(
103+
input_path: str,
104+
validation_types: Optional[List[SchemaValidationType]] = None,
105+
fail_on_error: bool = True,
106+
) -> dict[SchemaValidationType, ValidationReport]:
102107
"""Validate the incoming SSSOM TSV according to the SSSOM specification.
103108
104109
:param input_path: The path to the input file in one of the legal formats, e.g. obographs, alignmentapi-xml
105110
:param validation_types: A list of validation types to run.
111+
:param fail_on_error: Should an exception be raised on error of _any_ validator?
112+
:returns: A dictionary from validation types to validation reports
106113
"""
107114
# Two things to check:
108115
# 1. All prefixes in the DataFrame are defined in prefix_map
109116
# 2. All columns in the DataFrame abide by sssom-schema.
110117
msdf = parse_sssom_table(file_path=input_path)
111-
validate(msdf=msdf, validation_types=validation_types)
118+
return validate(msdf=msdf, validation_types=validation_types, fail_on_error=fail_on_error)
112119

113120

114121
def split_file(input_path: str, output_directory: Union[str, Path]) -> None:

src/sssom/validators.py

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Validators."""
22

33
import logging
4-
from typing import Callable, List, Mapping
4+
from typing import Callable, List, Mapping, Optional
55

66
from jsonschema import ValidationError
77
from linkml.validator import ValidationReport, Validator
@@ -12,22 +12,29 @@
1212
from sssom.parsers import to_mapping_set_document
1313
from sssom.util import MappingSetDataFrame, get_all_prefixes
1414

15-
from .constants import SCHEMA_YAML, SchemaValidationType, _get_sssom_schema_object
15+
from .constants import (
16+
DEFAULT_VALIDATION_TYPES,
17+
SCHEMA_YAML,
18+
SchemaValidationType,
19+
_get_sssom_schema_object,
20+
)
1621

1722

1823
def validate(
1924
msdf: MappingSetDataFrame,
20-
validation_types: List[SchemaValidationType],
25+
validation_types: Optional[List[SchemaValidationType]] = None,
2126
fail_on_error: bool = True,
22-
) -> None:
27+
) -> dict[SchemaValidationType, ValidationReport]:
2328
"""Validate SSSOM files against `sssom-schema` using linkML's validator function.
2429
2530
:param msdf: MappingSetDataFrame.
2631
:param validation_types: SchemaValidationType
2732
:param fail_on_error: If true, throw an error when execution of a method has failed
33+
:returns: A dictionary from validation types to validation reports
2834
"""
29-
for vt in validation_types:
30-
VALIDATION_METHODS[vt](msdf, fail_on_error)
35+
if validation_types is None:
36+
validation_types = DEFAULT_VALIDATION_TYPES
37+
return {vt: VALIDATION_METHODS[vt](msdf, fail_on_error) for vt in validation_types}
3138

3239

3340
def print_linkml_report(report: ValidationReport, fail_on_error: bool = True):
@@ -88,7 +95,7 @@ def _clean_dict(d):
8895
return cleaned_dict
8996

9097

91-
def validate_json_schema(msdf: MappingSetDataFrame, fail_on_error: bool = True) -> None:
98+
def validate_json_schema(msdf: MappingSetDataFrame, fail_on_error: bool = True) -> ValidationReport:
9299
"""Validate JSON Schema using linkml's JsonSchemaDataValidator.
93100
94101
:param msdf: MappingSetDataFrame to be validated.
@@ -106,9 +113,10 @@ def validate_json_schema(msdf: MappingSetDataFrame, fail_on_error: bool = True)
106113

107114
report = validator.validate(mapping_set_dict, "mapping set")
108115
print_linkml_report(report, fail_on_error)
116+
return report
109117

110118

111-
def validate_shacl(msdf: MappingSetDataFrame, fail_on_error: bool = True) -> None:
119+
def validate_shacl(msdf: MappingSetDataFrame, fail_on_error: bool = True) -> ValidationReport:
112120
"""Validate SHACL file.
113121
114122
:param msdf: TODO: https://github.com/linkml/linkml/issues/850 .
@@ -118,7 +126,7 @@ def validate_shacl(msdf: MappingSetDataFrame, fail_on_error: bool = True) -> Non
118126
raise NotImplementedError
119127

120128

121-
def validate_sparql(msdf: MappingSetDataFrame, fail_on_error: bool = True) -> None:
129+
def validate_sparql(msdf: MappingSetDataFrame, fail_on_error: bool = True) -> ValidationReport:
122130
"""Validate SPARQL file.
123131
124132
:param msdf: MappingSetDataFrame
@@ -132,7 +140,9 @@ def validate_sparql(msdf: MappingSetDataFrame, fail_on_error: bool = True) -> No
132140
raise NotImplementedError
133141

134142

135-
def check_all_prefixes_in_curie_map(msdf: MappingSetDataFrame, fail_on_error: bool = True) -> None:
143+
def check_all_prefixes_in_curie_map(
144+
msdf: MappingSetDataFrame, fail_on_error: bool = True
145+
) -> ValidationReport:
136146
"""Check all `EntityReference` slots are mentioned in 'curie_map'.
137147
138148
:param msdf: MappingSetDataFrame
@@ -154,9 +164,12 @@ def check_all_prefixes_in_curie_map(msdf: MappingSetDataFrame, fail_on_error: bo
154164
)
155165
report = ValidationReport(results=validation_results)
156166
print_linkml_report(report, fail_on_error)
167+
return report
157168

158169

159-
def check_strict_curie_format(msdf: MappingSetDataFrame, fail_on_error: bool = True) -> None:
170+
def check_strict_curie_format(
171+
msdf: MappingSetDataFrame, fail_on_error: bool = True
172+
) -> ValidationReport:
160173
"""Check all `EntityReference` slots are formatted as unambiguous curies.
161174
162175
Implemented rules:
@@ -194,9 +207,10 @@ def check_strict_curie_format(msdf: MappingSetDataFrame, fail_on_error: bool = T
194207

195208
report = ValidationReport(results=validation_results)
196209
print_linkml_report(report, fail_on_error)
210+
return report
197211

198212

199-
VALIDATION_METHODS: Mapping[SchemaValidationType, Callable] = {
213+
VALIDATION_METHODS: Mapping[SchemaValidationType, Callable[..., ValidationReport]] = {
200214
SchemaValidationType.JsonSchema: validate_json_schema,
201215
SchemaValidationType.Shacl: validate_shacl,
202216
SchemaValidationType.Sparql: validate_sparql,

tests/test_validate.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,11 @@ def test_validate_json(self):
2828
Validate that the incoming file (basic.tsv) abides
2929
by the rules set by `sssom-schema`.
3030
"""
31-
self.assertIsNone(validate(self.correct_msdf1, self.validation_types))
31+
rv = validate(self.correct_msdf1, self.validation_types)
32+
self.assertIsNotNone(rv)
33+
self.assertIn(SchemaValidationType.JsonSchema, rv)
34+
json_validation = rv[SchemaValidationType.JsonSchema]
35+
self.assertEqual([], json_validation.results)
3236

3337
@unittest.skip(
3438
reason="""\

0 commit comments

Comments
 (0)