Skip to content

Commit ef7d405

Browse files
committed
Split validation into steps
XSD validation and Schematron validation are now performed in separate steps (one step for the XSD and one step for each Schematron file).
1 parent ae9bcb6 commit ef7d405

File tree

2 files changed

+77
-62
lines changed

2 files changed

+77
-62
lines changed
Lines changed: 54 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,10 @@
11
import sys
2+
import argparse
23
from lxml import etree, isoschematron
34

45

5-
if __name__ == '__main__':
6-
assert len(sys.argv) == 6
7-
xml_filepath, xsd_filepath, sch_structure_filepath, sch_references_filepath, sch_business_filepath = sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5]
8-
9-
# Parse XML document once
10-
with open(xml_filepath, 'rb') as xml:
11-
xml_doc = etree.parse(xml)
12-
13-
# Validate against XSD
6+
def validate_xsd(xml_doc, xsd_filepath):
7+
"""Validate XML document against XSD schema."""
148
with open(xsd_filepath, 'rb') as xsd:
159
xsd_doc = etree.parse(xsd)
1610
schema = etree.XMLSchema(xsd_doc)
@@ -20,69 +14,72 @@
2014
print('! XSD validation failed.')
2115
for error in schema.error_log:
2216
print(' -', error.message)
17+
return False
2318
else:
2419
print('XSD validation passed.')
25-
26-
# Validate against Structure Schematron
27-
with open(sch_structure_filepath, 'rb') as sch:
20+
return True
21+
22+
23+
def validate_schematron(xml_doc, sch_filepath, validation_type):
24+
"""Validate XML document against Schematron schema."""
25+
with open(sch_filepath, 'rb') as sch:
2826
sch_doc = etree.parse(sch)
2927
schematron = isoschematron.Schematron(sch_doc, store_report=True)
30-
sch_structure_valid = schematron.validate(xml_doc)
28+
sch_valid = schematron.validate(xml_doc)
3129

32-
if not sch_structure_valid:
33-
print('! Schematron structure validation failed.')
30+
if not sch_valid:
31+
print(f'! Schematron {validation_type} validation failed.')
3432
svrl = schematron.validation_report
3533
if svrl is not None:
3634
for failed in svrl.xpath('//svrl:failed-assert',
37-
namespaces={'svrl': 'http://purl.oclc.org/dsdl/svrl'}):
35+
namespaces={'svrl': 'http://purl.oclc.org/dsdl/svrl'}):
3836
location = failed.get('location', 'unknown')
3937
messages = failed.xpath('svrl:text/text()',
40-
namespaces={'svrl': 'http://purl.oclc.org/dsdl/svrl'})
38+
namespaces={'svrl': 'http://purl.oclc.org/dsdl/svrl'})
4139
message = messages[0].strip() if messages else 'No message provided'
4240
print(f' - {location}: {message}')
41+
return False
4342
else:
44-
print('Schematron structure validation passed.')
43+
print(f'Schematron {validation_type} validation passed.')
44+
return True
45+
46+
47+
if __name__ == '__main__':
48+
parser = argparse.ArgumentParser(description='Validate XML against XSD and Schematron schemas')
49+
parser.add_argument('xml_file', help='Path to XML file to validate')
50+
parser.add_argument('--xsd', help='Path to XSD schema file')
51+
parser.add_argument('--schematron', help='Path to Schematron schema file')
4552

46-
# Validate against References Schematron
47-
with open(sch_references_filepath, 'rb') as sch:
48-
sch_doc = etree.parse(sch)
49-
schematron = isoschematron.Schematron(sch_doc, store_report=True)
50-
sch_references_valid = schematron.validate(xml_doc)
53+
args = parser.parse_args()
5154

52-
if not sch_references_valid:
53-
print('! Schematron references validation failed.')
54-
svrl = schematron.validation_report
55-
if svrl is not None:
56-
for failed in svrl.xpath('//svrl:failed-assert',
57-
namespaces={'svrl': 'http://purl.oclc.org/dsdl/svrl'}):
58-
location = failed.get('location', 'unknown')
59-
messages = failed.xpath('svrl:text/text()',
60-
namespaces={'svrl': 'http://purl.oclc.org/dsdl/svrl'})
61-
message = messages[0].strip() if messages else 'No message provided'
62-
print(f' - {location}: {message}')
63-
else:
64-
print('Schematron references validation passed.')
55+
if not args.xsd and not args.schematron:
56+
print('Error: Either --xsd or --schematron must be specified')
57+
sys.exit(1)
6558

66-
# Validate against Business Rules Schematron
67-
with open(sch_business_filepath, 'rb') as sch:
68-
sch_doc = etree.parse(sch)
69-
schematron = isoschematron.Schematron(sch_doc, store_report=True)
70-
sch_business_valid = schematron.validate(xml_doc)
59+
# Parse XML document once
60+
with open(args.xml_file, 'rb') as xml:
61+
xml_doc = etree.parse(xml)
7162

72-
if not sch_business_valid:
73-
print('! Schematron business validation failed.')
74-
svrl = schematron.validation_report
75-
if svrl is not None:
76-
for failed in svrl.xpath('//svrl:failed-assert',
77-
namespaces={'svrl': 'http://purl.oclc.org/dsdl/svrl'}):
78-
location = failed.get('location', 'unknown')
79-
messages = failed.xpath('svrl:text/text()',
80-
namespaces={'svrl': 'http://purl.oclc.org/dsdl/svrl'})
81-
message = messages[0].strip() if messages else 'No message provided'
82-
print(f' - {location}: {message}')
83-
else:
84-
print('Schematron business validation passed.')
63+
validation_passed = True
64+
65+
# Validate against XSD if specified
66+
if args.xsd:
67+
validation_passed = validate_xsd(xml_doc, args.xsd)
68+
69+
# Validate against Schematron if specified
70+
if args.schematron:
71+
# Determine validation type from filename
72+
validation_type = "unknown"
73+
if "structure" in args.schematron.lower():
74+
validation_type = "structure"
75+
elif "references" in args.schematron.lower():
76+
validation_type = "references"
77+
elif "business" in args.schematron.lower():
78+
validation_type = "business"
79+
80+
schematron_result = validate_schematron(xml_doc, args.schematron, validation_type)
81+
validation_passed = validation_passed and schematron_result
8582

86-
# Exit with error if any validation failed
87-
if not (xsd_valid and sch_structure_valid and sch_references_valid and sch_business_valid):
88-
exit(1)
83+
# Exit with error if validation failed
84+
if not validation_passed:
85+
sys.exit(1)

.github/workflows/validate_xml_with_xsd_and_schematron.yml

Lines changed: 23 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -73,12 +73,30 @@ jobs:
7373
${{ matrix.xml_doc }}
7474
sparse-checkout-cone-mode: false
7575

76-
- name: Run validation
76+
- name: Validate against XSD Schema
7777
working-directory: schema
7878
run: |
7979
python3 "$GITHUB_WORKSPACE"/.github/workflows/validate_xml_with_xsd_and_schematron.py \
8080
"$GITHUB_WORKSPACE"/"${{ matrix.xml_doc }}" \
81-
pvcollada_schema_0.1.xsd \
82-
pvcollada_structure_2.0.sch \
83-
pvcollada_references_2.0.sch \
84-
pvcollada_business_2.0.sch
81+
--xsd pvcollada_schema_0.1.xsd
82+
83+
- name: Validate against Structure Schematron
84+
working-directory: schema
85+
run: |
86+
python3 "$GITHUB_WORKSPACE"/.github/workflows/validate_xml_with_xsd_and_schematron.py \
87+
"$GITHUB_WORKSPACE"/"${{ matrix.xml_doc }}" \
88+
--schematron pvcollada_structure_2.0.sch
89+
90+
- name: Validate against References Schematron
91+
working-directory: schema
92+
run: |
93+
python3 "$GITHUB_WORKSPACE"/.github/workflows/validate_xml_with_xsd_and_schematron.py \
94+
"$GITHUB_WORKSPACE"/"${{ matrix.xml_doc }}" \
95+
--schematron pvcollada_references_2.0.sch
96+
97+
- name: Validate against Business Rules Schematron
98+
working-directory: schema
99+
run: |
100+
python3 "$GITHUB_WORKSPACE"/.github/workflows/validate_xml_with_xsd_and_schematron.py \
101+
"$GITHUB_WORKSPACE"/"${{ matrix.xml_doc }}" \
102+
--schematron pvcollada_business_2.0.sch

0 commit comments

Comments
 (0)