Skip to content

Commit c9bd993

Browse files
committed
domain range pass
1 parent b4b31ff commit c9bd993

File tree

2 files changed

+54
-29
lines changed

2 files changed

+54
-29
lines changed

lib/bald/__init__.py

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12,28 +12,19 @@ def __init__(self):
1212
self.cache = {}
1313

1414
def __getitem__(self, item):
15-
uri = item
15+
16+
if not item.startswith('http://') or item.startswith('https://'):
17+
raise ValueError('{} is not a HTTP URI.'.format(item))
1618
if item not in self.cache:
17-
if not uri.startswith('http://') or uri.startswith('https://'):
18-
raise ValueError('{} is not a HTTP URI.'.format(item))
19-
if item in self.cache:
20-
result = self.cache[item]
21-
else:
22-
r = requests.get(uri)
23-
if r.status_code == 200:
24-
headers={'Accept':'text/turtle'}
25-
rraw = requests.get(uri, headers=headers)
26-
self.cache[item] = rraw.status_code
27-
else:
28-
self.cache[item] = r.status_code
29-
30-
result = self.cache[item]
31-
return result
19+
headers={'Accept':'text/turtle'}
20+
self.cache[item] = requests.get(item, headers=headers)
21+
22+
return self.cache[item]
3223

3324

3425
def check_uri(self, uri):
3526
result = False
36-
if self[uri] == 200:
27+
if self[uri].status_code == 200:
3728
result = True
3829
return result
3930

@@ -95,7 +86,7 @@ def validate_hdf5(afilepath):
9586
valid = True
9687
cache = {}
9788
root_container = Subject(fhandle.attrs)
98-
root_val = ContainerValidation(subject=root_container)
89+
root_val = ContainerValidation(subject=root_container, fhandle=fhandle)
9990
if not root_val.is_valid():
10091
valid = False
10192
# iterate through the datasets
@@ -105,7 +96,7 @@ def validate_hdf5(afilepath):
10596
sattrs = dict(fhandle.attrs).copy()
10697
sattrs.update(dataset.attrs)
10798
dset = Subject(sattrs)
108-
dset_val = DatasetValidation(name, dataset, fhandle, subject=dset)
99+
dset_val = DatasetValidation(name, dataset, fhandle=fhandle, subject=dset)
109100
if not dset_val.is_valid():
110101
valid = False
111102

lib/bald/validation.py

Lines changed: 44 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11

22
import numpy as np
3+
import rdflib
34

45
import bald
56

@@ -40,8 +41,9 @@ def exceptions(self):
4041

4142
class SubjectValidation(Validation):
4243

43-
def __init__(self, subject, httpcache=None):
44+
def __init__(self, subject, fhandle, httpcache=None):
4445
self.subject = subject
46+
self.fhandle = fhandle
4547
if isinstance(httpcache, bald.HttpStatusCache):
4648
self.cache = httpcache
4749
else:
@@ -55,6 +57,10 @@ def exceptions(self):
5557
exceptions = []
5658
exceptions = self.check_attr_uris(exceptions)
5759
exceptions = self.check_attr_domain_range(exceptions)
60+
exceptions = self._extra_exceptions(exceptions)
61+
return exceptions
62+
63+
def _extra_exceptions(self, exceptions):
5864
return exceptions
5965

6066
def check_attr_uris(self, exceptions):
@@ -77,6 +83,39 @@ def _check_uri(uri, exceptions):
7783
return exceptions
7884

7985
def check_attr_domain_range(self, exceptions):
86+
for attr, value in self.subject.attrs.iteritems():
87+
uri = self.subject.unpack_uri(attr)
88+
if self.cache.check_uri(uri):
89+
#thus we have a payload
90+
# go rdf
91+
g = rdflib.Graph()
92+
g.parse(data=self.cache[uri].text, format="n3")
93+
query = ('SELECT ?s \n'
94+
'(GROUP_CONCAT(?domain; SEPARATOR=" | ") AS ?domains) \n'
95+
'(GROUP_CONCAT(?type; SEPARATOR=" | ") AS ?types) \n'
96+
'WHERE {{ \n'
97+
'?s a ?type . \n'
98+
'OPTIONAL{{ ?s rdfs:domain ?domain . }} \n'
99+
'FILTER(?s = <{uria}> || ?s = <{urib}>) \n'
100+
'}} \n'
101+
'GROUP BY ?s \n'.format(uria=uri, urib= uri.rstrip('/')))
102+
qres = list(g.query(query))
103+
if len(qres) != 1:
104+
raise ValueError('{} does not define one and only one \n'
105+
'rdfs:domain'.format(uri))
106+
qres, = qres
107+
# implement recursive inheritance check
108+
# we need to check if the value that the attr points to
109+
# has an rdf:type which is the same as the one required by
110+
# the object property constraint
111+
# The value may be a URI or it may be a reference to another
112+
# subject within the file.
113+
# therefore subjects in the file have to be typed?!?
114+
115+
# import pdb; pdb.set_trace()
116+
# if qres.domains != rdflib.term.URIRef(value):
117+
# msg = ('The attribute {} references ')
118+
# exceptions.appen(ValueError(msg))
80119
return exceptions
81120

82121

@@ -85,24 +124,19 @@ class ContainerValidation(SubjectValidation):
85124
def __init__(self, **kwargs):
86125
super(ContainerValidation, self).__init__(**kwargs)
87126

88-
def exceptions(self):
89-
exceptions = []
90-
exceptions = self.check_attr_uris(exceptions)
91-
return exceptions
127+
128+
92129

93130
class DatasetValidation(SubjectValidation):
94131

95132

96-
def __init__(self, name, dataset, fhandle, **kwargs):
133+
def __init__(self, name, dataset, **kwargs):
97134

98135
self.dataset = dataset
99136
self.name = name
100-
self.fhandle = fhandle
101137
super(DatasetValidation, self).__init__(**kwargs)
102138

103-
def exceptions(self):
104-
exceptions = []
105-
exceptions = self.check_attr_uris(exceptions)
139+
def _extra_exceptions(self, exceptions):
106140
exceptions = self.check_array_references(exceptions)
107141
return exceptions
108142

0 commit comments

Comments
 (0)