Skip to content

Commit 65e852c

Browse files
committed
tests and test optimisation
1 parent dc90cca commit 65e852c

File tree

8 files changed

+369
-36
lines changed

8 files changed

+369
-36
lines changed

lib/bald/__init__.py

Lines changed: 54 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,12 @@ def __getitem__(self, item):
234234
raise ValueError('{} is not a HTTP URI.'.format(item))
235235
if item not in self.cache:
236236
headers = {'Accept': 'text/turtle'}
237+
# import datetime
238+
# now = datetime.datetime.utcnow()
239+
# print('\ndownloading: {}'.format(item))
237240
self.cache[item] = requests.get(item, headers=headers)
241+
# then = datetime.datetime.utcnow()
242+
# print('{}s'.format((then-now).total_seconds()))
238243

239244
return self.cache[item]
240245

@@ -536,6 +541,7 @@ def load_netcdf(afilepath, baseuri=None):
536541
with load(afilepath) as fhandle:
537542
if baseuri is None:
538543
baseuri = 'file://{}'.format(afilepath)
544+
identity = baseuri
539545
prefix_var_name = None
540546
if hasattr(fhandle, 'bald__isPrefixedBy'):
541547
prefix_var_name = fhandle.bald__isPrefixedBy
@@ -578,13 +584,29 @@ def load_netcdf(afilepath, baseuri=None):
578584
attrs = {}
579585
for k in fhandle.ncattrs():
580586
attrs[k] = getattr(fhandle, k)
581-
# It would be nice to use the URI of the file if it is known.
582-
if baseuri is not None:
583-
identity = baseuri
584-
else:
585-
identity = 'root'
587+
# process Conventions
588+
# Conventions = "CF-1.6, ACDD-1.3"
589+
aliasgraph = rdflib.Graph()
590+
if hasattr(fhandle, 'Conventions'):
591+
conventions = [c.strip() for c in fhandle.Conventions.split(',')]
592+
for conv in conventions:
593+
if conv.startswith('CF-'):
594+
uri = 'http://def.scitools.org.uk/CFTerms?_format=ttl'
595+
result = aliasgraph.parse(uri)
596+
qstr = ('select ?alias ?uri where '
597+
'{?uri dct:identifier ?alias .}')
598+
qres = aliasgraph.query(qstr)
599+
600+
new_aliases = [(str(q[0]), str(q[1])) for q in list(qres)]
601+
na_keys = [n[0] for n in new_aliases]
602+
if len(set(na_keys)) != len(na_keys):
603+
raise ValueError('duplicate aliases')
604+
aliases = careful_update(aliases, dict(new_aliases))
605+
606+
586607
root_container = Container(identity, attrs, prefixes=prefixes,
587608
aliases=aliases)
609+
588610
root_container.attrs['bald__contains'] = []
589611
file_variables = {}
590612
for name in fhandle.variables:
@@ -612,7 +634,20 @@ def load_netcdf(afilepath, baseuri=None):
612634
file_variables[name] = var
613635

614636

615-
637+
reference_prefixes = dict()
638+
reference_graph = aliasgraph
639+
reference_graph.parse('http://binary-array-ld.net/latest?_format=ttl')
640+
qstr = ('prefix bald: <http://binary-array-ld.net/latest/> '
641+
'select ?s '
642+
'where { '
643+
' ?s rdfs:range ?type . '
644+
'filter(?type != rdfs:Literal)'
645+
'}')
646+
refs = reference_graph.query(qstr)
647+
648+
ref_prefs = [str(ref[0]) for ref in list(refs)]
649+
650+
616651
# cycle again and find references
617652
for name in fhandle.variables:
618653
if name == prefix_var_name or name == alias_var_name:
@@ -625,7 +660,12 @@ def load_netcdf(afilepath, baseuri=None):
625660
fhandle.variables[name].dimensions[0] == name):
626661
sattrs['bald__array'] = name
627662

628-
for sattr in sattrs:
663+
# for sattr in sattrs:
664+
for sattr in (sattr for sattr in sattrs if
665+
root_container.unpack_uri(sattr) in ref_prefs):
666+
# if sattr == 'coordinates':
667+
# import pdb; pdb.set_trace()
668+
629669
if (isinstance(sattrs[sattr], str) and
630670
file_variables.get(sattrs[sattr])):
631671
# next: remove all use of set, everything is dict or orderedDict
@@ -647,14 +687,9 @@ def load_netcdf(afilepath, baseuri=None):
647687
var.attrs[sattr] = set([file_variables.get(pref)
648688
for pref in potrefs_set])
649689

650-
# if name == 'pdim0':
651-
# import pdb; pdb.set_trace()
652-
653690
# coordinate variables are bald__references except for
654691
# variables that already declare themselves as bald__Reference
655692
if 'bald__Reference' not in var.rdf__type:
656-
# if name == 'pdim0':
657-
# import pdb; pdb.set_trace()
658693
for dim in fhandle.variables[name].dimensions:
659694
if file_variables.get(dim):
660695
cv_shape = fhandle.variables[dim].shape
@@ -687,14 +722,14 @@ def load_netcdf(afilepath, baseuri=None):
687722
return root_container
688723

689724

690-
def validate_netcdf(afilepath):
725+
def validate_netcdf(afilepath, cache=None):
691726
"""
692727
Validate a file with respect to binary-array-linked-data.
693728
Returns a :class:`bald.validation.Validation`
694729
695730
"""
696731
root_container = load_netcdf(afilepath)
697-
return validate(root_container)
732+
return validate(root_container, cache=cache)
698733

699734

700735
def validate_hdf5(afilepath):
@@ -706,7 +741,7 @@ def validate_hdf5(afilepath):
706741
root_container = load_hdf5(afilepath)
707742
return validate(root_container)
708743

709-
def validate(root_container, sval=None):
744+
def validate(root_container, sval=None, cache=None):
710745
"""
711746
Validate a Container with respect to binary-array-linked-data.
712747
Returns a :class:`bald.validation.Validation`
@@ -715,16 +750,16 @@ def validate(root_container, sval=None):
715750
if sval is None:
716751
sval = bv.StoredValidation()
717752

718-
root_val = bv.ContainerValidation(subject=root_container)
753+
root_val = bv.ContainerValidation(subject=root_container, httpcache=cache)
719754
sval.stored_exceptions += root_val.exceptions()
720755
for subject in root_container.attrs.get('bald__contains', []):
721756
if isinstance(subject, Array):
722-
array_val = bv.ArrayValidation(subject)
757+
array_val = bv.ArrayValidation(subject, httpcache=cache)
723758
sval.stored_exceptions += array_val.exceptions()
724759
elif isinstance(subject, Container):
725-
sval = validate(subject, sval=sval)
760+
sval = validate(subject, sval=sval, cache=cache)
726761
elif isinstance(subject, Subject):
727-
subject_val = bv.SubjectValidation(subject)
762+
subject_val = bv.SubjectValidation(subject, httpcache=cache)
728763
sval.stored_exceptions += subject_val.exceptions()
729764

730765
return sval

lib/bald/tests/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,14 @@
44
import tempfile
55
import unittest
66

7+
from bald import HttpCache
8+
9+
acache = HttpCache()
710

811
class BaldTestCase(unittest.TestCase):
12+
# make a cache for testing optimisation
13+
acache = acache
14+
915
@contextlib.contextmanager
1016
def temp_filename(self, suffix=''):
1117
temp_file = tempfile.mkstemp(suffix)
@@ -31,3 +37,4 @@ def assertStringEqual(self, first, second, msg=None):
3137
msg = ''.join(dlines)
3238

3339
assertion_func(first, second, msg=msg)
40+
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
@prefix bald: <http://binary-array-ld.net/latest/> .
2+
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
3+
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
4+
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
5+
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
6+
7+
<file://CDL/array_reference.cdl> a bald:Container ;
8+
bald:contains <file://CDL/array_reference.cdl/child_variable>,
9+
<file://CDL/array_reference.cdl/parent_variable> ;
10+
bald:isPrefixedBy "prefix_list" .
11+
12+
<file://CDL/array_reference.cdl/parent_variable> a bald:Array ;
13+
bald:references <file://CDL/array_reference.cdl/child_variable> ;
14+
bald:shape "(11, 17)" .
15+
16+
<file://CDL/array_reference.cdl/child_variable> a bald:Array,
17+
bald:Reference ;
18+
bald:array <file://CDL/array_reference.cdl/child_variable> ;
19+
bald:shape "(11, 17)" .
20+
Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
@prefix bald: <http://binary-array-ld.net/latest/> .
2+
@prefix ns1: <http://def.scitools.org.uk/CFTerms/> .
3+
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
4+
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
5+
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
6+
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
7+
8+
<file://CDL/point_template.cdl> a bald:Container ;
9+
<Conventions> "CF-1.6, ACDD-1.3" ;
10+
<acknowledgement> "thanks to the NCEI netCDF working group" ;
11+
<cdm_data_type> "Point" ;
12+
<comment> "This data file is just an example, the data are completely BOGUS!" ;
13+
<contributor_name> "NCEI" ;
14+
<contributor_role> "Data Center" ;
15+
<creator_email> "[email protected]" ;
16+
<creator_institution> "NCEI" ;
17+
<creator_name> "Mathew Biddle" ;
18+
<creator_type> "person" ;
19+
<creator_url> <http://www.nodc.noaa.gov/> ;
20+
<date_created> "2016-09-22T18:16:06.590413Z" ;
21+
<date_issued> "2016-09-22T18:16:06.590413Z" ;
22+
<date_metadata_modified> "2016-09-22T18:16:06.590413Z" ;
23+
<date_modified> "2016-09-22T18:16:06.590413Z" ;
24+
<geospatial_bounds> "POINT (-123.458000 38.048000)" ;
25+
<geospatial_bounds_crs> "EPSG:4326" ;
26+
<geospatial_bounds_vertical_crs> "EPSG:5829" ;
27+
<geospatial_lat_max> 3.8048e+01 ;
28+
<geospatial_lat_min> 3.8048e+01 ;
29+
<geospatial_lat_units> "degrees_north" ;
30+
<geospatial_lon_max> -1.23458e+02 ;
31+
<geospatial_lon_min> -1.23458e+02 ;
32+
<geospatial_lon_units> "degrees_east" ;
33+
<geospatial_vertical_max> 1.5e+00 ;
34+
<geospatial_vertical_min> 1.5e+00 ;
35+
<geospatial_vertical_positive> "down" ;
36+
<geospatial_vertical_units> "m" ;
37+
<history> "This file was created on 2016-09-22T18:16:06.590413Z" ;
38+
bald:contains <file://CDL/point_template.cdl/crs>,
39+
<file://CDL/point_template.cdl/instrument1>,
40+
<file://CDL/point_template.cdl/lat>,
41+
<file://CDL/point_template.cdl/lon>,
42+
<file://CDL/point_template.cdl/platform1>,
43+
<file://CDL/point_template.cdl/sal>,
44+
<file://CDL/point_template.cdl/temp>,
45+
<file://CDL/point_template.cdl/time>,
46+
<file://CDL/point_template.cdl/z> ;
47+
ns1:featureType "point" ;
48+
<id> "NCEI_point_template_v2.0_2016-09-22_181606.590413.nc" ;
49+
<institution> "NCEI" ;
50+
<instrument> "In Situ/Laboratory Instruments > Profilers/Sounders > > > CTD" ;
51+
<instrument_vocabulary> "GCMD Earth Science Keywords. Version 5.3.3" ;
52+
<keywords> "Oceans > Ocean Temperature > Water Temperature, Oceans > Salinity/Density > Salinity" ;
53+
<keywords_vocabulary> "GCMD Earth Science Keywords. Version 5.3.3" ;
54+
<license> "Freely available" ;
55+
<metadata_link> <https://www.nodc.noaa.gov/data/formats/netcdf/v2.0/> ;
56+
<naming_authority> "gov.noaa.ncei" ;
57+
<ncei_template_version> "NCEI_NetCDF_Point_Template_v2.0" ;
58+
<platform> "In Situ Ocean-based Platforms > MOORINGS" ;
59+
<platform_vocabulary> "GCMD Earth Science Keywords. Version 5.3.3" ;
60+
<processing_level> "BOGUS DATA" ;
61+
<product_version> "v1" ;
62+
<program> "NCEI-IOOS Data Pipeline" ;
63+
<project> "NCEI NetCDF templates" ;
64+
<publisher_email> "[email protected]" ;
65+
<publisher_institution> "NCEI" ;
66+
<publisher_name> "NCEI Data Manager" ;
67+
<publisher_type> "position" ;
68+
<publisher_url> <http://www.ncei.noaa.gov/> ;
69+
<references> <https://www.nodc.noaa.gov/data/formats/netcdf/v2.0/> ;
70+
<sea_name> "Cordell Bank National Marine Sanctuary, North Pacific Ocean" ;
71+
<source> "Python script generate_NCEI_netCDF_template.py with options: {'template_version': '2.0', 'feature_type': 'point'}" ;
72+
<standard_name_vocabulary> "CF Standard Name Table v30" ;
73+
<summary> "This is an example of the Oceanographic and surface meteorological data collected from the cordell bank monitoring station by the National Centers for Environmental Information (NCEI) in the Cordell Bank National Marine Sanctuary from 2015-03-25 to 2015-03-25. The data contained within this file are completely bogus and are generated using the python module numpy.random.rand() function. This file can be used for testing with various applications. The uuid was generated using the uuid python module, invoking the command uuid.uuid4()." ;
74+
<time_coverage_end> "2015-03-25T22:20:17Z" ;
75+
<time_coverage_start> "2015-03-25T22:20:17Z" ;
76+
<title> "Oceanographic and surface meteorological data collected from the cordell bank monitoring station by the National Centers for Environmental Information (NCEI) in the Cordell Bank National Marine Sanctuary from 2015-03-25 to 2015-03-25" ;
77+
<uuid> "ade5a344-d574-4716-b1f6-fda75ff0cfc1" .
78+
79+
<file://CDL/point_template.cdl/crs> a bald:Subject ;
80+
<epsg_code> "EPSG:4326" ;
81+
<grid_mapping_name> "latitude_longitude" ;
82+
<inverse_flattening> 2.982572e+02 ;
83+
<longitude_of_prime_meridian> 0e+00 ;
84+
<semi_major_axis> 6.378137e+06 .
85+
86+
<file://CDL/point_template.cdl/instrument1> a bald:Subject ;
87+
<accuracy> "" ;
88+
<calibration_date> "2016-03-25" ;
89+
<comment> "serial number and calibration dates are bogus" ;
90+
ns1:long_name "Seabird 37 Microcat" ;
91+
<make_model> "SBE-37" ;
92+
<ncei_name> "CTD" ;
93+
<precision> "" ;
94+
<serial_number> "1859723" .
95+
96+
<file://CDL/point_template.cdl/platform1> a bald:Subject ;
97+
<call_sign> "" ;
98+
<comment> "Data is not actually collected from this platform, this is an example." ;
99+
ns1:long_name "cordell bank monitoring station" ;
100+
<ices_code> "" ;
101+
<imo_code> "" ;
102+
<ioos_code> "urn:ioos:station:NCEI:Mooring1" ;
103+
<ncei_code> "FIXED PLATFORM, MOORINGS" ;
104+
<wmo_code> "" .
105+
106+
<file://CDL/point_template.cdl/sal> a bald:Array ;
107+
<_FillValue> -9.999e+03 ;
108+
<comment> "These data are bogus!!!!!" ;
109+
<coverage_content_type> "physicalMeasurement" ;
110+
<data_max> 3.3e+01 ;
111+
<data_min> 3.3e+01 ;
112+
bald:shape "(1,)" ;
113+
ns1:add_offset 0e+00 ;
114+
ns1:cell_methods "time: point longitude: point latitude: point" ;
115+
ns1:coordinates <file://CDL/point_template.cdl/lat>,
116+
<file://CDL/point_template.cdl/lon>,
117+
<file://CDL/point_template.cdl/time>,
118+
<file://CDL/point_template.cdl/z> ;
119+
ns1:grid_mapping "crs" ;
120+
ns1:long_name "Salinity" ;
121+
ns1:missing_value -8.888e+03 ;
122+
ns1:scale_factor 1e+00 ;
123+
ns1:standard_name "sea_water_salinity" ;
124+
ns1:units "0.001" ;
125+
<instrument> "instrument1" ;
126+
<ncei_name> "SALINITY" ;
127+
<platform> "platform1" ;
128+
<references> <http://www.numpy.org/> ;
129+
<source> "numpy.random.rand function." ;
130+
<valid_max> 1e+02 ;
131+
<valid_min> 0e+00 .
132+
133+
<file://CDL/point_template.cdl/temp> a bald:Array ;
134+
<_FillValue> -9.999e+03 ;
135+
<comment> "These data are bogus!!!!!" ;
136+
<coverage_content_type> "physicalMeasurement" ;
137+
<data_max> 1.3e+01 ;
138+
<data_min> 1.3e+01 ;
139+
bald:shape "(1,)" ;
140+
ns1:add_offset 0e+00 ;
141+
ns1:cell_methods "time: point longitude: point latitude: point" ;
142+
ns1:coordinates <file://CDL/point_template.cdl/lat>,
143+
<file://CDL/point_template.cdl/lon>,
144+
<file://CDL/point_template.cdl/time>,
145+
<file://CDL/point_template.cdl/z> ;
146+
ns1:grid_mapping "crs" ;
147+
ns1:long_name "Temperature" ;
148+
ns1:missing_value -8.888e+03 ;
149+
ns1:scale_factor 1e+00 ;
150+
ns1:standard_name "sea_water_temperature" ;
151+
ns1:units "degree_Celsius" ;
152+
<instrument> "instrument1" ;
153+
<ncei_name> "WATER TEMPERATURE" ;
154+
<platform> "platform1" ;
155+
<references> <http://www.numpy.org/> ;
156+
<source> "numpy.random.rand function." ;
157+
<valid_max> 1e+02 ;
158+
<valid_min> 0e+00 .
159+
160+
<file://CDL/point_template.cdl/lat> a bald:Array ;
161+
<_FillValue> -9.999e+03 ;
162+
<comment> "These data are bogus!!!!!" ;
163+
bald:shape "(1,)" ;
164+
ns1:axis "Y" ;
165+
ns1:long_name "Latitude" ;
166+
ns1:standard_name "latitude" ;
167+
ns1:units "degrees_north" ;
168+
<valid_max> 9e+01 ;
169+
<valid_min> -9e+01 .
170+
171+
<file://CDL/point_template.cdl/lon> a bald:Array ;
172+
<_FillValue> -9.999e+03 ;
173+
<comment> "These data are bogus!!!!!" ;
174+
bald:shape "(1,)" ;
175+
ns1:axis "X" ;
176+
ns1:long_name "Longitude" ;
177+
ns1:standard_name "longitude" ;
178+
ns1:units "degrees_east" ;
179+
<valid_max> 1.8e+02 ;
180+
<valid_min> -1.8e+02 .
181+
182+
<file://CDL/point_template.cdl/time> a bald:Array ;
183+
<_FillValue> -9.999e+03 ;
184+
<comment> "These data are bogus!!!!!" ;
185+
bald:shape "(1,)" ;
186+
ns1:axis "T" ;
187+
ns1:calendar "julian" ;
188+
ns1:long_name "Time" ;
189+
ns1:standard_name "time" ;
190+
ns1:units "seconds since 1970-01-01 00:00:00 UTC" .
191+
192+
<file://CDL/point_template.cdl/z> a bald:Array ;
193+
<_FillValue> -9.999e+03 ;
194+
<comment> "These data are bogus!!!!!" ;
195+
bald:shape "(1,)" ;
196+
ns1:axis "Z" ;
197+
ns1:long_name "depth of sensor" ;
198+
ns1:positive "down" ;
199+
ns1:standard_name "depth" ;
200+
ns1:units "m" ;
201+
<valid_max> 1.0971e+04 ;
202+
<valid_min> 0e+00 .
203+

0 commit comments

Comments
 (0)