5
5
import netCDF4
6
6
import numpy as np
7
7
import bald
8
+ import rdflib
9
+ import json
10
+ from rdflib import Namespace , BNode , URIRef , Literal
11
+ from rdflib .namespace import RDF
12
+
13
+
14
+
15
def getBasename(urlstr):
    """Return the last path component of ``urlstr``.

    The string is split with ``os.path.split`` and the tail (the part
    after the final path separator) is returned. A string that ends in a
    separator therefore yields an empty string.
    """
    _head, tail = os.path.split(urlstr)
    return tail
17
+
18
def baldgraph2schemaorg(graph):
    """Build a schema.org description from a BALD-style RDF graph.

    Every predicate/value pair attached to a ``bald:Container`` in
    ``graph`` is checked against ``bald2schemaorg_mappings.json``. Pairs
    whose predicate basename has a mapping are re-attached, under the
    mapped schema.org property, to a single blank node typed
    ``schema:Dataset``.

    Args:
        graph: BALD-style graph exposing a SPARQL ``query`` method that
            yields ``(pred, value)`` rows (e.g. an ``rdflib.Graph``).

    Returns:
        A new ``rdflib.Graph`` containing the schema.org triples.

    Raises:
        IOError/OSError: if the mappings file cannot be opened.
    """
    # Imported locally so this function does not rely on the module's
    # import list for names it newly uses.
    import os
    import sys

    # Load the BALD -> schema.org term mappings. The current directory is
    # tried first (the historical behaviour); otherwise fall back to the
    # directory containing this module so the tool can be launched from
    # anywhere.
    mapping_name = 'bald2schemaorg_mappings.json'
    mapping_path = mapping_name
    if not os.path.exists(mapping_path):
        mapping_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), mapping_name)
    with open(mapping_path, 'r') as f:
        mapping_data = json.load(f)

    # Later entries win on duplicate 'bald' keys, as with plain assignment.
    mapping_idx = {item['bald']: item['schemaorg'] for item in mapping_data}

    qres = graph.query(
        """PREFIX bald: <http://binary-array-ld.net/latest/>
           SELECT DISTINCT ?pred ?value
           WHERE {
              ?c a bald:Container .
              ?c ?pred ?value
           }""")

    schema_g = rdflib.Graph()
    container = BNode()
    so = Namespace("http://schema.org/")
    schema_g.add((container, RDF.type, so.Dataset))

    for pred, value in qres:
        curr_field = getBasename(str(pred)).strip()
        if curr_field not in mapping_idx:
            continue
        target = mapping_idx[curr_field]
        # Diagnostics go to stderr: callers print the serialized graph on
        # stdout, and mixing these lines in would corrupt that document.
        sys.stderr.write('schemaorg:%s %s %s\n' % (target, "\t ", value))
        schema_g.add((container, so[target], Literal(value)))
    return schema_g
55
+
56
def nc2schemaorg(ncfilename, outformat, baseuri=None):
    """Print the schema.org description of a netCDF file.

    The file is loaded with ``bald.load_netcdf``, its RDF graph is
    converted with ``baldgraph2schemaorg`` and the result is serialized in
    ``outformat`` and printed to stdout.

    Args:
        ncfilename: path of the netCDF file to load.
        outformat: serialization format name passed to
            ``Graph.serialize``; ``'json-ld'`` additionally gets the
            schema.org context and 4-space indentation.
        baseuri: optional base URI forwarded to ``bald.load_netcdf``.

    Returns:
        The schema.org graph that was printed, so that callers such as
        ``cdl2schemaorg`` receive a usable value instead of ``None``.
    """
    root_container = bald.load_netcdf(ncfilename, baseuri=baseuri)
    schema_g = baldgraph2schemaorg(root_container.rdfgraph())

    serialize_kwargs = {'format': outformat}
    if outformat == 'json-ld':
        serialize_kwargs.update(context="http://schema.org/", indent=4)
    s = schema_g.serialize(**serialize_kwargs)

    # Older rdflib releases return bytes from serialize(); newer ones
    # return str, which has no .decode(). Decode only when needed.
    if isinstance(s, bytes):
        s = s.decode("utf-8")
    print(s)
    return schema_g
8
67
9
68
def nc2rdf(ncfilename, outformat, baseuri=None):
    """Print the RDF graph of a netCDF file in the requested format.

    Args:
        ncfilename: path of the netCDF file to load.
        outformat: serialization format name passed to
            ``Graph.serialize``.
        baseuri: optional base URI forwarded to ``bald.load_netcdf``.
    """
    root_container = bald.load_netcdf(ncfilename, baseuri=baseuri)
    ttl = root_container.rdfgraph().serialize(format=outformat)

    # Older rdflib releases return bytes from serialize(); newer ones
    # return str, which has no .decode(). Decode only when needed.
    if isinstance(ttl, bytes):
        ttl = ttl.decode("utf-8")
    print(ttl)
15
72
73
def cdl2schemaorg(cdl_file, outformat, baseuri=None):
    """Print the schema.org description of a CDL file.

    The CDL text is compiled to a temporary netCDF file with the external
    ``ncgen`` tool, which is then handed to ``nc2schemaorg``.

    Args:
        cdl_file: path of the CDL file to convert.
        outformat: serialization format name forwarded to
            ``nc2schemaorg``.
        baseuri: optional base URI forwarded to ``nc2schemaorg``.

    Returns:
        Whatever ``nc2schemaorg`` returns for the generated file.

    Raises:
        subprocess.CalledProcessError: if ``ncgen`` exits with a non-zero
            status.
    """
    tfile, tfilename = tempfile.mkstemp('.nc')
    # ncgen writes to the file by name, so our own descriptor is not
    # needed; closing it right away means it cannot leak on failure.
    os.close(tfile)
    try:
        subprocess.check_call(['ncgen', '-o', tfilename, cdl_file])
        return nc2schemaorg(tfilename, outformat, baseuri=baseuri)
    finally:
        # Always remove the temporary file, even when ncgen or the
        # conversion raises.
        os.remove(tfilename)
80
+
16
81
def cdl2rdf (cdl_file , outformat , baseuri = None ):
17
82
#print("cdl2rdf test")
18
83
#print(cdl_file)
@@ -32,13 +97,20 @@ def cdl2rdf(cdl_file, outformat, baseuri=None):
32
97
parser .add_argument ('--baseuri' , action = "store" , dest = "baseuri" , help = "Base URI for the graph" )
33
98
parser .add_argument ('--cdl' , action = "store_true" , dest = "isCDL" , default = False , help = "Flag to indicate file is CDL" )
34
99
parser .add_argument ('--nc' , action = "store_true" , dest = "isNC" , default = False , help = "Flag to indicate file is netCDF" )
100
+ parser .add_argument ('--schema-org' , action = "store_true" , dest = "isSchemaOrgOutput" , default = False , help = "Flag to indicate if schema.org output activated" )
35
101
parser .add_argument ("ncfile" , help = "Path for the netCDF file" )
36
102
37
103
args = parser .parse_args ()
38
104
39
105
# Choose the converter: an explicit --cdl/--nc flag wins, otherwise the
# file suffix decides. The suffix is compared case-insensitively so that
# mixed-case extensions such as ".Cdl" or ".Nc" are recognised as well.
lowered_name = args.ncfile.lower()
if args.isCDL or lowered_name.endswith(".cdl"):
    convert = cdl2schemaorg if args.isSchemaOrgOutput else cdl2rdf
elif args.isNC or lowered_name.endswith(".nc"):
    convert = nc2schemaorg if args.isSchemaOrgOutput else nc2rdf
else:
    convert = None

if convert is None:
    print("Unrecognised file suffix. Please indicate if CDL or NC via --cdl or --nc")
else:
    convert(args.ncfile, args.format, baseuri=args.baseuri)
0 commit comments