5
5
import netCDF4
6
6
import numpy as np
7
7
import bald
8
+ import rdflib
9
+ import json
10
+ from rdflib import Namespace , BNode , URIRef , Literal
11
+ from rdflib .namespace import RDF
12
+ try :
13
+ # python 3
14
+ from urllib .parse import urlparse
15
+ except ImportError :
16
+ from urlparse import urlparse
17
+
18
def isUrl(url):
    """
    Report whether ``url`` looks like an absolute http(s) URL.

    The value is accepted only when it parses with a scheme of ``http`` or
    ``https``, a network location and a non-empty path.

    Returns True for such URLs and False for everything else, including
    values that ``urlparse`` cannot handle.
    """
    try:
        result = urlparse(url)
    except (AttributeError, TypeError, ValueError):
        # urlparse could not process the value (e.g. a non-string input or
        # a malformed network location), so it is not a usable URL.
        return False
    # Always hand back a real bool; falling off the end would return None.
    return (result.scheme in ('http', 'https')
            and bool(result.netloc)
            and bool(result.path))
25
+
26
def getBasename(urlstr):
    """Return the last path component of ``urlstr`` as computed by os.path."""
    _head, tail = os.path.split(urlstr)
    return tail
28
+
29
def baldgraph2schemaorg(graph, path=None, baseuri=None):
    """
    Build a schema.org description from a BALD graph.

    graph   -- rdflib graph; it is queried for every predicate/value pair
               attached to a bald:Container
    path    -- optional location of the source file; recorded as the
               schema.org ``url`` only when isUrl() accepts it
    baseuri -- optional URI used for the Dataset node; a blank node is
               used when it is None

    Returns a new rdflib.Graph holding a single schema.org Dataset.
    """
    # HACK: The following mappings ignore prefixes as well as prefixes in nc file
    # TODO: Fix references to prefixes/aliases proper

    # Load the BALD -> schema.org attribute name mappings.
    # NOTE(review): the file name is resolved against the current working
    # directory - confirm that this is intended.
    with open('bald2schemaorg_mappings.json', 'r') as f:
        bald_to_schema = {entry['bald']: entry['schemaorg']
                          for entry in json.load(f)}

    container_attrs = graph.query(
        """PREFIX bald: <http://binary-array-ld.net/latest/>
           SELECT DISTINCT ?pred ?value
           WHERE {
              ?c a bald:Container .
              ?c ?pred ?value
           }""")

    schema_g = rdflib.Graph()
    container = BNode() if baseuri is None else URIRef(baseuri)

    so = Namespace("http://schema.org/")
    schema_g.add((container, RDF.type, so.Dataset))

    if path is not None and isUrl(path):
        schema_g.add((container, URIRef("http://schema.org/url"), URIRef(path)))

    for row in container_attrs:
        field = getBasename(str(row[0])).strip()
        if field not in bald_to_schema:
            # no schema.org counterpart for this attribute
            continue

        predicate = URIRef("http://schema.org/" + bald_to_schema[field])
        if field == 'keywords':
            # one literal per non-empty, comma-separated keyword
            stripped = (part.strip() for part in row[1].split(','))
            values = [kw for kw in stripped if kw]
        else:
            values = [row[1]]

        for value in values:
            schema_g.add((container, predicate, Literal(value)))
    return schema_g
87
+
88
def nc2schemaorg(ncfilename, outformat, baseuri=None):
    """
    Print the schema.org description of a netCDF file.

    ncfilename -- path of the netCDF file, handed to bald.load_netcdf
    outformat  -- rdflib serialization format name; 'json-ld' is emitted
                  with the schema.org context and an indent of 4
    baseuri    -- optional base URI for the graph

    Returns the schema.org graph whose serialization was printed.
    """
    root_container = bald.load_netcdf(ncfilename, baseuri=baseuri)
    graph = root_container.rdfgraph()
    schema_g = baldgraph2schemaorg(graph, path=ncfilename, baseuri=baseuri)

    if outformat == 'json-ld':
        s = schema_g.serialize(format=outformat, context="http://schema.org/", indent=4)
    else:
        s = schema_g.serialize(format=outformat)
    # serialize() yields bytes on older rdflib releases and str on newer
    # ones; decode only when there is something to decode.
    if isinstance(s, bytes):
        s = s.decode("utf-8")
    print(s)
    # cdl2schemaorg() passes this function's result on to its caller, so
    # hand back the graph instead of an implicit None.
    return schema_g
8
99
9
100
def nc2rdf(ncfilename, outformat, baseuri=None):
    """
    Print the RDF graph of a netCDF file.

    ncfilename -- path of the netCDF file, handed to bald.load_netcdf
    outformat  -- rdflib serialization format name
    baseuri    -- optional base URI for the graph
    """
    root_container = bald.load_netcdf(ncfilename, baseuri=baseuri)
    serialized = root_container.rdfgraph().serialize(format=outformat)
    # serialize() yields bytes on older rdflib releases and str on newer
    # ones; decode only when there is something to decode.
    if isinstance(serialized, bytes):
        serialized = serialized.decode("utf-8")
    print(serialized)
15
104
105
def cdl2schemaorg(cdl_file, outformat, baseuri=None):
    """
    Print the schema.org description of a CDL file.

    The CDL text is compiled to a temporary netCDF file with the external
    ``ncgen`` command, which is then passed to nc2schemaorg().

    cdl_file  -- path of the CDL file
    outformat -- serialization format name, forwarded to nc2schemaorg()
    baseuri   -- optional base URI, forwarded to nc2schemaorg()

    Returns whatever nc2schemaorg() returns.
    Raises subprocess.CalledProcessError when ncgen exits with a non-zero
    status.
    """
    tfile, tfilename = tempfile.mkstemp('.nc')
    # The descriptor itself is never used; only the path is passed on.
    os.close(tfile)
    try:
        subprocess.check_call(['ncgen', '-o', tfilename, cdl_file])
        return nc2schemaorg(tfilename, outformat, baseuri=baseuri)
    finally:
        # Remove the temporary file even when ncgen or the conversion fails.
        os.remove(tfilename)
112
+
16
113
def cdl2rdf (cdl_file , outformat , baseuri = None ):
17
114
#print("cdl2rdf test")
18
115
#print(cdl_file)
@@ -32,13 +129,20 @@ def cdl2rdf(cdl_file, outformat, baseuri=None):
32
129
parser .add_argument ('--baseuri' , action = "store" , dest = "baseuri" , help = "Base URI for the graph" )
33
130
parser .add_argument ('--cdl' , action = "store_true" , dest = "isCDL" , default = False , help = "Flag to indicate file is CDL" )
34
131
parser .add_argument ('--nc' , action = "store_true" , dest = "isNC" , default = False , help = "Flag to indicate file is netCDF" )
132
+ parser .add_argument ('--schema-org' , action = "store_true" , dest = "isSchemaOrgOutput" , default = False , help = "Flag to indicate if schema.org output activated" )
35
133
parser .add_argument ("ncfile" , help = "Path for the netCDF file" )
36
134
37
135
args = parser .parse_args ()
38
136
39
137
if (args .isCDL or args .ncfile .endswith (".cdl" ) or args .ncfile .endswith ('.CDL' )):
40
- cdl2rdf (args .ncfile , args .format , baseuri = args .baseuri )
138
+ if (args .isSchemaOrgOutput ):
139
+ cdl2schemaorg (args .ncfile , args .format , baseuri = args .baseuri )
140
+ else :
141
+ cdl2rdf (args .ncfile , args .format , baseuri = args .baseuri )
41
142
elif (args .isNC or args .ncfile .endswith (".nc" ) or args .ncfile .endswith ('.NC' )):
42
- nc2rdf (args .ncfile , args .format , baseuri = args .baseuri )
143
+ if (args .isSchemaOrgOutput ):
144
+ nc2schemaorg (args .ncfile , args .format , baseuri = args .baseuri )
145
+ else :
146
+ nc2rdf (args .ncfile , args .format , baseuri = args .baseuri )
43
147
else :
44
148
print ("Unrecognised file suffix. Please indicate if CDL or NC via --cdl or --nc" );
0 commit comments