Skip to content

Commit 9301915

Browse files
committed
adding nc2schemaorg function
1 parent 48d1b71 commit 9301915

File tree

2 files changed

+84
-4
lines changed

2 files changed

+84
-4
lines changed

nc2rdf/bald2schemaorg_mappings.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
[
2+
{ "bald" : "summary", "schemaorg": "description" },
3+
{ "bald" : "title", "schemaorg": "name" },
4+
{ "bald" : "id", "schemaorg": "identifier" },
5+
{ "bald" : "keywords", "schemaorg": "keywords" },
6+
{ "bald" : "license", "schemaorg": "license" },
7+
{ "bald" : "standard_name", "schemaorg": "variableMeasured" }
8+
]

nc2rdf/nc2rdf.py

Lines changed: 76 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,79 @@
55
import netCDF4
66
import numpy as np
77
import bald
8+
import rdflib
9+
import json
10+
from rdflib import Namespace, BNode, URIRef, Literal
11+
from rdflib.namespace import RDF
12+
13+
14+
15+
def getBasename(urlstr):
    """Return the last path component of ``urlstr``.

    This is whatever follows the final path separator; a string that ends
    with a separator yields an empty string.
    """
    _head, tail = os.path.split(urlstr)
    return tail
17+
18+
def baldgraph2schemaorg(graph):
    """
    Input: rdflib.Graph in bald style (it is queried with SPARQL for every
           predicate/value pair attached to a bald:Container)
    Transforms the container properties listed in
    bald2schemaorg_mappings.json to their schema.org equivalents
    Returns a new rdflib.Graph holding a single blank-node schema.org
    Dataset carrying the mapped properties as literals
    """
    # Load mappings. The JSON file lives next to this module, so resolve it
    # relative to the module instead of the current working directory; fall
    # back to the bare relative name so a copy in the CWD is still honoured.
    mappings_name = 'bald2schemaorg_mappings.json'
    mappings_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), mappings_name)
    if not os.path.isfile(mappings_path):
        mappings_path = mappings_name
    with open(mappings_path, 'r') as f:
        mapping_data = json.load(f)

    # bald attribute name -> schema.org property name
    mapping_idx = {item['bald']: item['schemaorg'] for item in mapping_data}

    qres = graph.query(
        """PREFIX bald: <http://binary-array-ld.net/latest/>
           SELECT DISTINCT ?pred ?value
           WHERE {
              ?c a bald:Container .
              ?c ?pred ?value
           }""")

    schema_g = rdflib.Graph()
    container = BNode()
    so = Namespace("http://schema.org/")
    schema_g.add((container, RDF.type, so.Dataset))

    # Diagnostics go through logging rather than print(): the callers print
    # the serialized graph on stdout, and mixing debug lines into that
    # stream makes the output unparseable.
    import logging
    log = logging.getLogger(__name__)

    for row in qres:
        # Only the last path segment of the predicate URI is matched
        # against the mapping table.
        currField = getBasename(str(row[0])).strip()
        if currField in mapping_idx:
            log.debug("schemaorg:%s\t%s", mapping_idx[currField], row[1])
            predUri = URIRef("http://schema.org/" + mapping_idx[currField])
            lit = Literal(row[1])
            schema_g.add((container, predUri, lit))
    return schema_g
55+
56+
def nc2schemaorg(ncfilename, outformat, baseuri=None):
    """
    Load a netCDF file with bald, convert its graph to the schema.org
    profile and print the serialization in ``outformat`` to stdout.

    ncfilename: path of the netCDF file passed to bald.load_netcdf
    outformat:  rdflib serialization format name; 'json-ld' is serialized
                with the schema.org context and 4-space indentation
    baseuri:    optional base URI forwarded to bald.load_netcdf

    Returns the schema.org graph so that callers can reuse it.
    """
    root_container = bald.load_netcdf(ncfilename, baseuri=baseuri)
    graph = root_container.rdfgraph()
    schema_g = baldgraph2schemaorg(graph)

    if outformat == 'json-ld':
        context = "http://schema.org/"
        s = schema_g.serialize(format=outformat, context=context, indent=4)
    else:
        s = schema_g.serialize(format=outformat)

    # Older rdflib releases return bytes from serialize(), newer ones
    # return str; only decode when there is something to decode.
    if isinstance(s, bytes):
        s = s.decode("utf-8")
    print(s)
    return schema_g
867

968
def nc2rdf(ncfilename, outformat, baseuri=None):
    """
    Load a netCDF file with bald and print its RDF graph, serialized in
    ``outformat``, to stdout.

    ncfilename: path of the netCDF file passed to bald.load_netcdf
    outformat:  rdflib serialization format name
    baseuri:    optional base URI forwarded to bald.load_netcdf
    """
    root_container = bald.load_netcdf(ncfilename, baseuri=baseuri)
    ttl = root_container.rdfgraph().serialize(format=outformat)
    # Older rdflib releases return bytes from serialize(), newer ones
    # return str; only decode when there is something to decode.
    if isinstance(ttl, bytes):
        ttl = ttl.decode("utf-8")
    print(ttl)
1572

73+
def cdl2schemaorg(cdl_file, outformat, baseuri=None):
    """
    Convert a CDL file to a temporary netCDF file with ``ncgen`` and run
    nc2schemaorg on the result.

    cdl_file:  path of the CDL file handed to ncgen
    outformat: serialization format forwarded to nc2schemaorg
    baseuri:   optional base URI forwarded to nc2schemaorg

    Returns whatever nc2schemaorg returns.
    Raises subprocess.CalledProcessError when ncgen exits with a non-zero
    status.
    """
    tfile, tfilename = tempfile.mkstemp('.nc')
    try:
        subprocess.check_call(['ncgen', '-o', tfilename, cdl_file])
        schema_g = nc2schemaorg(tfilename, outformat, baseuri=baseuri)
    finally:
        # Always release the descriptor and delete the temporary file, even
        # when ncgen or the conversion fails, so no stray .nc files are left
        # behind.
        os.close(tfile)
        os.remove(tfilename)
    return schema_g
80+
1681
def cdl2rdf(cdl_file, outformat, baseuri=None):
1782
#print("cdl2rdf test")
1883
#print(cdl_file)
@@ -32,13 +97,20 @@ def cdl2rdf(cdl_file, outformat, baseuri=None):
3297
parser.add_argument('--baseuri', action="store", dest="baseuri", help="Base URI for the graph")
3398
parser.add_argument('--cdl', action="store_true", dest="isCDL", default=False, help="Flag to indicate file is CDL")
3499
parser.add_argument('--nc', action="store_true", dest="isNC", default=False, help="Flag to indicate file is netCDF")
100+
parser.add_argument('--schema-org', action="store_true", dest="isSchemaOrgOutput", default=False, help="Flag to indicate if schema.org output activated")
35101
parser.add_argument("ncfile", help="Path for the netCDF file")
36102

37103
args = parser.parse_args()
38104

39105
if(args.isCDL or args.ncfile.endswith(".cdl") or args.ncfile.endswith('.CDL')):
40-
cdl2rdf(args.ncfile, args.format, baseuri=args.baseuri)
106+
if(args.isSchemaOrgOutput):
107+
cdl2schemaorg(args.ncfile, args.format, baseuri=args.baseuri)
108+
else:
109+
cdl2rdf(args.ncfile, args.format, baseuri=args.baseuri)
41110
elif(args.isNC or args.ncfile.endswith(".nc") or args.ncfile.endswith('.NC')):
42-
nc2rdf(args.ncfile, args.format, baseuri=args.baseuri)
111+
if(args.isSchemaOrgOutput):
112+
nc2schemaorg(args.ncfile, args.format, baseuri=args.baseuri)
113+
else:
114+
nc2rdf(args.ncfile, args.format, baseuri=args.baseuri)
43115
else:
44116
print("Unrecognised file suffix. Please indicate if CDL or NC via --cdl or --nc");

0 commit comments

Comments
 (0)