Skip to content

Commit 93d8259

Browse files
authored
Merge pull request #45 from JimBiardCics/master
Adding udunits graph and table builders.
2 parents 6b1d04e + 82f21f3 commit 93d8259

File tree

3 files changed

+1114
-0
lines changed

3 files changed

+1114
-0
lines changed

UdunitsTables/BuildUnitsGraph.py

Lines changed: 364 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,364 @@
1+
from __future__ import print_function
2+
from neo4jrestclient.client import GraphDatabase, Node, Relationship
3+
4+
import re
5+
import sys
6+
import xml.etree.ElementTree as ETree
7+
8+
9+
def createNode(dB, label = None, properties = None):
    """Create a new node with a Cypher CREATE statement and return it.

    Args:
        dB: Database handle exposing ``query(q, params=..., returns=...)``.
        label: Optional node label (str or unicode). It is concatenated
            into the query text, so it must be a trusted identifier.
        properties: Optional dict of property names to values. The names
            are written into the query text; the values are sent as query
            parameters.

    Returns:
        The first column of the first row returned by the query.

    Raises:
        Exception: If both label and properties are None, if label is not
            a string, or if properties is given but is not a dict.
    """
    if label is None and properties is None:
        raise Exception('ERROR: createNode() called with both label and properties arguments = None.')

    # type(u'') is unicode on Python 2 and str on Python 3, so this check
    # works on both without referencing the Python 2-only name directly.
    if label is not None and not isinstance(label, (str, type(u''))):
        raise Exception('ERROR: createNode() called with a label argument that is not a str or unicode type.')

    # A label-only call legitimately leaves properties as None.
    if properties is not None and not isinstance(properties, dict):
        raise Exception('ERROR: createNode() called with a properties argument that is not a dict type.')

    query = 'CREATE (n'

    if label is not None:
        query += ':' + label

    if properties:
        # Every property becomes "name : {name}", a named query parameter.
        query += ' {' + ', '.join(prop + ' : {' + prop + '}' for prop in properties) + '}'

    query += ') return n'

    return dB.query(query, params = properties, returns = (Node,))[0][0]
37+
38+
39+
def mergeNode(dB, label = None, properties = None):
    """Find or create a node with a Cypher MERGE statement and return it.

    Args:
        dB: Database handle exposing ``query(q, params=..., returns=...)``.
        label: Optional node label (str or unicode). It is concatenated
            into the query text, so it must be a trusted identifier.
        properties: Optional dict of property names to values. The names
            are written into the query text; the values are sent as query
            parameters.

    Returns:
        The first column of the first row returned by the query.

    Raises:
        Exception: If both label and properties are None, if label is not
            a string, if properties is given but is not a dict, or if the
            database query fails.
    """
    if label is None and properties is None:
        raise Exception('ERROR: mergeNode() called with both label and properties arguments = None.')

    # type(u'') is unicode on Python 2 and str on Python 3, so this check
    # works on both without referencing the Python 2-only name directly.
    if label is not None and not isinstance(label, (str, type(u''))):
        raise Exception('ERROR: mergeNode() called with a label argument that is not a str or unicode type.')

    # A label-only call legitimately leaves properties as None.
    if properties is not None and not isinstance(properties, dict):
        raise Exception('ERROR: mergeNode() called with a properties argument that is not a dict type.')

    query = 'MERGE (n'

    if label is not None:
        query += ':' + label

    if properties:
        # Every property becomes "name : {name}", a named query parameter.
        query += ' {' + ', '.join(prop + ' : {' + prop + '}' for prop in properties) + '}'

    query += ') return n'

    try:
        node = dB.query(query, params = properties, returns = (Node,))[0][0]
    except Exception as error:
        # Keep the underlying cause in the message so a failure can be diagnosed.
        raise Exception('ERROR: dB.query failed in mergeNode. Cause: %r' % (error,))

    return node
70+
71+
72+
def mergeRelationship(dB, fromNode, relType, toNode, properties = None):
    """Find or create a relationship between two nodes and return it.

    Args:
        dB: Database handle exposing ``query(q, params=..., returns=...)``.
        fromNode: Start node; its ``id`` attribute is sent as ``_from``.
        relType: Relationship type (str or unicode). It is concatenated
            into the query text, so it must be a trusted identifier.
        toNode: End node; its ``id`` attribute is sent as ``_to``.
        properties: Optional dict of relationship property names to
            values. The names are written into the query text; the values
            are sent as query parameters.

    Returns:
        The first column of the first row returned by the query.

    Raises:
        Exception: If relType is None or not a string, or if properties is
            given but is not a dict.
    """
    if relType is None:
        raise Exception('ERROR: mergeRelationship() called with a relType argument of None.')

    # type(u'') is unicode on Python 2 and str on Python 3, so this check
    # works on both without referencing the Python 2-only name directly.
    if not isinstance(relType, (str, type(u''))):
        raise Exception('ERROR: mergeRelationship() called with a relType argument that is not a str or unicode type.')

    if properties is not None and not isinstance(properties, dict):
        raise Exception('ERROR: mergeRelationship() called with a properties argument that is not a dict type.')

    query = 'START a = node({_from}), b = node({_to}) MERGE (a)-[r:' + relType

    if properties:
        # Every property becomes "name:{name}", a named query parameter.
        query += ' {' + ', '.join(key + ':{' + key + '}' for key in properties) + '}'

    query += ']->(b) RETURN r'

    # Copy the caller's dict so the endpoint ids are not added to it.
    params = dict(properties) if properties is not None else dict()

    params['_from'] = fromNode.id
    params['_to'] = toNode.id

    return dB.query(query, params = params, returns = (Relationship,))[0][0]
104+
105+
106+
def getNames(nameElement, noPlural = False):
    """Extract the singular name, plural name and comments from a name element.

    Args:
        nameElement: ElementTree element with a ``singular`` child and,
            optionally, ``plural`` or ``noplural`` children.
        noPlural: When true, no plural is produced even if one is given or
            could be derived.

    Returns:
        A ``(name, plural, comments)`` tuple. ``plural`` is None when
        plurals are suppressed. Otherwise it is the explicit ``plural``
        text, or a form derived from the singular by simple English rules.
        ``comments`` lists the ``comment`` attribute of the element itself
        (if present), followed by those of its direct children in document
        order.

    Raises:
        Exception: If the element has no ``singular`` child.
    """
    singularElement = nameElement.find('./singular')

    if singularElement is None:
        raise Exception('ERROR: name element does not have a singular element.')

    theName = singularElement.text

    if noPlural or nameElement.find('./noplural') is not None:
        thePlural = None
    else:
        pluralElement = nameElement.find('./plural')

        if pluralElement is not None:
            thePlural = pluralElement.text
        elif re.search(r'ch$|sh$|s$|x$', theName) is not None:
            thePlural = theName + 'es'
        elif re.search(r'[^aeiou]y$', theName) is not None:
            # consonant + y -> ies (e.g. henry -> henries)
            thePlural = theName[:-1] + 'ies'
        else:
            thePlural = theName + 's'

    theComments = list()

    # The element's own comment comes first, then those of its children.
    ownComment = nameElement.get('comment')

    if ownComment is not None:
        theComments.append(ownComment)

    for child in nameElement.findall('./*[@comment]'):
        theComments.append(child.attrib['comment'])

    return (theName, thePlural, theComments)
135+
136+
137+
def mergeNameNode(dB, nameElement, noPlural = False):
    """Find or create the ``UnitName`` node described by a name element.

    Args:
        dB: Database handle passed through to mergeNode().
        nameElement: ElementTree name element, read with getNames().
        noPlural: Passed through to getNames() to suppress the plural.

    Returns:
        The node returned by mergeNode(). It carries a ``name`` property,
        plus ``plural`` and ``comments`` properties when those exist.

    Raises:
        Exception: If nameElement is None.
    """
    if nameElement is None:
        raise Exception('ERROR: The nameElement argument is None.')

    theName, thePlural, theComments = getNames(nameElement, noPlural)

    properties = {'name' : theName}

    # Optional properties are left out rather than stored as empty values.
    if thePlural is not None:
        properties['plural'] = thePlural

    if theComments:
        properties['comments'] = theComments

    return mergeNode(dB, 'UnitName', properties)
156+
157+
158+
def addNamesAndSymbols(dB, unitNode, parentElement):
    """Attach the names and symbols found under parentElement to unitNode.

    Each ``name`` child is merged as a name node linked to unitNode with a
    ``NAME_FOR`` relationship. Each ``symbol`` child is merged as a
    ``UnitSymbol`` node linked with ``SYMBOL_FOR``. A ``noplural`` child of
    parentElement suppresses plurals for all of its names.
    """
    suppressPlural = parentElement.find('./noplural') is not None

    for element in parentElement.findall('./name'):
        linkedName = mergeNameNode(dB, element, suppressPlural)
        mergeRelationship(dB, linkedName, 'NAME_FOR', unitNode)

    for element in parentElement.findall('./symbol'):
        symbolProperties = {'name' : element.text}

        if 'comment' in element.attrib:
            symbolProperties['comment'] = element.attrib['comment']

        linkedSymbol = mergeNode(dB, 'UnitSymbol', symbolProperties)
        mergeRelationship(dB, linkedSymbol, 'SYMBOL_FOR', unitNode)
184+
185+
186+
def addUnit(dB, unitElement):
    """Merge one unit element into the graph as a ``Unit`` node.

    The node's ``formula`` property is the text of the ``def`` child when
    there is one. Otherwise it is ``'Base SI unit'`` or
    ``'Dimensionless quantity'``, and ``canonicalName`` is set from the
    singular name. Names and symbols of the unit and of its ``aliases``
    child are attached through addNamesAndSymbols().

    Args:
        dB: Database handle passed through to the merge helpers.
        unitElement: ElementTree element describing the unit.

    Raises:
        Exception: If the unit has no ``def``, ``base`` or ``dimensionless``
            child, or if a base or dimensionless unit has no ``name`` child.
    """
    aliasesElement = unitElement.find('./aliases')
    baseElement = unitElement.find('./base')
    defElement = unitElement.find('./def')
    dimensionlessElement = unitElement.find('./dimensionless')
    nameElement = unitElement.find('./name')
    definitionElement = unitElement.find('./definition')
    commentElement = unitElement.find('./comment')

    properties = dict()

    if definitionElement is not None and definitionElement.text is not None:
        properties['definition'] = definitionElement.text

    # An empty <comment/> has text None; skip it as is done for definition.
    # NOTE(review): Neo4j is expected to reject null property values in
    # MERGE - confirm against the server version in use.
    if commentElement is not None and commentElement.text is not None:
        properties['comment'] = commentElement.text

    if defElement is not None:
        properties['formula'] = defElement.text
    elif baseElement is not None or dimensionlessElement is not None:
        if nameElement is None:
            raise Exception('ERROR: base or dimensionless unit does not have a name element.')

        properties['canonicalName'] = nameElement.find('./singular').text

        # base takes precedence when both marker elements are present.
        if baseElement is not None:
            properties['formula'] = 'Base SI unit'
        else:
            properties['formula'] = 'Dimensionless quantity'
    else:
        raise Exception('ERROR: unit does not have a def, base, or dimensionless element.')

    unitNode = mergeNode(dB, 'Unit', properties)

    if nameElement is not None:
        addNamesAndSymbols(dB, unitNode, unitElement)

    if aliasesElement is not None:
        addNamesAndSymbols(dB, unitNode, aliasesElement)
228+
229+
230+
def addPrefix(dB, prefixElement):
    """Merge one prefix element into the graph as a ``Prefix`` node.

    The node takes its ``name`` and ``value`` properties from the text of
    the ``name`` and ``value`` children. Each ``symbol`` child is merged as
    a ``PrefixSymbol`` node and linked to the prefix with ``SYMBOL_FOR``.

    Args:
        dB: Database handle passed through to the merge helpers.
        prefixElement: ElementTree element describing the prefix.

    Raises:
        Exception: If the ``value`` or ``name`` child is missing, or if
            there is no ``symbol`` child.
    """
    valueElement = prefixElement.find('./value')
    nameElement = prefixElement.find('./name')
    symbolElements = prefixElement.findall('./symbol')

    # findall() always returns a list, so emptiness is the only check needed.
    if valueElement is None or nameElement is None or not symbolElements:
        raise Exception('ERROR: Prefix element is incomplete.')

    prefixNode = mergeNode(dB, 'Prefix', {'name' : nameElement.text, 'value' : valueElement.text})

    for symbolElement in symbolElements:
        properties = {'name' : symbolElement.text}

        if 'comment' in symbolElement.attrib:
            properties['comment'] = symbolElement.attrib['comment']

        symbolNode = mergeNode(dB, 'PrefixSymbol', properties)

        mergeRelationship(dB, symbolNode, 'SYMBOL_FOR', prefixNode)
251+
252+
253+
def addUnits(dB, file):
    """Load one XML document and merge its units and prefixes into the graph.

    Every ``unit`` child of the document root goes to addUnit(). After
    that, every ``prefix`` child goes to addPrefix().

    Args:
        dB: Database handle passed through to addUnit() and addPrefix().
        file: Filename or file object accepted by ElementTree's parse().
    """
    root = ETree.parse(file).getroot()

    for element in root.findall('./unit'):
        addUnit(dB, element)

    for element in root.findall('./prefix'):
        addPrefix(dB, element)
266+
267+
268+
def _resolveReference(fullName, nameDict, prefixList):
    """Return the nameDict key that fullName refers to, or None.

    fullName matches either directly or after removing one leading prefix
    from prefixList. Prefixes are compared as literal text.
    """
    if fullName in nameDict:
        return fullName

    for prefix in prefixList:
        if fullName.startswith(prefix):
            base = fullName[len(prefix):]

            # Keep looking: a shorter prefix may match the text without
            # leaving a known unit name behind.
            if base in nameDict:
                return base

    return None


def parseFormulas(dB):
    """Link every derived ``Unit`` node to the units named in its formula.

    A lookup of unit names, plurals and symbols is built from the
    ``UnitName`` and ``UnitSymbol`` nodes, and a list of prefix names and
    symbols from the ``Prefix`` and ``PrefixSymbol`` nodes. Each unit's
    ``formula`` is split into tokens. Every token that resolves to a known
    name, directly or after removing a prefix, gets a ``REFERENCES``
    relationship whose ``as`` property holds the token as written. The
    resolved names and the tokens are also stored on the unit as
    ``references`` and ``sources``.

    Args:
        dB: Database handle exposing ``labels.get(label).all()`` and the
            query interface used by mergeRelationship().
    """
    nameDict = dict()

    for name in dB.labels.get('UnitName').all():
        nameDict[name['name']] = name

        try:
            nameDict[name['plural']] = name
        except Exception:
            # Best effort: names stored without a plural are skipped here.
            # NOTE(review): the exception the client raises for a missing
            # property is not visible in this file - narrow this once
            # confirmed.
            pass

    for symbol in dB.labels.get('UnitSymbol').all():
        nameDict[symbol['name']] = symbol

    prefixList = [prefix['name'] for prefix in dB.labels.get('Prefix').all()]
    prefixList.extend(symbol['name'] for symbol in dB.labels.get('PrefixSymbol').all())

    partsRegex = re.compile(r'[. @/)^(+-]')      # separators between formula tokens
    ignoreRegex = re.compile(r'\d+e?\d*$|lg$|re$')  # numeric tokens and the words lg / re
    trailRegex = re.compile(r'\d+$')             # trailing digits on a token

    for unit in dB.labels.get('Unit').all():
        theFormula = unit['formula']

        # Base and dimensionless units carry a marker text, not a formula.
        if theFormula in ('Base SI unit', 'Dimensionless quantity'):
            continue

        referenceList = list()
        sourceList = list()

        for part in partsRegex.split(theFormula):
            if not part or ignoreRegex.match(part) is not None:
                continue

            fullName = trailRegex.sub('', part)

            if not fullName:
                continue

            reference = _resolveReference(fullName, nameDict, prefixList)

            if reference is None:
                continue

            referenceList.append(reference)
            sourceList.append(fullName)

            mergeRelationship(dB, unit, 'REFERENCES', nameDict[reference], properties = {'as' : fullName})

        if referenceList:
            unit['references'] = referenceList
            unit['sources'] = sourceList
352+
353+
354+
def main():
    """Build the units graph from the XML files named on the command line.

    Connects to the database at the fixed local URL, loads each file given
    in sys.argv[1:] with addUnits(), then runs parseFormulas() once over
    the loaded units.
    """
    graph = GraphDatabase('http://localhost:7474/db/data/')

    xmlPaths = sys.argv[1:]

    for xmlPath in xmlPaths:
        addUnits(graph, xmlPath)

    parseFormulas(graph)
361+
362+
363+
# Run the builder only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)