Skip to content

Commit e06eb55

Browse files
mjohnson541nyee
authored andcommitted
Add a database test for unimolecular groups with multiple trees
Checks that: - End group labels are consistent throughout each end group tree - Backbone has all labels present in the end groups - Backbone has labels tracing the shortest path between end groups - Backbone end group subgraphs match the top level of the corresponding end group tree
1 parent cfd05a4 commit e06eb55

File tree

1 file changed

+183
-3
lines changed

1 file changed

+183
-3
lines changed

testing/databaseTest.py

Lines changed: 183 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from rmgpy.data.base import LogicOr
1111
from rmgpy.molecule import Group
1212
from rmgpy.molecule.atomtype import atomTypes
13+
from rmgpy.molecule.pathfinder import find_shortest_path
1314

1415
import nose
1516
import nose.tools
@@ -79,7 +80,14 @@ def test_kinetics(self):
7980
test.description = test_name
8081
self.compat_func_name = test_name
8182
yield test, family_name
82-
83+
84+
if len(family.forwardTemplate.reactants)==1 and len(family.groups.top) != 1 and family_name != 'Diels_alder_addition':
85+
test = lambda x: self.kinetics_checkUnimolecularGroups(family_name)
86+
test_name = "Kinetics family {0} check that unimolecular group is formatted correctly?".format(family_name)
87+
test.description = test_name
88+
self.compat_func_name = test_name
89+
yield test, family_name
90+
8391
for depository in family.depositories:
8492

8593
test = lambda x: self.kinetics_checkAdjlistsNonidentical(depository)
@@ -95,7 +103,8 @@ def test_kinetics(self):
95103
test.description = test_name
96104
self.compat_func_name = test_name
97105
yield test, library_name
98-
106+
107+
99108
def test_thermo(self):
100109
for group_name, group in self.database.thermo.groups.iteritems():
101110
test = lambda x: self.general_checkNodesFoundInTree(group_name, group)
@@ -440,7 +449,178 @@ def kinetics_checkCdAtomType(self, family_name):
440449
{4}
441450
""".format(family_name, entry, correctAtom, index+1, entry.item.toAdjacencyList()))
442451

443-
def general_checkNodesFoundInTree(self, group_name, group):
452+
def kinetics_checkUnimolecularGroups(self, family_name):
    """
    Check the group formatting of a unimolecular family that has more than one top level.

    The first reactant of the family's forward template is taken to be the backbone
    (it contains the whole reactant molecule); every other top level entry is taken
    to be an end group. The following error categories are collected:

    A) an end group entry has labels that its top level entry does not have
    B) an end group entry is missing labels that its top level entry has
    C) a backbone group is missing labels that an end group has
    D) a backbone group has an unlabelled atom on the shortest path between the
       atoms labelled "*1" and "*3" (or lacks one of those two labels, or no
       path between them exists)
    E) the backbone subgraph corresponding to an end group is not identical to
       the top level entry of that end group

    Every category that has at least one violation is included in the message of
    a single failed assertion.

    :param family_name: key of the family in ``self.database.kinetics.families``
    :raises Exception: if an end group cannot be split out of a backbone group
    """
    def shortestPath(start, end, path=None):
        """
        Recursive depth-first search for a shortest path between two atoms.

        Named differently from the module-level ``find_shortest_path`` import so
        that the import is not shadowed inside this method.

        :param start: atom to start from; neighbours are read from its ``bonds`` mapping
        :param end: atom to reach
        :param path: atoms already visited on the current branch
        :return: list of atoms from `start` to `end` inclusive, or None if `end` is unreachable
        """
        path = (path if path else []) + [start]
        if start == end:
            return path

        shortest = None
        for node, _ in start.bonds.iteritems():
            if node not in path:
                newpath = shortestPath(node, end, path)
                if newpath and (not shortest or len(newpath) < len(shortest)):
                    shortest = newpath
        return shortest

    def getEndFromBackbone(backbone, endLabels):
        """
        :param backbone: :class: Entry for a backbone of molecule
        :param endLabels: Labels in the end groups
        :return: A subgraph representing the end group of the molecule
        :raises Exception: if no atom carries an end label or no fragment has exactly `endLabels`
        """
        # make copy for manipulation, so the database entry itself is left untouched
        # NOTE(review): the True argument presumably requests a deep copy - confirm
        copyGroup = backbone.item.copy(True)

        # find the first atom that carries one of the end group labels
        midAtom = None
        for atom in copyGroup.atoms:
            if atom.label in endLabels:
                midAtom = atom
                break
        if midAtom is None:
            # without this guard the code below would fail with an unbound local
            raise Exception("Group {0} has no atom labelled with any of the end group labels {1}".format(
                backbone.label, sorted(endLabels)))

        # collect the bonds first: removing them while iterating would mutate midAtom.bonds
        bondsToBreak = [bond for atom2, bond in midAtom.bonds.iteritems()
                        if atom2.label is None or atom2.label not in endLabels]
        for bond in bondsToBreak:
            copyGroup.removeBond(bond)

        # split group into end and backbone fragment
        groups = copyGroup.split()

        # verify group was split correctly and identify the correct end group
        endLabels = set(endLabels)
        groupLabels = set()
        for group in groups:
            # unlabelled atoms (empty or None label) do not take part in the comparison
            groupLabels = set(atom.label for atom in group.atoms if atom.label)
            if endLabels == groupLabels:
                return group

        # no fragment matched: dump diagnostics before failing
        print(endLabels)
        print(groupLabels)
        for group in groups:
            print(group.toAdjacencyList(label=backbone.label))
        raise Exception("Group {0} not split correctly".format(backbone.label))

    family = self.database.kinetics.families[family_name]

    backbone = family.forwardTemplate.reactants[0]

    endGroups = [entry for entry in family.groups.top if entry not in family.forwardTemplate.reactants]

    # map each top level end group entry to the set of labels it defines
    endLabels = {}
    for endGroup in endGroups:
        endLabels[endGroup] = set(atom.label for atom in endGroup.item.atoms if atom.label)

    # one atom from each end group
    midLabels = ["*1", "*3"]

    # define types of errors
    A = []  # end groups have too many labels
    B = []  # end group lacks necessary label
    C = []  # backbone missing end group labels
    D = []  # backbone missing labels in between groups
    E = []  # backbone tries to define atoms inside end groups

    for group_name, entry in family.groups.entries.iteritems():
        if not isinstance(entry.item, Group):
            continue
        group = entry.item
        # materialised once so it can be tested for membership several times
        ancestors = list(family.ancestors(entry))
        # labels of THIS entry only; rebuilt on every iteration so that nothing
        # leaks over from the previously examined entry
        presentLabels = set(atom.label for atom in group.atoms if atom.label)

        if backbone in ancestors:
            # Check C
            for endGroup, labels in endLabels.iteritems():
                if not labels.issubset(presentLabels):
                    C.append([endGroup, entry])

            # check D
            if all(label in presentLabels for label in midLabels):
                midAtoms = [group.getLabeledAtom(x) for x in midLabels]
                pathAtoms = shortestPath(midAtoms[0], midAtoms[1])
                # a missing path (None) is reported rather than crashing the loop
                if pathAtoms is None or any(not atom.label for atom in pathAtoms):
                    D.append([backbone, entry])
            else:
                # an end point of the path is itself unlabelled
                D.append([backbone, entry])

            # check E
            for endGroup, labels in endLabels.iteritems():
                if not labels.issubset(presentLabels):
                    # already recorded under C; the end group cannot be split
                    # out, and trying would raise and hide the collected report
                    continue
                endFromBackbone = getEndFromBackbone(entry, labels)
                splitLabels = set(endFromBackbone.getLabeledAtoms().keys())
                if labels != splitLabels:
                    raise Exception("Group {0} has split into end group {1}, but does not match any root".format(entry.label, endFromBackbone.toAdjacencyList()))
                if not endGroup.item.isIdentical(endFromBackbone):
                    E.append([endGroup, entry])
        else:
            for endNode, labelledAtoms in endLabels.iteritems():
                if endNode in ancestors:
                    # Check A
                    if not presentLabels.issubset(labelledAtoms):
                        A.append([endNode, entry])
                    # Check B
                    if not labelledAtoms.issubset(presentLabels):
                        B.append([endNode, entry])

    # print outputs: gather every non-empty category into one message
    reports = [
        (A, "These end groups have extra labels that their top level end group do not have:"),
        (B, "These end groups are missing labels that their top level end group have:"),
        (C, "These backbone groups are missing labels that are in the end groups:"),
        (D, "These backbone groups are missing labels along the path atoms:"),
        (E, "These backbone have end subgraphs that don't match a root:"),
    ]
    messages = []
    for errors, header in reports:
        if errors:
            s = header + "\n [root group, error group]"
            for x in errors:
                s += '\n' + str(x)
            messages.append(s)
    nose.tools.assert_true(not messages, "\n".join(messages))
622+
623+
def general_checkNodesFoundInTree(self, group_name, group):
444624
"""
445625
This test checks whether nodes are found in the tree, with proper parents.
446626
"""

0 commit comments

Comments
 (0)