1515import edu .stanford .nlp .semgraph .SemanticGraph ;
1616import edu .stanford .nlp .semgraph .SemanticGraphEdge ;
1717import edu .stanford .nlp .semgraph .semgrex .SemgrexMatcher ;
18+ import edu .stanford .nlp .trees .GrammaticalRelation ;
19+ import edu .stanford .nlp .util .Pair ;
1820
1921/**
2022 * Combines two words into one word
@@ -60,6 +62,31 @@ public String toEditString() {
6062 return buf .toString ();
6163 }
6264
65+ /**
66+ * Test if two nodes have the same parents with the same relations.
67+ * If so, then the two nodes can be treated as equivalent when merging nodes.
68+ * Otherwise, since there are two different heads, we can't pick a node
69+ * to treat as the head of the phrase, and we will have to abort
70+ */
71+ public static boolean hasSameParents (SemanticGraph sg , IndexedWord head , IndexedWord candidate , Set <IndexedWord > ignoreNodes ) {
72+ Set <Pair <IndexedWord , GrammaticalRelation >> headParents = new HashSet <>();
73+ Set <Pair <IndexedWord , GrammaticalRelation >> candidateParents = new HashSet <>();
74+
75+ for (SemanticGraphEdge edge : sg .incomingEdgeIterable (head )) {
76+ // iterating all parents is relevant for enhanced graphs, for example
77+ if (ignoreNodes .contains (edge .getGovernor ()))
78+ continue ;
79+ headParents .add (new Pair <>(edge .getGovernor (), edge .getRelation ()));
80+ }
81+ for (SemanticGraphEdge edge : sg .incomingEdgeIterable (candidate )) {
82+ // iterating all parents is relevant for enhanced graphs, for example
83+ if (ignoreNodes .contains (edge .getGovernor ()))
84+ continue ;
85+ candidateParents .add (new Pair <>(edge .getGovernor (), edge .getRelation ()));
86+ }
87+ return headParents .equals (candidateParents );
88+ }
89+
6390 /**
6491 * If the named nodes are next to each other, and the edges of
6592 * the graph allow for it, squish those words into one word
@@ -76,6 +103,12 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
76103 }
77104
78105 IndexedWord head = null ;
106+ // Words that share the same parents as the head will go in this set.
107+ // Later, any outgoing edges of such a word can be reattached
108+ // so that they come from the chosen head instead.
109+ // This will let us process phrases where two words could have
110+ // been the head and both have edges coming in to them
111+ Set <IndexedWord > equivalentHeads = new HashSet <>();
79112 for (IndexedWord candidate : nodeSet ) {
80113 Set <IndexedWord > parents = sg .getParents (candidate );
81114 if (parents .size () == 0 ) {
@@ -96,9 +129,10 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
96129 // parent is outside this subtree
97130 // therefore, we can use this word as the head of the subtree
98131 if (head != null ) {
99- if (parents . equals (sg . getParents ( head ) )) {
100- // if the parents of the other node are the same, we can keep going
132+ if (hasSameParents (sg , head , candidate , nodeSet )) {
133+ // if the parents *and relations* of the other node are the same, we can keep going
101134 // since the nodes are about to merge anyway
135+ equivalentHeads .add (candidate );
102136 break ;
103137 } else {
104138 // if we already have a head with different parents, give up instead
@@ -114,18 +148,36 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
114148 }
115149
116150 // for now, only allow the head to have edges to children outside the subtree
117- // TODO: instead, could make them all point to the new merged word...
118- // but it's not clear that's a structure we want to allow merged
151+ // also, words with the same parents as the new head can have outgoing edges
152+ // TODO: not clear we want to allow other words with different
153+ // heads to be merged in this manner
154+ List <SemanticGraphEdge > reattachEdges = new ArrayList <>();
119155 for (IndexedWord candidate : nodeSet ) {
120156 if (candidate == head ) {
121157 continue ;
122158 }
123- for (IndexedWord child : sg .getChildren (candidate )) {
124- if (!nodeSet .contains (child )) {
125- return false ;
159+ for (SemanticGraphEdge edge : sg .outgoingEdgeIterable (candidate )) {
160+ IndexedWord gov = edge .getGovernor ();
161+ if (gov != candidate ) {
162+ throw new AssertionError ();
163+ }
164+ IndexedWord dep = edge .getDependent ();
165+ if (!nodeSet .contains (dep )) {
166+ if (equivalentHeads .contains (candidate )) {
167+ reattachEdges .add (edge );
168+ } else {
169+ return false ;
170+ }
126171 }
127172 }
128173 }
174+
175+ // at this point, everything checks out and we can start manipulating the graph
176+ // we will start by reattaching the collected edges (outgoing edges of the equivalent heads) to the chosen head
177+ for (SemanticGraphEdge edge : reattachEdges ) {
178+ ReattachNamedEdge .reattachEdge (sg , sm , edge , null , head , edge .getDependent ());
179+ }
180+
129181 ArrayList <IndexedWord > nodes = new ArrayList <>(nodeSet );
130182 Collections .sort (nodes );
131183
0 commit comments