Skip to content

Commit 4c5143b

Browse files
committed
MergeNodes can now handle multiple nodes at the same time
1 parent d9083ce commit 4c5143b

File tree

2 files changed

+75
-62
lines changed

2 files changed

+75
-62
lines changed

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/MergeNodes.java

Lines changed: 74 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@
22

33
import java.io.StringWriter;
44
import java.util.ArrayList;
5+
import java.util.Collections;
6+
import java.util.HashSet;
57
import java.util.List;
68
import java.util.Map;
9+
import java.util.Set;
710
import java.util.TreeMap;
811

912
import edu.stanford.nlp.ling.CoreAnnotations;
@@ -26,14 +29,11 @@
2629
*/
2730
public class MergeNodes extends SsurgeonEdit {
2831
public static final String LABEL = "mergeNodes";
29-
final List<String> nodes;
32+
final List<String> names;
3033
final Map<String, String> attributes;
3134

32-
public MergeNodes(List<String> nodes, Map<String, String> attributes) {
33-
if (nodes.size() > 2) {
34-
throw new SsurgeonParseException("Cannot support MergeNodes of size " + nodes.size() + " yet... please file an issue on github if you need this feature");
35-
}
36-
this.nodes = new ArrayList<>(nodes);
35+
public MergeNodes(List<String> names, Map<String, String> attributes) {
36+
this.names = new ArrayList<>(names);
3737
this.attributes = new TreeMap<>(attributes);
3838
}
3939

@@ -44,7 +44,7 @@ public MergeNodes(List<String> nodes, Map<String, String> attributes) {
4444
public String toEditString() {
4545
StringWriter buf = new StringWriter();
4646
buf.write(LABEL);
47-
for (String name : nodes) {
47+
for (String name : names) {
4848
buf.write("\t");
4949
buf.write(Ssurgeon.NODENAME_ARG + " " + name);
5050
}
@@ -61,90 +61,97 @@ public String toEditString() {
6161
}
6262

6363
/**
64-
* If the two named nodes are next to each other, and the edges of
65-
* the graph allow for it, squish the two words into one word
64+
* If the named nodes are next to each other, and the edges of
65+
* the graph allow for it, squish those words into one word
6666
*/
6767
@Override
6868
public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
69-
String name1 = nodes.get(0);
70-
String name2 = nodes.get(1);
71-
72-
IndexedWord node1 = sm.getNode(name1);
73-
IndexedWord node2 = sm.getNode(name2);
74-
75-
if (node1 == null || node2 == null) {
76-
return false;
77-
}
78-
79-
List<SemanticGraphEdge> n1_to_n2 = sg.getAllEdges(node1, node2);
80-
List<SemanticGraphEdge> n2_to_n1 = sg.getAllEdges(node2, node1);
81-
if (n1_to_n2.size() == 0 && n2_to_n1.size() == 0) {
82-
return false;
83-
}
84-
85-
// TODO: what about the case where the dep is or has copies?
86-
final IndexedWord head;
87-
final IndexedWord dep;
88-
89-
if (n1_to_n2.size() > 0) {
90-
head = node1;
91-
dep = node2;
92-
} else {
93-
head = node2;
94-
dep = node1;
95-
}
96-
97-
// If the dep has any edges that aren't between dep & head, abort
98-
// TODO: we could probably make it adjust edges with "dep" as source, instead
99-
for (SemanticGraphEdge e : sg.outgoingEdgeIterable(dep)) {
100-
if (e.getTarget() != head) {
69+
List<IndexedWord> nodes = new ArrayList<>();
70+
for (String name : names) {
71+
IndexedWord node = sm.getNode(name);
72+
if (node == null) {
10173
return false;
10274
}
103-
}
104-
for (SemanticGraphEdge e : sg.incomingEdgeIterable(dep)) {
105-
if (e.getSource() != head) {
106-
return false;
75+
nodes.add(node);
76+
}
77+
Collections.sort(nodes);
78+
79+
IndexedWord head = null;
80+
for (IndexedWord candidate : nodes) {
81+
if (sg.hasChildren(candidate)) {
82+
// if multiple nodes have children inside the graph,
83+
// perhaps we could merge them all,
84+
// but the easiest thing to do is just abort
85+
// TODO: an alternate approach would be to look for nodes with a head
86+
// outside the nodes in the phrase to merge
87+
if (head != null) {
88+
return false;
89+
}
90+
head = candidate;
10791
}
10892
}
10993

110-
IndexedWord left;
111-
IndexedWord right;
112-
if (node1.index() < node2.index()) {
113-
left = node1;
114-
right = node2;
115-
} else {
116-
left = node2;
117-
right = node1;
94+
Set<Integer> depIndices = new HashSet<Integer>();
95+
for (IndexedWord other : nodes) {
96+
if (other == head) {
97+
continue;
98+
}
99+
Set<IndexedWord> parents = sg.getParents(other);
100+
// this shouldn't happen
101+
if (parents.size() == 0) {
102+
return false;
103+
}
104+
// iterate instead of just do the first in case
105+
// this one day is doing an graph with extra dependencies
106+
for (IndexedWord parent : parents) {
107+
if (parent != head) {
108+
return false;
109+
}
110+
}
111+
depIndices.add(other.index());
118112
}
119113

120114
CoreLabel newLabel = AddDep.fromCheapStrings(attributes);
121115
// CoreLabel.setWord wipes out the lemma for some reason
122116
// we may eventually change that, but for now, we compensate for that here
123117
String lemma = newLabel.lemma();
118+
124119
if (newLabel.word() == null) {
125-
String newWord = left.word() + right.word();
126-
newLabel.setWord(newWord);
120+
StringBuilder newWord = new StringBuilder();
121+
for (IndexedWord node : nodes) {
122+
newWord.append(node.word());
123+
}
124+
newLabel.setWord(newWord.toString());
127125
}
128126
if (newLabel.value() == null) {
129127
newLabel.setValue(newLabel.word());
130128
}
131129

132130
newLabel.setLemma(lemma);
133131
if (newLabel.lemma() == null) {
134-
String newLemma = left.lemma() != null && right.lemma() != null ? left.lemma() + right.lemma() : null;
135-
newLabel.setLemma(newLemma);
132+
StringBuilder newLemma = new StringBuilder();
133+
for (IndexedWord node : nodes) {
134+
if (node.lemma() != null) {
135+
newLemma.append(node.lemma());
136+
}
137+
}
138+
lemma = newLemma.length() > 0 ? newLemma.toString() : null;
139+
newLabel.setLemma(lemma);
136140
}
141+
137142
// after() and before() return "" if null, so we need to use the CoreAnnotations directly
138143
if (newLabel.get(CoreAnnotations.AfterAnnotation.class) == null) {
139-
newLabel.setAfter(right.after());
144+
newLabel.setAfter(nodes.get(nodes.size() - 1).after());
140145
}
141146
if (newLabel.get(CoreAnnotations.BeforeAnnotation.class) == null) {
142-
newLabel.setBefore(right.before());
147+
newLabel.setBefore(nodes.get(0).before());
143148
}
144149

145150
// find the head, and replace all the existing annotations on the head
146151
// with the new annotations (including word and lemma)
147152
// from the newly built CoreLabel
153+
// TODO: should avoid messing with empty nodes
154+
// doing extra nodes would be good
148155
for (IndexedWord vertex : sg.vertexSet()) {
149156
if (vertex.index() == head.index()) {
150157
for (Class key : newLabel.keySet()) {
@@ -159,13 +166,19 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
159166
// TODO: super fancy would be implementing iterator.remove()
160167
// on the Set returned by the SemanticGraph
161168
for (IndexedWord vertex : sg.vertexListSorted()) {
162-
if (vertex.index() == dep.index()) {
169+
// TODO: again, don't delete empty nodes
170+
if (depIndices.contains(vertex.index())) {
163171
sg.removeVertex(vertex);
164172
}
165173
}
166174

167175
// reindex everyone
168-
SsurgeonUtils.moveNodes(sg, sm, x -> (x >= dep.index()), x -> x-1, false);
176+
List<Integer> sortedIndices = new ArrayList<>(depIndices);
177+
Collections.sort(sortedIndices);
178+
Collections.reverse(sortedIndices);
179+
for (Integer depIndex : sortedIndices) {
180+
SsurgeonUtils.moveNodes(sg, sm, x -> (x >= depIndex), x -> x-1, false);
181+
}
169182

170183
return true;
171184
}

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/Ssurgeon.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@
8787
* <li> {@code combineMWT -node node -word word}
8888
* <li> {@code splitWord -node node -headIndex idx -reln depType -regex w1 -regex w2 ...}
8989
* <li> {@code setRoots n1 (n2 n3 ...)}
90-
* <li> {@code mergeNodes n1 n2}
90+
* <li> {@code mergeNodes -node n1 -node n2 ...}
9191
* <li> {@code killAllIncomingEdges -node node}
9292
* <li> {@code delete -node node}
9393
* <li> {@code deleteLeaf -node node}

0 commit comments

Comments
 (0)