22
33import java .io .StringWriter ;
44import java .util .ArrayList ;
5+ import java .util .Collections ;
6+ import java .util .HashSet ;
57import java .util .List ;
68import java .util .Map ;
9+ import java .util .Set ;
710import java .util .TreeMap ;
811
912import edu .stanford .nlp .ling .CoreAnnotations ;
2629 */
2730public class MergeNodes extends SsurgeonEdit {
2831 public static final String LABEL = "mergeNodes" ;
29- final List <String > nodes ;
32+ final List <String > names ;
3033 final Map <String , String > attributes ;
3134
32- public MergeNodes (List <String > nodes , Map <String , String > attributes ) {
33- if (nodes .size () > 2 ) {
34- throw new SsurgeonParseException ("Cannot support MergeNodes of size " + nodes .size () + " yet... please file an issue on github if you need this feature" );
35- }
36- this .nodes = new ArrayList <>(nodes );
35+ public MergeNodes (List <String > names , Map <String , String > attributes ) {
36+ this .names = new ArrayList <>(names );
3737 this .attributes = new TreeMap <>(attributes );
3838 }
3939
@@ -44,7 +44,7 @@ public MergeNodes(List<String> nodes, Map<String, String> attributes) {
4444 public String toEditString () {
4545 StringWriter buf = new StringWriter ();
4646 buf .write (LABEL );
47- for (String name : nodes ) {
47+ for (String name : names ) {
4848 buf .write ("\t " );
4949 buf .write (Ssurgeon .NODENAME_ARG + " " + name );
5050 }
@@ -61,90 +61,97 @@ public String toEditString() {
6161 }
6262
6363 /**
64- * If the two named nodes are next to each other, and the edges of
65- * the graph allow for it, squish the two words into one word
64+ * If the named nodes are next to each other, and the edges of
65+ * the graph allow for it, squish those words into one word
6666 */
6767 @ Override
6868 public boolean evaluate (SemanticGraph sg , SemgrexMatcher sm ) {
69- String name1 = nodes .get (0 );
70- String name2 = nodes .get (1 );
71-
72- IndexedWord node1 = sm .getNode (name1 );
73- IndexedWord node2 = sm .getNode (name2 );
74-
75- if (node1 == null || node2 == null ) {
76- return false ;
77- }
78-
79- List <SemanticGraphEdge > n1_to_n2 = sg .getAllEdges (node1 , node2 );
80- List <SemanticGraphEdge > n2_to_n1 = sg .getAllEdges (node2 , node1 );
81- if (n1_to_n2 .size () == 0 && n2_to_n1 .size () == 0 ) {
82- return false ;
83- }
84-
85- // TODO: what about the case where the dep is or has copies?
86- final IndexedWord head ;
87- final IndexedWord dep ;
88-
89- if (n1_to_n2 .size () > 0 ) {
90- head = node1 ;
91- dep = node2 ;
92- } else {
93- head = node2 ;
94- dep = node1 ;
95- }
96-
97- // If the dep has any edges that aren't between dep & head, abort
98- // TODO: we could probably make it adjust edges with "dep" as source, instead
99- for (SemanticGraphEdge e : sg .outgoingEdgeIterable (dep )) {
100- if (e .getTarget () != head ) {
69+ List <IndexedWord > nodes = new ArrayList <>();
70+ for (String name : names ) {
71+ IndexedWord node = sm .getNode (name );
72+ if (node == null ) {
10173 return false ;
10274 }
103- }
104- for (SemanticGraphEdge e : sg .incomingEdgeIterable (dep )) {
105- if (e .getSource () != head ) {
106- return false ;
75+ nodes .add (node );
76+ }
77+ Collections .sort (nodes );
78+
79+ IndexedWord head = null ;
80+ for (IndexedWord candidate : nodes ) {
81+ if (sg .hasChildren (candidate )) {
82+ // if multiple nodes have children inside the graph,
83+ // perhaps we could merge them all,
84+ // but the easiest thing to do is just abort
85+ // TODO: an alternate approach would be to look for nodes with a head
86+ // outside the nodes in the phrase to merge
87+ if (head != null ) {
88+ return false ;
89+ }
90+ head = candidate ;
10791 }
10892 }
10993
110- IndexedWord left ;
111- IndexedWord right ;
112- if (node1 .index () < node2 .index ()) {
113- left = node1 ;
114- right = node2 ;
115- } else {
116- left = node2 ;
117- right = node1 ;
94+ Set <Integer > depIndices = new HashSet <Integer >();
95+ for (IndexedWord other : nodes ) {
96+ if (other == head ) {
97+ continue ;
98+ }
99+ Set <IndexedWord > parents = sg .getParents (other );
100+ // this shouldn't happen
101+ if (parents .size () == 0 ) {
102+ return false ;
103+ }
104+ // iterate instead of just do the first in case
105+ // this one day is doing an graph with extra dependencies
106+ for (IndexedWord parent : parents ) {
107+ if (parent != head ) {
108+ return false ;
109+ }
110+ }
111+ depIndices .add (other .index ());
118112 }
119113
120114 CoreLabel newLabel = AddDep .fromCheapStrings (attributes );
121115 // CoreLabel.setWord wipes out the lemma for some reason
122116 // we may eventually change that, but for now, we compensate for that here
123117 String lemma = newLabel .lemma ();
118+
124119 if (newLabel .word () == null ) {
125- String newWord = left .word () + right .word ();
126- newLabel .setWord (newWord );
120+ StringBuilder newWord = new StringBuilder ();
121+ for (IndexedWord node : nodes ) {
122+ newWord .append (node .word ());
123+ }
124+ newLabel .setWord (newWord .toString ());
127125 }
128126 if (newLabel .value () == null ) {
129127 newLabel .setValue (newLabel .word ());
130128 }
131129
132130 newLabel .setLemma (lemma );
133131 if (newLabel .lemma () == null ) {
134- String newLemma = left .lemma () != null && right .lemma () != null ? left .lemma () + right .lemma () : null ;
135- newLabel .setLemma (newLemma );
132+ StringBuilder newLemma = new StringBuilder ();
133+ for (IndexedWord node : nodes ) {
134+ if (node .lemma () != null ) {
135+ newLemma .append (node .lemma ());
136+ }
137+ }
138+ lemma = newLemma .length () > 0 ? newLemma .toString () : null ;
139+ newLabel .setLemma (lemma );
136140 }
141+
137142 // after() and before() return "" if null, so we need to use the CoreAnnotations directly
138143 if (newLabel .get (CoreAnnotations .AfterAnnotation .class ) == null ) {
139- newLabel .setAfter (right .after ());
144+ newLabel .setAfter (nodes . get ( nodes . size () - 1 ) .after ());
140145 }
141146 if (newLabel .get (CoreAnnotations .BeforeAnnotation .class ) == null ) {
142- newLabel .setBefore (right .before ());
147+ newLabel .setBefore (nodes . get ( 0 ) .before ());
143148 }
144149
145150 // find the head, and replace all the existing annotations on the head
146151 // with the new annotations (including word and lemma)
147152 // from the newly built CoreLabel
153+ // TODO: should avoid messing with empty nodes
154+ // doing extra nodes would be good
148155 for (IndexedWord vertex : sg .vertexSet ()) {
149156 if (vertex .index () == head .index ()) {
150157 for (Class key : newLabel .keySet ()) {
@@ -159,13 +166,19 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
159166 // TODO: super fancy would be implementing iterator.remove()
160167 // on the Set returned by the SemanticGraph
161168 for (IndexedWord vertex : sg .vertexListSorted ()) {
162- if (vertex .index () == dep .index ()) {
169+ // TODO: again, don't delete empty nodes
170+ if (depIndices .contains (vertex .index ())) {
163171 sg .removeVertex (vertex );
164172 }
165173 }
166174
167175 // reindex everyone
168- SsurgeonUtils .moveNodes (sg , sm , x -> (x >= dep .index ()), x -> x -1 , false );
176+ List <Integer > sortedIndices = new ArrayList <>(depIndices );
177+ Collections .sort (sortedIndices );
178+ Collections .reverse (sortedIndices );
179+ for (Integer depIndex : sortedIndices ) {
180+ SsurgeonUtils .moveNodes (sg , sm , x -> (x >= depIndex ), x -> x -1 , false );
181+ }
169182
170183 return true ;
171184 }
0 commit comments