@@ -40,25 +40,32 @@ public class SplitWord extends SsurgeonEdit {
4040
4141 final String node ;
4242 final List <Pattern > nodeRegex ;
43+ final List <String > exactPieces ;
4344 final int headIndex ;
4445 final GrammaticalRelation relation ;
4546 final Map <Integer , String > nodeNames ;
4647
47- public SplitWord (String node , List <String > nodeRegex , Integer headIndex , GrammaticalRelation relation , String nodeNames ) {
48+ public SplitWord (String node , List <String > nodePieces , Integer headIndex , GrammaticalRelation relation , String nodeNames , boolean exactSplit ) {
4849 if (node == null ) {
4950 throw new SsurgeonParseException ("SplitWord expected -node with the name of the matched node to split" );
5051 }
5152 this .node = node ;
5253
53- if (nodeRegex == null || nodeRegex .size () == 0 ) {
54- throw new SsurgeonParseException ("SplitWord expected -regex with regex to determine which pieces to split the word into" );
54+ if (nodePieces == null || nodePieces .size () == 0 ) {
55+ throw new SsurgeonParseException ("SplitWord expected -exact or - regex with regex to determine which pieces to split the word into" );
5556 }
56- if (nodeRegex .size () == 1 ) {
57- throw new SsurgeonParseException ("SplitWord expected at least two -regex" );
57+ if (nodePieces .size () == 1 ) {
58+ throw new SsurgeonParseException ("SplitWord expected at least two -exact or - regex" );
5859 }
59- this .nodeRegex = new ArrayList <>();
60- for (int i = 0 ; i < nodeRegex .size (); ++i ) {
61- this .nodeRegex .add (Pattern .compile (nodeRegex .get (i )));
60+ if (exactSplit ) {
61+ this .exactPieces = new ArrayList <>(nodePieces );
62+ this .nodeRegex = null ;
63+ } else {
64+ this .nodeRegex = new ArrayList <>();
65+ for (int i = 0 ; i < nodePieces .size (); ++i ) {
66+ this .nodeRegex .add (Pattern .compile (nodePieces .get (i )));
67+ }
68+ this .exactPieces = null ;
6269 }
6370
6471 if (headIndex == null ) {
@@ -80,7 +87,7 @@ public SplitWord(String node, List<String> nodeRegex, Integer headIndex, Grammat
8087 throw new SsurgeonParseException ("SplitWord got a -name parameter which did not have a number for one of the names. Should look like 0=foo,1=bar" );
8188 }
8289 int idx = Integer .valueOf (pieces [0 ]);
83- if (idx >= this . nodeRegex .size ()) {
90+ if (idx >= nodePieces .size ()) {
8491 throw new SsurgeonParseException ("SplitWord got an index in -name which was larger than the largest possible split piece, " + idx + " (this is 0-indexed)" );
8592 }
8693 this .nodeNames .put (idx , pieces [1 ]);
@@ -96,8 +103,14 @@ public String toEditString() {
96103 buf .write (LABEL );
97104 buf .write ("\t " );
98105 buf .write ("-node " + node + "\t " );
99- for (Pattern regex : nodeRegex ) {
100- buf .write ("-regex " + regex + "\t " );
106+ if (nodeRegex != null ) {
107+ for (Pattern regex : nodeRegex ) {
108+ buf .write ("-regex " + regex + "\t " );
109+ }
110+ } else {
111+ for (String piece : exactPieces ) {
112+ buf .write ("-exact " + piece + "\t " );
113+ }
101114 }
102115 buf .write ("-reln " + relation .toString () + "\t " );
103116 buf .write ("-headIndex " + headIndex );
@@ -113,22 +126,27 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
113126 //
114127 // each new word created will be the concatenation of all of the
115128 // matching groups from this pattern
116- List <String > words = new ArrayList <>();
117- for (int i = 0 ; i < nodeRegex .size (); ++i ) {
118- Matcher regexMatcher = nodeRegex .get (i ).matcher (origWord );
119- if (!regexMatcher .matches ()) {
120- return false ;
121- }
129+ List <String > words ;
130+ if (exactPieces != null ) {
131+ words = new ArrayList <>(exactPieces );
132+ } else {
133+ words = new ArrayList <>();
134+ for (int i = 0 ; i < nodeRegex .size (); ++i ) {
135+ Matcher regexMatcher = nodeRegex .get (i ).matcher (origWord );
136+ if (!regexMatcher .matches ()) {
137+ return false ;
138+ }
122139
123- StringBuilder newWordBuilder = new StringBuilder ();
124- for (int j = 0 ; j < regexMatcher .groupCount (); ++j ) {
125- newWordBuilder .append (regexMatcher .group (j +1 ));
126- }
127- String newWord = newWordBuilder .toString ();
128- if (newWord .length () == 0 ) {
129- return false ;
140+ StringBuilder newWordBuilder = new StringBuilder ();
141+ for (int j = 0 ; j < regexMatcher .groupCount (); ++j ) {
142+ newWordBuilder .append (regexMatcher .group (j +1 ));
143+ }
144+ String newWord = newWordBuilder .toString ();
145+ if (newWord .length () == 0 ) {
146+ return false ;
147+ }
148+ words .add (newWord );
130149 }
131- words .add (newWord );
132150 }
133151
134152 int matchedIndex = matchedNode .index ();
@@ -137,7 +155,7 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
137155
138156 // move all words down by words.size() - 1
139157 // then move the original word down by headIndex
140- SsurgeonUtils .moveNodes (sg , sm , x -> (x > matchedIndex ), x -> x +nodeRegex .size () - 1 , true );
158+ SsurgeonUtils .moveNodes (sg , sm , x -> (x > matchedIndex ), x -> x +words .size () - 1 , true );
141159 // the head node has its word replaced, and its index & links need
142160 // to be rearranged, but none of the links are added or removed
143161 if (headIndex > 0 ) {
@@ -147,7 +165,8 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
147165 matchedNode .setWord (words .get (headIndex ));
148166 matchedNode .setValue (words .get (headIndex ));
149167
150- for (int i = 0 ; i < nodeRegex .size (); ++i ) {
168+ // TODO: update SpaceAfter in a reasonable manner
169+ for (int i = 0 ; i < words .size (); ++i ) {
151170 if (i == headIndex ) {
152171 if (nodeNames .containsKey (i )) {
153172 sm .putNode (nodeNames .get (i ), matchedNode );
0 commit comments