@@ -101,10 +101,20 @@ public String printSemanticGraph(SemanticGraph basicSg, SemanticGraph enhancedSg
101101
102102 // don't use after() directly; it returns a default of ""
103103 if (token .get (CoreAnnotations .AfterAnnotation .class ) != null && token .after ().equals ("" )) {
104- if (misc .equals ("_" )) {
105- misc = "SpaceAfter=No" ;
106- } else {
107- misc = misc + "|SpaceAfter=No" ;
104+ IndexedWord nextVertex = tokenSg .getNodeByIndex (token .index () + 1 );
105+ // the next word needs to exist and be part of the same MWT
106+ // and either this word is the start of the MWT
107+ // or this word is the middle of the same MWT as the next word
108+ // if that is true, we will skip the SpaceAfter annotation
109+ boolean inMWT = ((nextVertex != null && isMWTbutNotStart (nextVertex )) &&
110+ ((token .containsKey (CoreAnnotations .IsFirstWordOfMWTAnnotation .class ) && token .get (CoreAnnotations .IsFirstWordOfMWTAnnotation .class )) ||
111+ (isMWTbutNotStart (token ))));
112+ if (!inMWT ) {
113+ if (misc .equals ("_" )) {
114+ misc = "SpaceAfter=No" ;
115+ } else {
116+ misc = misc + "|SpaceAfter=No" ;
117+ }
108118 }
109119 }
110120
@@ -151,19 +161,29 @@ public static void printSpan(StringBuilder sb, AbstractCoreLabel token) {
151161 }
152162 }
153163
164+ /**
165+ * Is the word part of an MWT, but not the start?
166+ */
167+ public static boolean isMWTbutNotStart (IndexedWord nextVertex ) {
168+ if (nextVertex .containsKey (CoreAnnotations .IsFirstWordOfMWTAnnotation .class ) &&
169+ nextVertex .get (CoreAnnotations .IsFirstWordOfMWTAnnotation .class )) {
170+ return false ;
171+ }
172+ if (!nextVertex .containsKey (CoreAnnotations .IsMultiWordTokenAnnotation .class ) ||
173+ !nextVertex .get (CoreAnnotations .IsMultiWordTokenAnnotation .class )) {
174+ return false ;
175+ }
176+ return true ;
177+ }
178+
154179 public static void printMWT (StringBuilder sb , SemanticGraph graph , IndexedWord token ) {
155180 int startIndex = token .index ();
156181 int endIndex = startIndex ;
157182 // advance endIndex until we reach the end of the sentence, the start of the next MWT,
158183 // or a word which isn't part of any MWT
159184 IndexedWord nextVertex ;
160185 while ((nextVertex = graph .getNodeByIndex (endIndex +1 )) != null ) {
161- if (nextVertex .containsKey (CoreAnnotations .IsFirstWordOfMWTAnnotation .class ) &&
162- nextVertex .get (CoreAnnotations .IsFirstWordOfMWTAnnotation .class )) {
163- break ;
164- }
165- if (!nextVertex .containsKey (CoreAnnotations .IsMultiWordTokenAnnotation .class ) ||
166- !nextVertex .get (CoreAnnotations .IsMultiWordTokenAnnotation .class )) {
186+ if (!isMWTbutNotStart (nextVertex )) {
167187 break ;
168188 }
169189 ++endIndex ;
0 commit comments