@@ -35,6 +35,32 @@ public String printSemanticGraph(SemanticGraph basicSg, SemanticGraph enhancedSg
3535 return printSemanticGraph (basicSg , enhancedSg , true , basicSg .getComments ());
3636 }
3737
38+ // TODO: put in the same place as CoNLLUReader::unescapeSpacesAfter
39+ public static String escapeSpaces (String after ) {
40+ StringBuilder result = new StringBuilder ();
41+ for (int i = 0 ; i < after .length (); ++i ) {
42+ char next = after .charAt (i );
43+ if (next == ' ' ) {
44+ result .append ("\\ s" );
45+ } else if (next == '\t' ) {
46+ result .append ("\\ t" );
47+ } else if (next == '\r' ) {
48+ result .append ("\\ r" );
49+ } else if (next == '\n' ) {
50+ result .append ("\\ n" );
51+ } else if (next == '|' ) {
52+ result .append ("\\ p" );
53+ } else if (next == '\\' ) {
54+ result .append ("\\ \\ " );
55+ } else if (next == ' ' ) {
56+ result .append ("\\ u00A0" );
57+ } else {
58+ result .append (next );
59+ }
60+ }
61+ return result .toString ();
62+ }
63+
3864 public String printSemanticGraph (SemanticGraph basicSg , SemanticGraph enhancedSg , boolean unescapeParenthesis , Collection <String > comments ) {
3965 StringBuilder sb = new StringBuilder ();
4066
@@ -97,21 +123,29 @@ public String printSemanticGraph(SemanticGraph basicSg, SemanticGraph enhancedSg
97123 String relnName = reln == null ? "_" : reln .toString ();
98124
99125 // don't use after() directly; it returns a default of ""
100- // TODO: does this handle SpaceAfter on other tokens or SpacesAfter?
101- if (token .get (CoreAnnotations .AfterAnnotation .class ) != null && token .after ().equals ("" )) {
102- IndexedWord nextVertex = tokenSg .getNodeByIndexSafe (token .index () + 1 );
103- // the next word needs to exist and be part of the same MWT
104- // and either this word is the start of the MWT
105- // or this word is the middle of the same MWT as the next word
106- // if that is true, we will skip the SpaceAfter annotation
107- boolean inMWT = ((nextVertex != null && isMWTbutNotStart (nextVertex )) &&
108- ((token .containsKey (CoreAnnotations .IsFirstWordOfMWTAnnotation .class ) && token .get (CoreAnnotations .IsFirstWordOfMWTAnnotation .class )) ||
109- (isMWTbutNotStart (token ))));
110- if (!inMWT ) {
111- if (misc .equals ("_" )) {
112- misc = "SpaceAfter=No" ;
126+ // TODO: also print SpacesBefore on the first token
127+ if (token .get (CoreAnnotations .AfterAnnotation .class ) != null ) {
128+ String after = token .after ();
129+ if (!after .equals (" " )) {
130+ if (after .equals ("" )) {
131+ after = "SpaceAfter=No" ;
113132 } else {
114- misc = misc + "|SpaceAfter=No" ;
133+ after = "SpacesAfter=" + escapeSpaces (after );
134+ }
135+ IndexedWord nextVertex = tokenSg .getNodeByIndexSafe (token .index () + 1 );
136+ // the next word needs to exist and be part of the same MWT
137+ // and either this word is the start of the MWT
138+ // or this word is the middle of the same MWT as the next word
139+ // if that is true, we will skip the SpaceAfter annotation
140+ boolean inMWT = ((nextVertex != null && isMWTbutNotStart (nextVertex )) &&
141+ ((token .containsKey (CoreAnnotations .IsFirstWordOfMWTAnnotation .class ) && token .get (CoreAnnotations .IsFirstWordOfMWTAnnotation .class )) ||
142+ (isMWTbutNotStart (token ))));
143+ if (!inMWT ) {
144+ if (misc .equals ("_" )) {
145+ misc = after ;
146+ } else {
147+ misc = misc + "|" + after ;
148+ }
115149 }
116150 }
117151 }
0 commit comments