1919import static org .antlr .codebuff .CollectFeatures .CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN ;
2020import static org .antlr .codebuff .CollectFeatures .CAT_INJECT_NL ;
2121import static org .antlr .codebuff .CollectFeatures .CAT_INJECT_WS ;
22+ import static org .antlr .codebuff .CollectFeatures .CAT_NO_ALIGNMENT ;
2223import static org .antlr .codebuff .CollectFeatures .FEATURES_ALIGN ;
2324import static org .antlr .codebuff .CollectFeatures .FEATURES_INJECT_WS ;
2425import static org .antlr .codebuff .CollectFeatures .INDEX_FIRST_ON_LINE ;
25- import static org .antlr .codebuff .CollectFeatures .INDEX_PREV_END_COLUMN ;
26+ import static org .antlr .codebuff .CollectFeatures .INDEX_MATCHING_TOKEN_DIFF_LINE ;
2627import static org .antlr .codebuff .CollectFeatures .MAX_CONTEXT_DIFF_THRESHOLD ;
2728import static org .antlr .codebuff .CollectFeatures .earliestAncestorStartingWithToken ;
29+ import static org .antlr .codebuff .CollectFeatures .getMatchingSymbolOnDiffLine ;
2830import static org .antlr .codebuff .CollectFeatures .getNodeFeatures ;
2931import static org .antlr .codebuff .CollectFeatures .getRealTokens ;
3032import static org .antlr .codebuff .CollectFeatures .getTokensOnPreviousLine ;
@@ -46,7 +48,6 @@ public class Formatter {
4648 protected Vector <TokenPositionAnalysis > analysis = new Vector <>();
4749
4850 protected CodekNNClassifier nlwsClassifier ;
49- protected CodekNNClassifier wsClassifier ;
5051 protected CodekNNClassifier alignClassifier ;
5152 protected int k ;
5253
@@ -112,13 +113,14 @@ public String format() {
112113 public void processToken (int indexIntoRealTokens , int tokenIndexInStream ) {
113114 CommonToken curToken = (CommonToken )tokens .get (tokenIndexInStream );
114115 String tokText = curToken .getText ();
116+ TerminalNode node = tokenToNodeMap .get (curToken );
115117
116118 emitCommentsToTheLeft (tokenIndexInStream );
117119
118120 int [] features = getNodeFeatures (tokenToNodeMap , doc , tokenIndexInStream , line , tabSize );
119121 // must set "prev end column" value as token stream doesn't have it;
120122 // we're tracking it as we emit tokens
121- features [INDEX_PREV_END_COLUMN ] = charPosInLine ;
123+ // features[INDEX_PREV_END_COLUMN] = charPosInLine;
122124
123125 int injectNL_WS = nlwsClassifier .classify (k , features , corpus .injectWhitespace , MAX_CONTEXT_DIFF_THRESHOLD );
124126 int newlines = 0 ;
@@ -130,23 +132,15 @@ else if ( (injectNL_WS&0xFF)==CAT_INJECT_WS ) {
130132 ws = CollectFeatures .unwscat (injectNL_WS );
131133 }
132134
133- // getNodeFeatures() also doesn't know what line curToken is on. If \n, we need to find exemplars that start a line
134- features [INDEX_FIRST_ON_LINE ] = newlines ; // use \n prediction to match exemplars for alignment
135-
136- int align = alignClassifier .classify (k , features , corpus .align , MAX_CONTEXT_DIFF_THRESHOLD );
137-
138- TokenPositionAnalysis tokenPositionAnalysis =
139- getTokenAnalysis (features , indexIntoRealTokens , tokenIndexInStream , newlines , align , ws );
140- analysis .setSize (tokenIndexInStream +1 );
141- analysis .set (tokenIndexInStream , tokenPositionAnalysis );
142-
143135 if ( ws ==0 && cannotJoin (realTokens .get (indexIntoRealTokens -1 ), curToken ) ) { // failsafe!
144136 ws = 1 ;
145137 }
146138
139+ int align = CAT_NO_ALIGNMENT ;
140+
147141 if ( newlines >0 ) {
148142 output .append (Tool .newlines (newlines ));
149- line ++ ;
143+ line += newlines ;
150144 charPosInLine = 0 ;
151145
152146 List <Token > tokensOnPreviousLine = getTokensOnPreviousLine (tokens , tokenIndexInStream , line );
@@ -155,9 +149,15 @@ else if ( (injectNL_WS&0xFF)==CAT_INJECT_WS ) {
155149 firstTokenOnPrevLine = tokensOnPreviousLine .get (0 );
156150 }
157151
158- TerminalNode node = tokenToNodeMap .get (curToken );
159152 ParserRuleContext parent = (ParserRuleContext )node .getParent ();
160153
154+ // getNodeFeatures() doesn't know what line curToken is on. If \n, we need to find exemplars that start a line
155+ features [INDEX_FIRST_ON_LINE ] = newlines >0 ? 1 : 0 ; // use \n prediction to match exemplars for alignment
156+ // if we decide to inject a newline, we'd better recompute this value before classifying alignment
157+ features [INDEX_MATCHING_TOKEN_DIFF_LINE ] = getMatchingSymbolOnDiffLine (doc , node , line );
158+
159+ align = alignClassifier .classify (k , features , corpus .align , MAX_CONTEXT_DIFF_THRESHOLD );
160+
161161 if ( align ==CAT_INDENT ) {
162162 if ( firstTokenOnPrevLine !=null ) { // if not on first line, we cannot indent
163163 int indentedCol = firstTokenOnPrevLine .getCharPositionInLine ()+INDENT_LEVEL ;
@@ -169,10 +169,7 @@ else if ( (align&0xFF)==CAT_ALIGN_WITH_ANCESTOR_CHILD ) {
169169 int [] deltaChild = CollectFeatures .unaligncat (align );
170170 int deltaFromAncestor = deltaChild [0 ];
171171 int childIndex = deltaChild [1 ];
172- ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken (parent , curToken );
173- if ( earliestLeftAncestor ==null ) {
174- earliestLeftAncestor = parent ;
175- }
172+ ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken (node , curToken );
176173 ParserRuleContext ancestor = CollectFeatures .getAncestor (earliestLeftAncestor , deltaFromAncestor );
177174 ParseTree child = ancestor .getChild (childIndex );
178175 Token start = null ;
@@ -194,10 +191,7 @@ else if ( child instanceof TerminalNode ){
194191 }
195192 else if ( (align &0xFF )==CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN ) {
196193 int deltaFromAncestor = CollectFeatures .unindentcat (align );
197- ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken (parent , curToken );
198- if ( earliestLeftAncestor ==null ) {
199- earliestLeftAncestor = parent ;
200- }
194+ ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken (node , curToken );
201195 ParserRuleContext ancestor = CollectFeatures .getAncestor (earliestLeftAncestor , deltaFromAncestor );
202196 Token start = ancestor .getStart ();
203197 int indentCol = start .getCharPositionInLine () + INDENT_LEVEL ;
@@ -211,6 +205,11 @@ else if ( (align&0xFF)==CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN ) {
211205 charPosInLine += ws ;
212206 }
213207
208+ TokenPositionAnalysis tokenPositionAnalysis =
209+ getTokenAnalysis (features , indexIntoRealTokens , tokenIndexInStream , newlines , align , ws );
210+ analysis .setSize (tokenIndexInStream +1 );
211+ analysis .set (tokenIndexInStream , tokenPositionAnalysis );
212+
214213 // update Token object with position information now that we are about
215214 // to emit it.
216215 curToken .setLine (line );
@@ -227,6 +226,10 @@ else if ( (align&0xFF)==CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN ) {
227226 /** Look into the token stream to get the comments to the left of current
228227 * token. Emit all whitespace and comments except for whitespace at the
229228 * end as we'll inject that per newline prediction.
229+ *
230+ * This assumes we are grooming, not totally reformatting.
231+ * We are able to see the original input stream for comment purposes. With all
232+ * whitespace removed, we can't emit this stuff properly at the moment.
230233 */
231234 public void emitCommentsToTheLeft (int tokenIndexInStream ) {
232235 List <Token > hiddenTokensToLeft = tokens .getHiddenTokensToLeft (tokenIndexInStream );
@@ -270,7 +273,7 @@ public void emitCommentsToTheLeft(int tokenIndexInStream) {
270273
271274 public TokenPositionAnalysis getTokenAnalysis (int [] features , int indexIntoRealTokens , int tokenIndexInStream ,
272275 int injectNewline ,
273- int alignWithPrevious ,
276+ int align ,
274277 int ws )
275278 {
276279 CommonToken curToken = (CommonToken )tokens .get (tokenIndexInStream );
@@ -286,12 +289,11 @@ public TokenPositionAnalysis getTokenAnalysis(int[] features, int indexIntoRealT
286289
287290 boolean prevIsWS = prevToken .getChannel ()==Token .HIDDEN_CHANNEL ; // assume this means whitespace
288291 int actualNL = Tool .count (prevToken .getText (), '\n' );
289- int actualWS = Tool .count (prevToken .getText (), ' ' );
290- String newlinePredictionString = String .format ("### line %d: predicted %d \\ n actual %s" ,
292+ String newlinePredictionString = String .format ("### line %d: predicted %d \\ n actual ?" ,
291293 originalCurToken .getLine (), injectNewline , prevIsWS ? actualNL : "none" );
292- String alignPredictionString = String .format ("### line %d: predicted %s actual %s" ,
294+ String alignPredictionString = String .format ("### line %d: predicted %d actual %s" ,
293295 originalCurToken .getLine (),
294- alignWithPrevious == 1 ? " align" : "unaligned" ,
296+ align ,
295297 "?" );
296298
297299 String newlineAnalysis = newlinePredictionString +"\n " +
0 commit comments