1515package com .google .googlejavaformat .java ;
1616
1717import static com .google .common .base .Preconditions .checkArgument ;
18- import static com .google .common .base .Preconditions .checkElementIndex ;
19- import static java .util .Arrays .stream ;
2018
2119import com .google .common .collect .ImmutableList ;
2220import com .google .common .collect .Lists ;
2927import com .sun .tools .javac .parser .Tokens .TokenKind ;
3028import com .sun .tools .javac .parser .UnicodeReader ;
3129import com .sun .tools .javac .util .Context ;
32- import java .lang .reflect .Method ;
33- import java .util .ArrayList ;
34- import java .util .Collections ;
35- import java .util .Comparator ;
36- import java .util .HashSet ;
37- import java .util .List ;
38- import java .util .Objects ;
3930import java .util .Set ;
40- import org .jspecify .annotations .Nullable ;
4131
4232/** A wrapper around javac's lexer. */
4333final class JavacTokens {
@@ -54,8 +44,6 @@ static class RawTok {
5444 private final int endPos ;
5545
5646 RawTok (String stringVal , TokenKind kind , int pos , int endPos ) {
57- checkElementIndex (pos , endPos , "pos" );
58- checkArgument (pos < endPos , "expected pos (%s) < endPos (%s)" , pos , endPos );
5947 this .stringVal = stringVal ;
6048 this .kind = kind ;
6149 this .pos = pos ;
@@ -83,88 +71,30 @@ public String stringVal() {
8371 }
8472 }
8573
86- private static final TokenKind STRINGFRAGMENT =
87- stream (TokenKind .values ())
88- .filter (t -> t .name ().contentEquals ("STRINGFRAGMENT" ))
89- .findFirst ()
90- .orElse (null );
91-
92- static boolean isStringFragment (TokenKind kind ) {
93- return STRINGFRAGMENT != null && Objects .equals (kind , STRINGFRAGMENT );
94- }
95-
96- private static ImmutableList <Token > readAllTokens (
97- String source , Context context , Set <Integer > nonTerminalStringFragments ) {
74+ /** Lex the input and return a list of {@link RawTok}s. */
75+ public static ImmutableList <RawTok > getTokens (
76+ String source , Context context , Set <TokenKind > stopTokens ) {
9877 if (source == null ) {
9978 return ImmutableList .of ();
10079 }
10180 ScannerFactory fac = ScannerFactory .instance (context );
10281 char [] buffer = (source + EOF_COMMENT ).toCharArray ();
10382 Scanner scanner =
10483 new AccessibleScanner (fac , new CommentSavingTokenizer (fac , buffer , buffer .length ));
105- List <Token > tokens = new ArrayList <>();
106- do {
107- scanner .nextToken ();
108- tokens .add (scanner .token ());
109- } while (scanner .token ().kind != TokenKind .EOF );
110- for (int i = 0 ; i < tokens .size (); i ++) {
111- if (isStringFragment (tokens .get (i ).kind )) {
112- int start = i ;
113- while (isStringFragment (tokens .get (i ).kind )) {
114- i ++;
115- }
116- for (int j = start ; j < i - 1 ; j ++) {
117- nonTerminalStringFragments .add (tokens .get (j ).pos );
118- }
119- }
120- }
121- // A string template is tokenized as a series of STRINGFRAGMENT tokens containing the string
122- // literal values, followed by the tokens for the template arguments. For the formatter, we
123- // want the stream of tokens to appear in order by their start position.
124- if (Runtime .version ().feature () >= 21 ) {
125- Collections .sort (tokens , Comparator .comparingInt (t -> t .pos ));
126- }
127- return ImmutableList .copyOf (tokens );
128- }
129-
130- /** Lex the input and return a list of {@link RawTok}s. */
131- public static ImmutableList <RawTok > getTokens (
132- String source , Context context , Set <TokenKind > stopTokens ) {
133- if (source == null ) {
134- return ImmutableList .of ();
135- }
136- Set <Integer > nonTerminalStringFragments = new HashSet <>();
137- ImmutableList <Token > javacTokens = readAllTokens (source , context , nonTerminalStringFragments );
138-
13984 ImmutableList .Builder <RawTok > tokens = ImmutableList .builder ();
14085 int end = source .length ();
14186 int last = 0 ;
142- for (Token t : javacTokens ) {
87+ do {
88+ scanner .nextToken ();
89+ Token t = scanner .token ();
14390 if (t .comments != null ) {
144- // javac accumulates comments in reverse order
14591 for (Comment c : Lists .reverse (t .comments )) {
146- int pos = c .getSourcePos (0 );
147- int length ;
148- if (pos == -1 ) {
149- // We've found a comment whose position hasn't been recorded. Deduce its position as the
150- // first `/` character after the end of the previous token.
151- //
152- // javac creates a new JavaTokenizer to process string template arguments, so
153- // CommentSavingTokenizer doesn't get a chance to preprocess those comments and save
154- // their text and positions.
155- //
156- // TODO: consider always using this approach once the minimum supported JDK is 16 and
157- // we can assume BasicComment#getRawCharacters is always available.
158- pos = source .indexOf ('/' , last );
159- length = CommentSavingTokenizer .commentLength (c );
160- } else {
161- length = c .getText ().length ();
92+ if (last < c .getSourcePos (0 )) {
93+ tokens .add (new RawTok (null , null , last , c .getSourcePos (0 )));
16294 }
163- if (last < pos ) {
164- tokens .add (new RawTok (null , null , last , pos ));
165- }
166- tokens .add (new RawTok (null , null , pos , pos + length ));
167- last = pos + length ;
95+ tokens .add (
96+ new RawTok (null , null , c .getSourcePos (0 ), c .getSourcePos (0 ) + c .getText ().length ()));
97+ last = c .getSourcePos (0 ) + c .getText ().length ();
16898 }
16999 }
170100 if (stopTokens .contains (t .kind )) {
@@ -176,25 +106,14 @@ public static ImmutableList<RawTok> getTokens(
176106 if (last < t .pos ) {
177107 tokens .add (new RawTok (null , null , last , t .pos ));
178108 }
179- if (isStringFragment (t .kind )) {
180- int endPos = t .endPos ;
181- int pos = t .pos ;
182- if (nonTerminalStringFragments .contains (t .pos )) {
183- // Include the \ escape from \{...} in the preceding string fragment
184- endPos ++;
185- }
186- tokens .add (new RawTok (source .substring (pos , endPos ), t .kind , pos , endPos ));
187- last = endPos ;
188- } else {
189- tokens .add (
190- new RawTok (
191- t .kind == TokenKind .STRINGLITERAL ? "\" " + t .stringVal () + "\" " : null ,
192- t .kind ,
193- t .pos ,
194- t .endPos ));
195- last = t .endPos ;
196- }
197- }
109+ tokens .add (
110+ new RawTok (
111+ t .kind == TokenKind .STRINGLITERAL ? "\" " + t .stringVal () + "\" " : null ,
112+ t .kind ,
113+ t .pos ,
114+ t .endPos ));
115+ last = t .endPos ;
116+ } while (scanner .token ().kind != TokenKind .EOF );
198117 if (last < end ) {
199118 tokens .add (new RawTok (null , null , last , end ));
200119 }
@@ -203,32 +122,6 @@ public static ImmutableList<RawTok> getTokens(
203122
204123 /** A {@link JavaTokenizer} that saves comments. */
205124 static class CommentSavingTokenizer extends JavaTokenizer {
206-
207- private static final Method GET_RAW_CHARACTERS_METHOD = getRawCharactersMethod ();
208-
209- private static @ Nullable Method getRawCharactersMethod () {
210- try {
211- // This is a method in PositionTrackingReader, but that class is not public.
212- return BasicComment .class .getMethod ("getRawCharacters" );
213- } catch (NoSuchMethodException e ) {
214- return null ;
215- }
216- }
217-
218- static int commentLength (Comment comment ) {
219- if (comment instanceof BasicComment && GET_RAW_CHARACTERS_METHOD != null ) {
220- // If we've seen a BasicComment instead of a CommentWithTextAndPosition, getText() will
221- // be null, so we deduce the length using getRawCharacters. See also the comment at the
222- // usage of this method in getTokens.
223- try {
224- return ((char []) GET_RAW_CHARACTERS_METHOD .invoke (((BasicComment ) comment ))).length ;
225- } catch (ReflectiveOperationException e ) {
226- throw new LinkageError (e .getMessage (), e );
227- }
228- }
229- return comment .getText ().length ();
230- }
231-
232125 CommentSavingTokenizer (ScannerFactory fac , char [] buffer , int length ) {
233126 super (fac , buffer , length );
234127 }
0 commit comments