@@ -174,129 +174,174 @@ int TGLexer::peekNextChar(int Index) const {
174174}
175175
176176tgtok::TokKind TGLexer::LexToken (bool FileOrLineStart) {
177- TokStart = CurPtr;
178- // This always consumes at least one character.
179- int CurChar = getNextChar ();
177+ while (true ) {
178+ TokStart = CurPtr;
179+ // This always consumes at least one character.
180+ int CurChar = getNextChar ();
180181
181- switch (CurChar) {
182- default :
183- // Handle letters: [a-zA-Z_]
184- if (isValidIDChar (CurChar, /* First=*/ true ))
185- return LexIdentifier ();
186-
187- // Unknown character, emit an error.
188- return ReturnError (TokStart, " unexpected character" );
189- case EOF:
190- // Lex next token, if we just left an include file.
191- // Note that leaving an include file means that the next
192- // symbol is located at the end of the 'include "..."'
193- // construct, so LexToken() is called with default
194- // false parameter.
195- if (processEOF ())
196- return LexToken ();
182+ switch (CurChar) {
183+ default :
184+ // Handle letters: [a-zA-Z_]
185+ if (isValidIDChar (CurChar, /* First=*/ true ))
186+ return LexIdentifier ();
197187
198- // Return EOF denoting the end of lexing.
199- return tgtok::Eof;
200-
201- case ' :' : return tgtok::colon;
202- case ' ;' : return tgtok::semi;
203- case ' ,' : return tgtok::comma;
204- case ' <' : return tgtok::less;
205- case ' >' : return tgtok::greater;
206- case ' ]' : return tgtok::r_square;
207- case ' {' : return tgtok::l_brace;
208- case ' }' : return tgtok::r_brace;
209- case ' (' : return tgtok::l_paren;
210- case ' )' : return tgtok::r_paren;
211- case ' =' : return tgtok::equal;
212- case ' ?' : return tgtok::question;
213- case ' #' :
214- if (FileOrLineStart) {
215- tgtok::TokKind Kind = prepIsDirective ();
216- if (Kind != tgtok::Error)
217- return lexPreprocessor (Kind);
218- }
188+ // Unknown character, emit an error.
189+ return ReturnError (TokStart, " unexpected character" );
190+ case EOF:
191+ // Lex next token, if we just left an include file.
192+ if (processEOF ()) {
193+ // Leaving an include file means that the next symbol is located at the
194+ // end of the 'include "..."' construct.
195+ FileOrLineStart = false ;
196+ break ;
197+ }
219198
220- return tgtok::paste;
199+ // Return EOF denoting the end of lexing.
200+ return tgtok::Eof;
201+
202+ case ' :' :
203+ return tgtok::colon;
204+ case ' ;' :
205+ return tgtok::semi;
206+ case ' ,' :
207+ return tgtok::comma;
208+ case ' <' :
209+ return tgtok::less;
210+ case ' >' :
211+ return tgtok::greater;
212+ case ' ]' :
213+ return tgtok::r_square;
214+ case ' {' :
215+ return tgtok::l_brace;
216+ case ' }' :
217+ return tgtok::r_brace;
218+ case ' (' :
219+ return tgtok::l_paren;
220+ case ' )' :
221+ return tgtok::r_paren;
222+ case ' =' :
223+ return tgtok::equal;
224+ case ' ?' :
225+ return tgtok::question;
226+ case ' #' :
227+ if (FileOrLineStart) {
228+ tgtok::TokKind Kind = prepIsDirective ();
229+ if (Kind != tgtok::Error)
230+ return lexPreprocessor (Kind);
231+ }
232+
233+ return tgtok::paste;
221234
222- // The period is a separate case so we can recognize the "..."
223- // range punctuator.
224- case ' .' :
225- if (peekNextChar (0 ) == ' .' ) {
226- ++CurPtr; // Eat second dot.
235+ // The period is a separate case so we can recognize the "..."
236+ // range punctuator.
237+ case ' .' :
227238 if (peekNextChar (0 ) == ' .' ) {
228- ++CurPtr; // Eat third dot.
229- return tgtok::dotdotdot;
239+ ++CurPtr; // Eat second dot.
240+ if (peekNextChar (0 ) == ' .' ) {
241+ ++CurPtr; // Eat third dot.
242+ return tgtok::dotdotdot;
243+ }
244+ return ReturnError (TokStart, " invalid '..' punctuation" );
230245 }
231- return ReturnError (TokStart, " invalid '..' punctuation" );
232- }
233- return tgtok::dot;
246+ return tgtok::dot;
234247
235- case ' \r ' :
236- llvm_unreachable (" getNextChar() must never return '\r '" );
248+ case ' \r ' :
249+ llvm_unreachable (" getNextChar() must never return '\r '" );
237250
238- case ' ' :
239- case ' \t ' :
240- // Ignore whitespace.
241- return LexToken (FileOrLineStart);
242- case ' \n ' :
243- // Ignore whitespace, and identify the new line.
244- return LexToken (true );
245- case ' /' :
246- // If this is the start of a // comment, skip until the end of the line or
247- // the end of the buffer.
248- if (*CurPtr == ' /' )
249- SkipBCPLComment ();
250- else if (*CurPtr == ' *' ) {
251- if (SkipCComment ())
252- return tgtok::Error;
253- } else // Otherwise, this is an error.
254- return ReturnError (TokStart, " unexpected character" );
255- return LexToken (FileOrLineStart);
256- case ' -' : case ' +' :
257- case ' 0' : case ' 1' : case ' 2' : case ' 3' : case ' 4' : case ' 5' : case ' 6' :
258- case ' 7' : case ' 8' : case ' 9' : {
259- int NextChar = 0 ;
260- if (isDigit (CurChar)) {
261- // Allow identifiers to start with a number if it is followed by
262- // an identifier. This can happen with paste operations like
263- // foo#8i.
264- int i = 0 ;
265- do {
266- NextChar = peekNextChar (i++);
267- } while (isDigit (NextChar));
268-
269- if (NextChar == ' x' || NextChar == ' b' ) {
270- // If this is [0-9]b[01] or [0-9]x[0-9A-fa-f] this is most
271- // likely a number.
272- int NextNextChar = peekNextChar (i);
273- switch (NextNextChar) {
274- default :
275- break ;
276- case ' 0' : case ' 1' :
277- if (NextChar == ' b' )
278- return LexNumber ();
279- [[fallthrough]];
280- case ' 2' : case ' 3' : case ' 4' : case ' 5' :
281- case ' 6' : case ' 7' : case ' 8' : case ' 9' :
282- case ' a' : case ' b' : case ' c' : case ' d' : case ' e' : case ' f' :
283- case ' A' : case ' B' : case ' C' : case ' D' : case ' E' : case ' F' :
284- if (NextChar == ' x' )
285- return LexNumber ();
286- break ;
251+ case ' ' :
252+ case ' \t ' :
253+ // Ignore whitespace.
254+ break ;
255+ case ' \n ' :
256+ // Ignore whitespace, and identify the new line.
257+ FileOrLineStart = true ;
258+ break ;
259+ case ' /' :
260+ // If this is the start of a // comment, skip until the end of the line or
261+ // the end of the buffer.
262+ if (*CurPtr == ' /' )
263+ SkipBCPLComment ();
264+ else if (*CurPtr == ' *' ) {
265+ if (SkipCComment ())
266+ return tgtok::Error;
267+ } else // Otherwise, this is an error.
268+ return ReturnError (TokStart, " unexpected character" );
269+ break ;
270+ case ' -' :
271+ case ' +' :
272+ case ' 0' :
273+ case ' 1' :
274+ case ' 2' :
275+ case ' 3' :
276+ case ' 4' :
277+ case ' 5' :
278+ case ' 6' :
279+ case ' 7' :
280+ case ' 8' :
281+ case ' 9' : {
282+ int NextChar = 0 ;
283+ if (isDigit (CurChar)) {
284+ // Allow identifiers to start with a number if it is followed by
285+ // an identifier. This can happen with paste operations like
286+ // foo#8i.
287+ int i = 0 ;
288+ do {
289+ NextChar = peekNextChar (i++);
290+ } while (isDigit (NextChar));
291+
292+ if (NextChar == ' x' || NextChar == ' b' ) {
293+ // If this is [0-9]b[01] or [0-9]x[0-9A-fa-f] this is most
294+ // likely a number.
295+ int NextNextChar = peekNextChar (i);
296+ switch (NextNextChar) {
297+ default :
298+ break ;
299+ case ' 0' :
300+ case ' 1' :
301+ if (NextChar == ' b' )
302+ return LexNumber ();
303+ [[fallthrough]];
304+ case ' 2' :
305+ case ' 3' :
306+ case ' 4' :
307+ case ' 5' :
308+ case ' 6' :
309+ case ' 7' :
310+ case ' 8' :
311+ case ' 9' :
312+ case ' a' :
313+ case ' b' :
314+ case ' c' :
315+ case ' d' :
316+ case ' e' :
317+ case ' f' :
318+ case ' A' :
319+ case ' B' :
320+ case ' C' :
321+ case ' D' :
322+ case ' E' :
323+ case ' F' :
324+ if (NextChar == ' x' )
325+ return LexNumber ();
326+ break ;
327+ }
287328 }
288329 }
289- }
290330
291- if (isValidIDChar (NextChar, /* First=*/ true ))
292- return LexIdentifier ();
331+ if (isValidIDChar (NextChar, /* First=*/ true ))
332+ return LexIdentifier ();
293333
294- return LexNumber ();
295- }
296- case ' "' : return LexString ();
297- case ' $' : return LexVarName ();
298- case ' [' : return LexBracket ();
299- case ' !' : return LexExclaim ();
334+ return LexNumber ();
335+ }
336+ case ' "' :
337+ return LexString ();
338+ case ' $' :
339+ return LexVarName ();
340+ case ' [' :
341+ return LexBracket ();
342+ case ' !' :
343+ return LexExclaim ();
344+ }
300345 }
301346}
302347
0 commit comments