@@ -174,129 +174,174 @@ int TGLexer::peekNextChar(int Index) const {
174
174
}
175
175
176
176
tgtok::TokKind TGLexer::LexToken (bool FileOrLineStart) {
177
- TokStart = CurPtr;
178
- // This always consumes at least one character.
179
- int CurChar = getNextChar ();
177
+ while (true ) {
178
+ TokStart = CurPtr;
179
+ // This always consumes at least one character.
180
+ int CurChar = getNextChar ();
180
181
181
- switch (CurChar) {
182
- default :
183
- // Handle letters: [a-zA-Z_]
184
- if (isValidIDChar (CurChar, /* First=*/ true ))
185
- return LexIdentifier ();
186
-
187
- // Unknown character, emit an error.
188
- return ReturnError (TokStart, " unexpected character" );
189
- case EOF:
190
- // Lex next token, if we just left an include file.
191
- // Note that leaving an include file means that the next
192
- // symbol is located at the end of the 'include "..."'
193
- // construct, so LexToken() is called with default
194
- // false parameter.
195
- if (processEOF ())
196
- return LexToken ();
182
+ switch (CurChar) {
183
+ default :
184
+ // Handle letters: [a-zA-Z_]
185
+ if (isValidIDChar (CurChar, /* First=*/ true ))
186
+ return LexIdentifier ();
197
187
198
- // Return EOF denoting the end of lexing.
199
- return tgtok::Eof;
200
-
201
- case ' :' : return tgtok::colon;
202
- case ' ;' : return tgtok::semi;
203
- case ' ,' : return tgtok::comma;
204
- case ' <' : return tgtok::less;
205
- case ' >' : return tgtok::greater;
206
- case ' ]' : return tgtok::r_square;
207
- case ' {' : return tgtok::l_brace;
208
- case ' }' : return tgtok::r_brace;
209
- case ' (' : return tgtok::l_paren;
210
- case ' )' : return tgtok::r_paren;
211
- case ' =' : return tgtok::equal;
212
- case ' ?' : return tgtok::question;
213
- case ' #' :
214
- if (FileOrLineStart) {
215
- tgtok::TokKind Kind = prepIsDirective ();
216
- if (Kind != tgtok::Error)
217
- return lexPreprocessor (Kind);
218
- }
188
+ // Unknown character, emit an error.
189
+ return ReturnError (TokStart, " unexpected character" );
190
+ case EOF:
191
+ // Lex next token, if we just left an include file.
192
+ if (processEOF ()) {
193
+ // Leaving an include file means that the next symbol is located at the
194
+ // end of the 'include "..."' construct.
195
+ FileOrLineStart = false ;
196
+ break ;
197
+ }
219
198
220
- return tgtok::paste;
199
+ // Return EOF denoting the end of lexing.
200
+ return tgtok::Eof;
201
+
202
+ case ' :' :
203
+ return tgtok::colon;
204
+ case ' ;' :
205
+ return tgtok::semi;
206
+ case ' ,' :
207
+ return tgtok::comma;
208
+ case ' <' :
209
+ return tgtok::less;
210
+ case ' >' :
211
+ return tgtok::greater;
212
+ case ' ]' :
213
+ return tgtok::r_square;
214
+ case ' {' :
215
+ return tgtok::l_brace;
216
+ case ' }' :
217
+ return tgtok::r_brace;
218
+ case ' (' :
219
+ return tgtok::l_paren;
220
+ case ' )' :
221
+ return tgtok::r_paren;
222
+ case ' =' :
223
+ return tgtok::equal;
224
+ case ' ?' :
225
+ return tgtok::question;
226
+ case ' #' :
227
+ if (FileOrLineStart) {
228
+ tgtok::TokKind Kind = prepIsDirective ();
229
+ if (Kind != tgtok::Error)
230
+ return lexPreprocessor (Kind);
231
+ }
232
+
233
+ return tgtok::paste;
221
234
222
- // The period is a separate case so we can recognize the "..."
223
- // range punctuator.
224
- case ' .' :
225
- if (peekNextChar (0 ) == ' .' ) {
226
- ++CurPtr; // Eat second dot.
235
+ // The period is a separate case so we can recognize the "..."
236
+ // range punctuator.
237
+ case ' .' :
227
238
if (peekNextChar (0 ) == ' .' ) {
228
- ++CurPtr; // Eat third dot.
229
- return tgtok::dotdotdot;
239
+ ++CurPtr; // Eat second dot.
240
+ if (peekNextChar (0 ) == ' .' ) {
241
+ ++CurPtr; // Eat third dot.
242
+ return tgtok::dotdotdot;
243
+ }
244
+ return ReturnError (TokStart, " invalid '..' punctuation" );
230
245
}
231
- return ReturnError (TokStart, " invalid '..' punctuation" );
232
- }
233
- return tgtok::dot;
246
+ return tgtok::dot;
234
247
235
- case ' \r ' :
236
- llvm_unreachable (" getNextChar() must never return '\r '" );
248
+ case ' \r ' :
249
+ llvm_unreachable (" getNextChar() must never return '\r '" );
237
250
238
- case ' ' :
239
- case ' \t ' :
240
- // Ignore whitespace.
241
- return LexToken (FileOrLineStart);
242
- case ' \n ' :
243
- // Ignore whitespace, and identify the new line.
244
- return LexToken (true );
245
- case ' /' :
246
- // If this is the start of a // comment, skip until the end of the line or
247
- // the end of the buffer.
248
- if (*CurPtr == ' /' )
249
- SkipBCPLComment ();
250
- else if (*CurPtr == ' *' ) {
251
- if (SkipCComment ())
252
- return tgtok::Error;
253
- } else // Otherwise, this is an error.
254
- return ReturnError (TokStart, " unexpected character" );
255
- return LexToken (FileOrLineStart);
256
- case ' -' : case ' +' :
257
- case ' 0' : case ' 1' : case ' 2' : case ' 3' : case ' 4' : case ' 5' : case ' 6' :
258
- case ' 7' : case ' 8' : case ' 9' : {
259
- int NextChar = 0 ;
260
- if (isDigit (CurChar)) {
261
- // Allow identifiers to start with a number if it is followed by
262
- // an identifier. This can happen with paste operations like
263
- // foo#8i.
264
- int i = 0 ;
265
- do {
266
- NextChar = peekNextChar (i++);
267
- } while (isDigit (NextChar));
268
-
269
- if (NextChar == ' x' || NextChar == ' b' ) {
270
- // If this is [0-9]b[01] or [0-9]x[0-9A-fa-f] this is most
271
- // likely a number.
272
- int NextNextChar = peekNextChar (i);
273
- switch (NextNextChar) {
274
- default :
275
- break ;
276
- case ' 0' : case ' 1' :
277
- if (NextChar == ' b' )
278
- return LexNumber ();
279
- [[fallthrough]];
280
- case ' 2' : case ' 3' : case ' 4' : case ' 5' :
281
- case ' 6' : case ' 7' : case ' 8' : case ' 9' :
282
- case ' a' : case ' b' : case ' c' : case ' d' : case ' e' : case ' f' :
283
- case ' A' : case ' B' : case ' C' : case ' D' : case ' E' : case ' F' :
284
- if (NextChar == ' x' )
285
- return LexNumber ();
286
- break ;
251
+ case ' ' :
252
+ case ' \t ' :
253
+ // Ignore whitespace.
254
+ break ;
255
+ case ' \n ' :
256
+ // Ignore whitespace, and identify the new line.
257
+ FileOrLineStart = true ;
258
+ break ;
259
+ case ' /' :
260
+ // If this is the start of a // comment, skip until the end of the line or
261
+ // the end of the buffer.
262
+ if (*CurPtr == ' /' )
263
+ SkipBCPLComment ();
264
+ else if (*CurPtr == ' *' ) {
265
+ if (SkipCComment ())
266
+ return tgtok::Error;
267
+ } else // Otherwise, this is an error.
268
+ return ReturnError (TokStart, " unexpected character" );
269
+ break ;
270
+ case ' -' :
271
+ case ' +' :
272
+ case ' 0' :
273
+ case ' 1' :
274
+ case ' 2' :
275
+ case ' 3' :
276
+ case ' 4' :
277
+ case ' 5' :
278
+ case ' 6' :
279
+ case ' 7' :
280
+ case ' 8' :
281
+ case ' 9' : {
282
+ int NextChar = 0 ;
283
+ if (isDigit (CurChar)) {
284
+ // Allow identifiers to start with a number if it is followed by
285
+ // an identifier. This can happen with paste operations like
286
+ // foo#8i.
287
+ int i = 0 ;
288
+ do {
289
+ NextChar = peekNextChar (i++);
290
+ } while (isDigit (NextChar));
291
+
292
+ if (NextChar == ' x' || NextChar == ' b' ) {
293
+ // If this is [0-9]b[01] or [0-9]x[0-9A-fa-f] this is most
294
+ // likely a number.
295
+ int NextNextChar = peekNextChar (i);
296
+ switch (NextNextChar) {
297
+ default :
298
+ break ;
299
+ case ' 0' :
300
+ case ' 1' :
301
+ if (NextChar == ' b' )
302
+ return LexNumber ();
303
+ [[fallthrough]];
304
+ case ' 2' :
305
+ case ' 3' :
306
+ case ' 4' :
307
+ case ' 5' :
308
+ case ' 6' :
309
+ case ' 7' :
310
+ case ' 8' :
311
+ case ' 9' :
312
+ case ' a' :
313
+ case ' b' :
314
+ case ' c' :
315
+ case ' d' :
316
+ case ' e' :
317
+ case ' f' :
318
+ case ' A' :
319
+ case ' B' :
320
+ case ' C' :
321
+ case ' D' :
322
+ case ' E' :
323
+ case ' F' :
324
+ if (NextChar == ' x' )
325
+ return LexNumber ();
326
+ break ;
327
+ }
287
328
}
288
329
}
289
- }
290
330
291
- if (isValidIDChar (NextChar, /* First=*/ true ))
292
- return LexIdentifier ();
331
+ if (isValidIDChar (NextChar, /* First=*/ true ))
332
+ return LexIdentifier ();
293
333
294
- return LexNumber ();
295
- }
296
- case ' "' : return LexString ();
297
- case ' $' : return LexVarName ();
298
- case ' [' : return LexBracket ();
299
- case ' !' : return LexExclaim ();
334
+ return LexNumber ();
335
+ }
336
+ case ' "' :
337
+ return LexString ();
338
+ case ' $' :
339
+ return LexVarName ();
340
+ case ' [' :
341
+ return LexBracket ();
342
+ case ' !' :
343
+ return LexExclaim ();
344
+ }
300
345
}
301
346
}
302
347
0 commit comments