1212
1313#include " TGLexer.h"
1414#include " llvm/ADT/ArrayRef.h"
15+ #include " llvm/ADT/StringExtras.h"
1516#include " llvm/ADT/StringSwitch.h"
1617#include " llvm/ADT/Twine.h"
1718#include " llvm/Config/config.h" // for strtoull()/strtoll() define
2021#include " llvm/Support/SourceMgr.h"
2122#include " llvm/TableGen/Error.h"
2223#include < algorithm>
23- #include < cctype>
2424#include < cerrno>
2525#include < cstdint>
2626#include < cstdio>
@@ -38,6 +38,17 @@ struct PreprocessorDir {
3838};
3939} // end anonymous namespace
4040
41+ // / Returns true if `C` is a valid character in an identifier. If `First` is
42+ // / true, returns true if `C` is a valid first character of an identifier,
43+ // / else returns true if `C` is a valid non-first character of an identifier.
44+ // / Identifiers match the following regular expression:
45+ // / [a-zA-Z_][0-9a-zA-Z_]*
46+ static bool isValidIDChar (char C, bool First) {
47+ if (C == ' _' || isAlpha (C))
48+ return true ;
49+ return !First && isDigit (C);
50+ }
51+
4152constexpr PreprocessorDir PreprocessorDirs[] = {{tgtok::Ifdef, " ifdef" },
4253 {tgtok::Ifndef, " ifndef" },
4354 {tgtok::Else, " else" },
@@ -51,14 +62,14 @@ static const char *lexMacroName(StringRef Str) {
5162
5263 // Macro names start with [a-zA-Z_].
5364 const char *Next = Str.begin ();
54- if (*Next != ' _ ' && ! isalpha (*Next ))
65+ if (! isValidIDChar ( *Next, /* First= */ true ))
5566 return Next;
5667 // Eat the first character of the name.
5768 ++Next;
5869
5970 // Match the rest of the identifier regex: [0-9a-zA-Z_]*
6071 const char *End = Str.end ();
61- while (Next != End && ( isalpha ( *Next) || isdigit (*Next) || *Next == ' _ ' ))
72+ while (Next != End && isValidIDChar ( *Next, /* First= */ false ))
6273 ++Next;
6374 return Next;
6475}
@@ -173,7 +184,7 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
173184 switch (CurChar) {
174185 default :
175186 // Handle letters: [a-zA-Z_]
176- if (isalpha (CurChar) || CurChar == ' _ ' )
187+ if (isValidIDChar (CurChar, /* First= */ true ) )
177188 return LexIdentifier ();
178189
179190 // Unknown character, emit an error.
@@ -250,14 +261,14 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
250261 case ' 0' : case ' 1' : case ' 2' : case ' 3' : case ' 4' : case ' 5' : case ' 6' :
251262 case ' 7' : case ' 8' : case ' 9' : {
252263 int NextChar = 0 ;
253- if (isdigit (CurChar)) {
264+ if (isDigit (CurChar)) {
254265 // Allow identifiers to start with a number if it is followed by
255266 // an identifier. This can happen with paste operations like
256267 // foo#8i.
257268 int i = 0 ;
258269 do {
259270 NextChar = peekNextChar (i++);
260- } while (isdigit (NextChar));
271+ } while (isDigit (NextChar));
261272
262273 if (NextChar == ' x' || NextChar == ' b' ) {
263274 // If this is [0-9]b[01] or [0-9]x[0-9A-fa-f] this is most
@@ -281,7 +292,7 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
281292 }
282293 }
283294
284- if (isalpha (NextChar) || NextChar == ' _ ' )
295+ if (isValidIDChar (NextChar, /* First= */ true ) )
285296 return LexIdentifier ();
286297
287298 return LexNumber ();
@@ -347,13 +358,13 @@ tgtok::TokKind TGLexer::LexString() {
347358}
348359
349360tgtok::TokKind TGLexer::LexVarName () {
350- if (!isalpha (CurPtr[0 ]) && CurPtr[ 0 ] != ' _ ' )
361+ if (!isValidIDChar (CurPtr[0 ], /* First= */ true ) )
351362 return ReturnError (TokStart, " Invalid variable name" );
352363
353364 // Otherwise, we're ok, consume the rest of the characters.
354365 const char *VarNameStart = CurPtr++;
355366
356- while (isalpha (*CurPtr) || isdigit (*CurPtr) || *CurPtr == ' _ ' )
367+ while (isValidIDChar (*CurPtr, /* First= */ false ) )
357368 ++CurPtr;
358369
359370 CurStrVal.assign (VarNameStart, CurPtr);
@@ -365,7 +376,7 @@ tgtok::TokKind TGLexer::LexIdentifier() {
365376 const char *IdentStart = TokStart;
366377
367378 // Match the rest of the identifier regex: [0-9a-zA-Z_]*
368- while (isalpha (*CurPtr) || isdigit (*CurPtr) || *CurPtr == ' _ ' )
379+ while (isValidIDChar (*CurPtr, /* First= */ false ) )
369380 ++CurPtr;
370381
371382 // Check to see if this identifier is a reserved keyword.
@@ -500,7 +511,7 @@ tgtok::TokKind TGLexer::LexNumber() {
500511 Base = 16 ;
501512 do
502513 ++CurPtr;
503- while (isxdigit (CurPtr[0 ]));
514+ while (isHexDigit (CurPtr[0 ]));
504515 } else if (CurPtr[0 ] == ' b' ) {
505516 Base = 2 ;
506517 do
@@ -515,7 +526,7 @@ tgtok::TokKind TGLexer::LexNumber() {
515526 // Check if it's a decimal value.
516527 if (Base == 0 ) {
517528 // Check for a sign without a digit.
518- if (!isdigit (CurPtr[0 ])) {
529+ if (!isDigit (CurPtr[0 ])) {
519530 if (CurPtr[-1 ] == ' -' )
520531 return tgtok::minus;
521532 else if (CurPtr[-1 ] == ' +' )
@@ -526,7 +537,7 @@ tgtok::TokKind TGLexer::LexNumber() {
526537 NumStart = TokStart;
527538 IsMinus = CurPtr[-1 ] == ' -' ;
528539
529- while (isdigit (CurPtr[0 ]))
540+ while (isDigit (CurPtr[0 ]))
530541 ++CurPtr;
531542 }
532543
@@ -574,11 +585,11 @@ tgtok::TokKind TGLexer::LexBracket() {
574585
575586// / LexExclaim - Lex '!' and '![a-zA-Z]+'.
576587tgtok::TokKind TGLexer::LexExclaim () {
577- if (!isalpha (*CurPtr))
588+ if (!isAlpha (*CurPtr))
578589 return ReturnError (CurPtr - 1 , " Invalid \" !operator\" " );
579590
580591 const char *Start = CurPtr++;
581- while (isalpha (*CurPtr))
592+ while (isAlpha (*CurPtr))
582593 ++CurPtr;
583594
584595 // Check to see which operator this is.
0 commit comments