Skip to content

Commit d988991

Browse files
authored
[flang] Tokenize all -D macro bodies, and do it better (#168116)
Currently the compiler tokenizes only the bodies of function-like macros defined on the command line (-D), and it does so crudely. This change also tokenizes the bodies of keyword-like macros, handles character literals properly, and handles numeric constants correctly. (It also deletes two needless functions noticed in characters.h.) Fixes #168077.
1 parent c2445d9 commit d988991

File tree

4 files changed

+51
-23
lines changed

4 files changed

+51
-23
lines changed

flang/include/flang/Parser/characters.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,6 @@ inline constexpr char ToLowerCaseLetter(char ch) {
6969
return IsUpperCaseLetter(ch) ? ch - 'A' + 'a' : ch;
7070
}
7171

72-
inline constexpr char ToLowerCaseLetter(char &&ch) {
73-
return IsUpperCaseLetter(ch) ? ch - 'A' + 'a' : ch;
74-
}
75-
7672
inline std::string ToLowerCaseLetters(std::string_view str) {
7773
std::string lowered{str};
7874
for (char &ch : lowered) {
@@ -85,10 +81,6 @@ inline constexpr char ToUpperCaseLetter(char ch) {
8581
return IsLowerCaseLetter(ch) ? ch - 'a' + 'A' : ch;
8682
}
8783

88-
inline constexpr char ToUpperCaseLetter(char &&ch) {
89-
return IsLowerCaseLetter(ch) ? ch - 'a' + 'A' : ch;
90-
}
91-
9284
inline std::string ToUpperCaseLetters(std::string_view str) {
9385
std::string raised{str};
9486
for (char &ch : raised) {

flang/include/flang/Parser/preprocessor.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ class Definition {
3838
Definition(const std::vector<std::string> &argNames, const TokenSequence &,
3939
std::size_t firstToken, std::size_t tokens, bool isVariadic = false);
4040
Definition(const std::string &predefined, AllSources &);
41+
Definition(const TokenSequence &predefined);
4142

4243
bool isFunctionLike() const { return isFunctionLike_; }
4344
std::size_t argumentCount() const { return argNames_.size(); }

flang/lib/Parser/preprocessor.cpp

Lines changed: 44 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ Definition::Definition(const std::string &predefined, AllSources &sources)
4343
replacement_{
4444
predefined, sources.AddCompilerInsertion(predefined).start()} {}
4545

46+
Definition::Definition(const TokenSequence &repl)
47+
: isPredefined_{true}, replacement_{repl} {}
48+
4649
bool Definition::set_isDisabled(bool disable) {
4750
bool was{isDisabled_};
4851
isDisabled_ = disable;
@@ -371,40 +374,66 @@ TokenSequence Preprocessor::TokenizeMacroBody(const std::string &str) {
371374
Provenance provenance{allSources_.AddCompilerInsertion(str).start()};
372375
auto end{str.size()};
373376
for (std::string::size_type at{0}; at < end;) {
374-
// Alternate between tokens that are identifiers (and therefore subject
375-
// to argument replacement) and those that are not.
376-
auto start{str.find_first_of(idChars, at)};
377-
if (start == str.npos) {
378-
tokens.Put(str.substr(at), provenance + at);
379-
break;
380-
} else if (start > at) {
381-
tokens.Put(str.substr(at, start - at), provenance + at);
377+
char ch{str.at(at)};
378+
if (IsWhiteSpace(ch)) {
379+
++at;
380+
continue;
382381
}
383-
at = str.find_first_not_of(idChars, start + 1);
384-
if (at == str.npos) {
382+
std::string::size_type start{at};
383+
if (IsLegalIdentifierStart(ch)) {
384+
for (++at; at < end && IsLegalInIdentifier(str.at(at)); ++at) {
385+
}
386+
} else if (IsDecimalDigit(ch) || ch == '.') {
387+
for (++at; at < end; ++at) {
388+
ch = str.at(at);
389+
if (!IsDecimalDigit(ch) && ch != '.') {
390+
break;
391+
}
392+
}
393+
if (at < end) {
394+
ch = ToUpperCaseLetter(str.at(at));
395+
if (ch == 'E' || ch == 'D' || ch == 'Q') {
396+
if (++at < end) {
397+
ch = str.at(at);
398+
if (ch == '+' || ch == '-') {
399+
++at;
400+
}
401+
for (; at < end && IsDecimalDigit(str.at(at)); ++at) {
402+
}
403+
}
404+
}
405+
}
406+
} else if (ch == '\'' || ch == '"') {
407+
for (++at; at < end && str.at(at) != ch; ++at) {
408+
}
409+
if (at < end) {
410+
++at;
411+
}
412+
} else {
413+
++at; // single-character token
414+
}
415+
if (at >= end || at == str.npos) {
385416
tokens.Put(str.substr(start), provenance + start);
386417
break;
387-
} else {
388-
tokens.Put(str.substr(start, at - start), provenance + start);
389418
}
419+
tokens.Put(str.substr(start, at - start), provenance + start);
390420
}
391421
return tokens;
392422
}
393423

394424
void Preprocessor::Define(const std::string &macro, const std::string &value) {
425+
TokenSequence rhs{TokenizeMacroBody(value)};
395426
if (auto lhs{TokenizeMacroNameAndArgs(macro)}) {
396427
// function-like macro
397428
CharBlock macroName{SaveTokenAsName(lhs->front())};
398429
auto iter{lhs->begin()};
399430
++iter;
400431
std::vector<std::string> argNames{iter, lhs->end()};
401-
auto rhs{TokenizeMacroBody(value)};
402432
definitions_.emplace(std::make_pair(macroName,
403433
Definition{
404434
argNames, rhs, 0, rhs.SizeInTokens(), /*isVariadic=*/false}));
405435
} else { // keyword macro
406-
definitions_.emplace(
407-
SaveTokenAsName(macro), Definition{value, allSources_});
436+
definitions_.emplace(SaveTokenAsName(macro), Definition{rhs});
408437
}
409438
}
410439

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
!RUN: %flang -E -DNVAR=2+1+0+0 %s 2>&1 | FileCheck %s
2+
!CHECK: pass
3+
#if NVAR > 2
4+
call pass
5+
#endif
6+
end

0 commit comments

Comments
 (0)