Skip to content

Commit fa5af80

Browse files
committed
[clang][deps] Teach dep directive scanner about _Pragma
While we cannot handle `_Pragma` used inside macros, we can handle it at the top level, and some projects use the `_Pragma("once")` spelling like that, which was causing spurious failures in the scanner. Limitations: * Cannot handle `#define ONCE _Pragma("once")` — same issue as using @import in a macro; ideally we should diagnose this in obvious cases. * Our LangOpts are currently fixed, so we are not handling u"" strings or R"()" strings that require C11/C++11. rdar://108629982 Differential Revision: https://reviews.llvm.org/D149884 (cherry picked from commit ee8ed0b)
1 parent 6f5414e commit fa5af80

File tree

5 files changed

+271
-41
lines changed

5 files changed

+271
-41
lines changed

clang/include/clang/Lex/Pragma.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,13 @@ class PragmaNamespace : public PragmaHandler {
123123
PragmaNamespace *getIfNamespace() override { return this; }
124124
};
125125

126+
/// Destringize a \c _Pragma("") string according to C11 6.10.9.1:
127+
/// "The string literal is destringized by deleting any encoding prefix,
128+
/// deleting the leading and trailing double-quotes, replacing each escape
129+
/// sequence \" by a double-quote, and replacing each escape sequence \\ by a
130+
/// single backslash."
131+
void prepare_PragmaString(SmallVectorImpl<char> &StrVal);
132+
126133
} // namespace clang
127134

128135
#endif // LLVM_CLANG_LEX_PRAGMA_H

clang/lib/Lex/DependencyDirectivesScanner.cpp

Lines changed: 105 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "clang/Basic/Diagnostic.h"
2020
#include "clang/Lex/LexDiagnostic.h"
2121
#include "clang/Lex/Lexer.h"
22+
#include "clang/Lex/Pragma.h"
2223
#include "llvm/ADT/ScopeExit.h"
2324
#include "llvm/ADT/SmallString.h"
2425
#include "llvm/ADT/StringMap.h"
@@ -71,6 +72,8 @@ struct Scanner {
7172
// Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'.
7273
LangOpts.ObjC = true;
7374
LangOpts.LineComment = true;
75+
// FIXME: we do not enable C11 or C++11, so we are missing u/u8/U"" and
76+
// R"()" literals.
7477
return LangOpts;
7578
}
7679

@@ -90,6 +93,10 @@ struct Scanner {
9093
void skipLine(const char *&First, const char *const End);
9194
void skipDirective(StringRef Name, const char *&First, const char *const End);
9295

96+
/// Returns the spelling of a string literal or identifier after performing
97+
/// any processing needed to handle \c clang::Token::NeedsCleaning.
98+
StringRef cleanStringIfNeeded(const dependency_directives_scan::Token &Tok);
99+
93100
/// Lexes next token and if it is identifier returns its string, otherwise
94101
/// it skips the current line and returns \p None.
95102
///
@@ -111,13 +118,30 @@ struct Scanner {
111118
const char *&First,
112119
const char *const End);
113120

121+
/// Lexes next token and returns true iff it matches the kind \p K.
122+
/// Otherwise it skips the current line and returns false.
123+
///
124+
/// In any case (whatever the token kind) \p First and the \p Lexer will
125+
/// advance beyond the token.
126+
[[nodiscard]] bool isNextTokenOrSkipLine(tok::TokenKind K, const char *&First,
127+
const char *const End);
128+
129+
/// Lexes next token and if it is string literal, returns its string.
130+
/// Otherwise, it skips the current line and returns \p std::nullopt.
131+
///
132+
/// In any case (whatever the token kind) \p First and the \p Lexer will
133+
/// advance beyond the token.
134+
[[nodiscard]] std::optional<StringRef>
135+
tryLexStringLiteralOrSkipLine(const char *&First, const char *const End);
136+
114137
[[nodiscard]] bool scanImpl(const char *First, const char *const End);
115138
[[nodiscard]] bool lexPPLine(const char *&First, const char *const End);
116139
[[nodiscard]] bool lexAt(const char *&First, const char *const End);
117140
[[nodiscard]] bool lexModule(const char *&First, const char *const End);
118141
[[nodiscard]] bool lexDefine(const char *HashLoc, const char *&First,
119142
const char *const End);
120143
[[nodiscard]] bool lexPragma(const char *&First, const char *const End);
144+
[[nodiscard]] bool lex_Pragma(const char *&First, const char *const End);
121145
[[nodiscard]] bool lexEndif(const char *&First, const char *const End);
122146
[[nodiscard]] bool lexDefault(DirectiveKind Kind, const char *&First,
123147
const char *const End);
@@ -524,22 +548,18 @@ void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) {
524548
}
525549
}
526550

527-
[[nodiscard]] Optional<StringRef>
528-
Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) {
529-
const dependency_directives_scan::Token &Tok = lexToken(First, End);
530-
if (Tok.isNot(tok::raw_identifier)) {
531-
if (!Tok.is(tok::eod))
532-
skipLine(First, End);
533-
return None;
534-
}
535-
551+
StringRef
552+
Scanner::cleanStringIfNeeded(const dependency_directives_scan::Token &Tok) {
536553
bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning;
537554
if (LLVM_LIKELY(!NeedsCleaning))
538555
return Input.slice(Tok.Offset, Tok.getEnd());
539556

540557
SmallString<64> Spelling;
541558
Spelling.resize(Tok.Length);
542559

560+
// FIXME: C++11 raw string literals need special handling (see getSpellingSlow
561+
// in the Lexer). Currently we cannot see them due to our LangOpts.
562+
543563
unsigned SpellingLength = 0;
544564
const char *BufPtr = Input.begin() + Tok.Offset;
545565
const char *AfterIdent = Input.begin() + Tok.getEnd();
@@ -554,6 +574,18 @@ Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) {
554574
.first->first();
555575
}
556576

577+
Optional<StringRef>
578+
Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) {
579+
const dependency_directives_scan::Token &Tok = lexToken(First, End);
580+
if (Tok.isNot(tok::raw_identifier)) {
581+
if (!Tok.is(tok::eod))
582+
skipLine(First, End);
583+
return None;
584+
}
585+
586+
return cleanStringIfNeeded(Tok);
587+
}
588+
557589
StringRef Scanner::lexIdentifier(const char *&First, const char *const End) {
558590
Optional<StringRef> Id = tryLexIdentifierOrSkipLine(First, End);
559591
assert(Id && "expected identifier token");
@@ -570,6 +602,28 @@ bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const char *&First,
570602
return false;
571603
}
572604

605+
bool Scanner::isNextTokenOrSkipLine(tok::TokenKind K, const char *&First,
606+
const char *const End) {
607+
const dependency_directives_scan::Token &Tok = lexToken(First, End);
608+
if (Tok.is(K))
609+
return true;
610+
skipLine(First, End);
611+
return false;
612+
}
613+
614+
std::optional<StringRef>
615+
Scanner::tryLexStringLiteralOrSkipLine(const char *&First,
616+
const char *const End) {
617+
const dependency_directives_scan::Token &Tok = lexToken(First, End);
618+
if (!tok::isStringLiteral(Tok.Kind)) {
619+
if (!Tok.is(tok::eod))
620+
skipLine(First, End);
621+
return std::nullopt;
622+
}
623+
624+
return cleanStringIfNeeded(Tok);
625+
}
626+
573627
bool Scanner::lexAt(const char *&First, const char *const End) {
574628
// Handle "@import".
575629

@@ -627,6 +681,41 @@ bool Scanner::lexModule(const char *&First, const char *const End) {
627681
return lexModuleDirectiveBody(Kind, First, End);
628682
}
629683

684+
bool Scanner::lex_Pragma(const char *&First, const char *const End) {
685+
if (!isNextTokenOrSkipLine(tok::l_paren, First, End))
686+
return false;
687+
688+
std::optional<StringRef> Str = tryLexStringLiteralOrSkipLine(First, End);
689+
690+
if (!Str || !isNextTokenOrSkipLine(tok::r_paren, First, End))
691+
return false;
692+
693+
SmallString<64> Buffer(*Str);
694+
prepare_PragmaString(Buffer);
695+
696+
// Use a new scanner instance since the tokens will be inside the allocated
697+
// string. We should already have captured all the relevant tokens in the
698+
// current scanner.
699+
SmallVector<dependency_directives_scan::Token> DiscardTokens;
700+
const char *Begin = Buffer.c_str();
701+
Scanner PragmaScanner{StringRef(Begin, Buffer.size()), DiscardTokens, Diags,
702+
InputSourceLoc};
703+
704+
PragmaScanner.TheLexer.setParsingPreprocessorDirective(true);
705+
if (PragmaScanner.lexPragma(Begin, Buffer.end()))
706+
return true;
707+
708+
DirectiveKind K = PragmaScanner.topDirective();
709+
if (K == pp_none) {
710+
skipLine(First, End);
711+
return false;
712+
}
713+
714+
assert(Begin == Buffer.end());
715+
pushDirective(K);
716+
return false;
717+
}
718+
630719
bool Scanner::lexPragma(const char *&First, const char *const End) {
631720
Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End);
632721
if (!FoundId)
@@ -711,6 +800,7 @@ static bool isStartOfRelevantLine(char First) {
711800
case 'i':
712801
case 'e':
713802
case 'm':
803+
case '_':
714804
return true;
715805
}
716806
return false;
@@ -747,6 +837,12 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {
747837
if (*First == 'i' || *First == 'e' || *First == 'm')
748838
return lexModule(First, End);
749839

840+
if (*First == '_') {
841+
if (isNextIdentifierOrSkipLine("_Pragma", First, End))
842+
return lex_Pragma(First, End);
843+
return false;
844+
}
845+
750846
// Handle preprocessing directives.
751847

752848
TheLexer.setParsingPreprocessorDirective(true);

clang/lib/Lex/Pragma.cpp

Lines changed: 40 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -261,17 +261,48 @@ void Preprocessor::Handle_Pragma(Token &Tok) {
261261

262262
SourceLocation RParenLoc = Tok.getLocation();
263263
bool Invalid = false;
264-
std::string StrVal = getSpelling(StrTok, &Invalid);
264+
SmallString<64> StrVal;
265+
StrVal.resize(StrTok.getLength());
266+
StringRef StrValRef = getSpelling(StrTok, StrVal, &Invalid);
265267
if (Invalid) {
266268
Diag(PragmaLoc, diag::err__Pragma_malformed);
267269
return;
268270
}
269271

270-
// The _Pragma is lexically sound. Destringize according to C11 6.10.9.1:
271-
// "The string literal is destringized by deleting any encoding prefix,
272-
// deleting the leading and trailing double-quotes, replacing each escape
273-
// sequence \" by a double-quote, and replacing each escape sequence \\ by a
274-
// single backslash."
272+
assert(StrValRef.size() <= StrVal.size());
273+
274+
// If the token was spelled somewhere else, copy it.
275+
if (StrValRef.begin() != StrVal.begin())
276+
StrVal.assign(StrValRef);
277+
// Truncate if necessary.
278+
else if (StrValRef.size() != StrVal.size())
279+
StrVal.resize(StrValRef.size());
280+
281+
// The _Pragma is lexically sound. Destringize according to C11 6.10.9.1.
282+
prepare_PragmaString(StrVal);
283+
284+
// Plop the string (including the newline and trailing null) into a buffer
285+
// where we can lex it.
286+
Token TmpTok;
287+
TmpTok.startToken();
288+
CreateString(StrVal, TmpTok);
289+
SourceLocation TokLoc = TmpTok.getLocation();
290+
291+
// Make and enter a lexer object so that we lex and expand the tokens just
292+
// like any others.
293+
Lexer *TL = Lexer::Create_PragmaLexer(TokLoc, PragmaLoc, RParenLoc,
294+
StrVal.size(), *this);
295+
296+
EnterSourceFileWithLexer(TL, nullptr);
297+
298+
// With everything set up, lex this as a #pragma directive.
299+
HandlePragmaDirective({PIK__Pragma, PragmaLoc});
300+
301+
// Finally, return whatever came after the pragma directive.
302+
return Lex(Tok);
303+
}
304+
305+
void clang::prepare_PragmaString(SmallVectorImpl<char> &StrVal) {
275306
if (StrVal[0] == 'L' || StrVal[0] == 'U' ||
276307
(StrVal[0] == 'u' && StrVal[1] != '8'))
277308
StrVal.erase(StrVal.begin());
@@ -295,8 +326,8 @@ void Preprocessor::Handle_Pragma(Token &Tok) {
295326

296327
// Remove 'R " d-char-sequence' and 'd-char-sequence "'. We'll replace the
297328
// parens below.
298-
StrVal.erase(0, 2 + NumDChars);
299-
StrVal.erase(StrVal.size() - 1 - NumDChars);
329+
StrVal.erase(StrVal.begin(), StrVal.begin() + 2 + NumDChars);
330+
StrVal.erase(StrVal.end() - 1 - NumDChars, StrVal.end());
300331
} else {
301332
assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' &&
302333
"Invalid string token!");
@@ -318,27 +349,7 @@ void Preprocessor::Handle_Pragma(Token &Tok) {
318349
StrVal[0] = ' ';
319350

320351
// Replace the terminating quote with a \n.
321-
StrVal[StrVal.size()-1] = '\n';
322-
323-
// Plop the string (including the newline and trailing null) into a buffer
324-
// where we can lex it.
325-
Token TmpTok;
326-
TmpTok.startToken();
327-
CreateString(StrVal, TmpTok);
328-
SourceLocation TokLoc = TmpTok.getLocation();
329-
330-
// Make and enter a lexer object so that we lex and expand the tokens just
331-
// like any others.
332-
Lexer *TL = Lexer::Create_PragmaLexer(TokLoc, PragmaLoc, RParenLoc,
333-
StrVal.size(), *this);
334-
335-
EnterSourceFileWithLexer(TL, nullptr);
336-
337-
// With everything set up, lex this as a #pragma directive.
338-
HandlePragmaDirective({PIK__Pragma, PragmaLoc});
339-
340-
// Finally, return whatever came after the pragma directive.
341-
return Lex(Tok);
352+
StrVal[StrVal.size() - 1] = '\n';
342353
}
343354

344355
/// HandleMicrosoft__pragma - Like Handle_Pragma except the pragma text
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// Test scanning deps works with _Pragma syntax when not inside a macro.
2+
3+
// RUN: rm -rf %t
4+
// RUN: split-file %s %t
5+
// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
6+
7+
// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1
8+
9+
//--- cdb.json.template
10+
[{
11+
"directory": "DIR",
12+
"command": "clang -fsyntax-only DIR/tu.c",
13+
"file": "DIR/tu.c"
14+
}]
15+
16+
//--- a.h
17+
_Pragma("once")
18+
#include "b.h"
19+
20+
//--- b.h
21+
#include "a.h"
22+
23+
//--- tu.c
24+
#include "a.h"

0 commit comments

Comments
 (0)