@@ -19,6 +19,7 @@
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Lex/LexDiagnostic.h"
 #include "clang/Lex/Lexer.h"
+#include "clang/Lex/Pragma.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringMap.h"
@@ -71,6 +72,8 @@ struct Scanner {
     // Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'.
     LangOpts.ObjC = true;
     LangOpts.LineComment = true;
+    // FIXME: we do not enable C11 or C++11, so we are missing u/u8/U"" and
+    // R"()" literals.
     return LangOpts;
   }

@@ -90,6 +93,10 @@ struct Scanner {
   void skipLine(const char *&First, const char *const End);
   void skipDirective(StringRef Name, const char *&First, const char *const End);

+  /// Returns the spelling of a string literal or identifier after performing
+  /// any processing needed to handle \c clang::Token::NeedsCleaning.
+  StringRef cleanStringIfNeeded(const dependency_directives_scan::Token &Tok);
+
   /// Lexes next token and if it is identifier returns its string, otherwise
   /// it skips the current line and returns \p None.
   ///
@@ -111,13 +118,30 @@ struct Scanner {
                                                 const char *&First,
                                                 const char *const End);

+  /// Lexes next token and returns true iff it matches the kind \p K.
+  /// Otherwise it skips the current line and returns false.
+  ///
+  /// In any case (whatever the token kind) \p First and the \p Lexer will
+  /// advance beyond the token.
+  [[nodiscard]] bool isNextTokenOrSkipLine(tok::TokenKind K, const char *&First,
+                                           const char *const End);
+
+  /// Lexes next token and if it is string literal, returns its string.
+  /// Otherwise, it skips the current line and returns \p std::nullopt.
+  ///
+  /// In any case (whatever the token kind) \p First and the \p Lexer will
+  /// advance beyond the token.
+  [[nodiscard]] std::optional<StringRef>
+  tryLexStringLiteralOrSkipLine(const char *&First, const char *const End);
+
   [[nodiscard]] bool scanImpl(const char *First, const char *const End);
   [[nodiscard]] bool lexPPLine(const char *&First, const char *const End);
   [[nodiscard]] bool lexAt(const char *&First, const char *const End);
   [[nodiscard]] bool lexModule(const char *&First, const char *const End);
   [[nodiscard]] bool lexDefine(const char *HashLoc, const char *&First,
                                const char *const End);
   [[nodiscard]] bool lexPragma(const char *&First, const char *const End);
+  [[nodiscard]] bool lex_Pragma(const char *&First, const char *const End);
   [[nodiscard]] bool lexEndif(const char *&First, const char *const End);
   [[nodiscard]] bool lexDefault(DirectiveKind Kind, const char *&First,
                                 const char *const End);
@@ -524,22 +548,18 @@ void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) {
   }
 }

-[[nodiscard]] Optional<StringRef>
-Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) {
-  const dependency_directives_scan::Token &Tok = lexToken(First, End);
-  if (Tok.isNot(tok::raw_identifier)) {
-    if (!Tok.is(tok::eod))
-      skipLine(First, End);
-    return None;
-  }
-
+StringRef
+Scanner::cleanStringIfNeeded(const dependency_directives_scan::Token &Tok) {
   bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning;
   if (LLVM_LIKELY(!NeedsCleaning))
     return Input.slice(Tok.Offset, Tok.getEnd());

   SmallString<64> Spelling;
   Spelling.resize(Tok.Length);

+  // FIXME: C++11 raw string literals need special handling (see getSpellingSlow
+  // in the Lexer). Currently we cannot see them due to our LangOpts.
+
   unsigned SpellingLength = 0;
   const char *BufPtr = Input.begin() + Tok.Offset;
   const char *AfterIdent = Input.begin() + Tok.getEnd();
@@ -554,6 +574,18 @@ Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) {
       .first->first();
 }

+Optional<StringRef>
+Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) {
+  const dependency_directives_scan::Token &Tok = lexToken(First, End);
+  if (Tok.isNot(tok::raw_identifier)) {
+    if (!Tok.is(tok::eod))
+      skipLine(First, End);
+    return None;
+  }
+
+  return cleanStringIfNeeded(Tok);
+}
+
 StringRef Scanner::lexIdentifier(const char *&First, const char *const End) {
   Optional<StringRef> Id = tryLexIdentifierOrSkipLine(First, End);
   assert(Id && "expected identifier token");
@@ -570,6 +602,28 @@ bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const char *&First,
   return false;
 }

+bool Scanner::isNextTokenOrSkipLine(tok::TokenKind K, const char *&First,
+                                    const char *const End) {
+  const dependency_directives_scan::Token &Tok = lexToken(First, End);
+  if (Tok.is(K))
+    return true;
+  skipLine(First, End);
+  return false;
+}
+
+std::optional<StringRef>
+Scanner::tryLexStringLiteralOrSkipLine(const char *&First,
+                                       const char *const End) {
+  const dependency_directives_scan::Token &Tok = lexToken(First, End);
+  if (!tok::isStringLiteral(Tok.Kind)) {
+    if (!Tok.is(tok::eod))
+      skipLine(First, End);
+    return std::nullopt;
+  }
+
+  return cleanStringIfNeeded(Tok);
+}
+
 bool Scanner::lexAt(const char *&First, const char *const End) {
   // Handle "@import".

@@ -627,6 +681,41 @@ bool Scanner::lexModule(const char *&First, const char *const End) {
   return lexModuleDirectiveBody(Kind, First, End);
 }

+bool Scanner::lex_Pragma(const char *&First, const char *const End) {
+  if (!isNextTokenOrSkipLine(tok::l_paren, First, End))
+    return false;
+
+  std::optional<StringRef> Str = tryLexStringLiteralOrSkipLine(First, End);
+
+  if (!Str || !isNextTokenOrSkipLine(tok::r_paren, First, End))
+    return false;
+
+  SmallString<64> Buffer(*Str);
+  prepare_PragmaString(Buffer);
+
+  // Use a new scanner instance since the tokens will be inside the allocated
+  // string. We should already have captured all the relevant tokens in the
+  // current scanner.
+  SmallVector<dependency_directives_scan::Token> DiscardTokens;
+  const char *Begin = Buffer.c_str();
+  Scanner PragmaScanner{StringRef(Begin, Buffer.size()), DiscardTokens, Diags,
+                        InputSourceLoc};
+
+  PragmaScanner.TheLexer.setParsingPreprocessorDirective(true);
+  if (PragmaScanner.lexPragma(Begin, Buffer.end()))
+    return true;
+
+  DirectiveKind K = PragmaScanner.topDirective();
+  if (K == pp_none) {
+    skipLine(First, End);
+    return false;
+  }
+
+  assert(Begin == Buffer.end());
+  pushDirective(K);
+  return false;
+}
+
 bool Scanner::lexPragma(const char *&First, const char *const End) {
   Optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End);
   if (!FoundId)
@@ -711,6 +800,7 @@ static bool isStartOfRelevantLine(char First) {
   case 'i':
   case 'e':
   case 'm':
+  case '_':
     return true;
   }
   return false;
@@ -747,6 +837,12 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {
   if (*First == 'i' || *First == 'e' || *First == 'm')
     return lexModule(First, End);

+  if (*First == '_') {
+    if (isNextIdentifierOrSkipLine("_Pragma", First, End))
+      return lex_Pragma(First, End);
+    return false;
+  }
+
   // Handle preprocessing directives.

   TheLexer.setParsingPreprocessorDirective(true);
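For context, here is an illustrative input (not part of the patch; the pragma arguments and names are assumed examples) showing the kind of directive the new lex_Pragma path recognizes: pragmas spelled through the standard _Pragma operator instead of a #pragma line. prepare_PragmaString from clang/Lex/Pragma.h (now included above) undoes the string-literal escaping before the nested scanner re-runs lexPragma on the contents.

    // Assumed example source; with this change the dependency directives
    // scanner records these lines instead of skipping them.
    _Pragma("once")                          // behaves like '#pragma once'
    _Pragma("push_macro(\"NDEBUG\")")        // behaves like '#pragma push_macro("NDEBUG")'
    _Pragma("clang module import MyModule")  // behaves like '#pragma clang module import MyModule'

Only pragma forms that the existing lexPragma logic already understands (e.g. once, push_macro, clang module import) end up as recorded directives; anything else falls through to skipLine.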