Skip to content

Commit 8597237

Browse files
committed
[clang][scandeps] Improve handling of rawstrings.
The current parser only looks one character back for an `R` before each string to decide whether it is a raw string. The preprocessor is more flexible than that and accepts constructs such as `R\` followed by a line break and then `"str"` (an escaped newline between the prefix and the opening quote), and much more. This patch also adds more test coverage for raw strings in the dependency directives scanner. This was co-authored by Sylvain Audi <[email protected]>. Fixes #137648
1 parent 6b129d6 commit 8597237

File tree

2 files changed

+91
-7
lines changed

2 files changed

+91
-7
lines changed

clang/lib/Lex/DependencyDirectivesScanner.cpp

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,24 @@ static void skipOverSpaces(const char *&First, const char *const End) {
206206
++First;
207207
}
208208

209+
// Move back by one character, skipping escaped newlines (backslash + \n)
210+
static char previousChar(const char *First, const char *&Current) {
211+
assert(Current > First);
212+
--Current;
213+
while (Current > First + 1 && isVerticalWhitespace(*Current)) {
214+
const char PrevChar = *(Current - 1);
215+
if (PrevChar == '\\') {
216+
Current -= 2; // backslash + (\n or \r)
217+
} else if (Current > First + 2 && isVerticalWhitespace(PrevChar) &&
218+
PrevChar != *Current && *(Current - 2) == '\\') {
219+
Current -= 3; // backslash + (\n\r or \r\n)
220+
} else {
221+
break;
222+
}
223+
}
224+
return *Current;
225+
}
226+
209227
[[nodiscard]] static bool isRawStringLiteral(const char *First,
210228
const char *Current) {
211229
assert(First <= Current);
@@ -215,25 +233,28 @@ static void skipOverSpaces(const char *&First, const char *const End) {
215233
return false;
216234

217235
// Check for an "R".
218-
--Current;
219-
if (*Current != 'R')
236+
if (previousChar(First, Current) != 'R')
220237
return false;
221-
if (First == Current || !isAsciiIdentifierContinue(*--Current))
238+
if (First == Current ||
239+
!isAsciiIdentifierContinue(previousChar(First, Current)))
222240
return true;
223241

224242
// Check for a prefix of "u", "U", or "L".
225243
if (*Current == 'u' || *Current == 'U' || *Current == 'L')
226-
return First == Current || !isAsciiIdentifierContinue(*--Current);
244+
return First == Current ||
245+
!isAsciiIdentifierContinue(previousChar(First, Current));
227246

228247
// Check for a prefix of "u8".
229-
if (*Current != '8' || First == Current || *Current-- != 'u')
248+
if (*Current != '8' || First == Current ||
249+
previousChar(First, Current) != 'u')
230250
return false;
231-
return First == Current || !isAsciiIdentifierContinue(*--Current);
251+
return First == Current ||
252+
!isAsciiIdentifierContinue(previousChar(First, Current));
232253
}
233254

234255
static void skipRawString(const char *&First, const char *const End) {
235256
assert(First[0] == '"');
236-
assert(First[-1] == 'R');
257+
//assert(First[-1] == 'R');
237258

238259
const char *Last = ++First;
239260
while (Last != End && *Last != '(')
@@ -416,6 +437,14 @@ void Scanner::skipLine(const char *&First, const char *const End) {
416437
continue;
417438
}
418439

440+
// Continue on the same line if an EOL is preceded with backslash
441+
if (First + 1 < End && *First == '\\') {
442+
if (unsigned Len = isEOL(First + 1, End)) {
443+
First += 1 + Len;
444+
continue;
445+
}
446+
}
447+
419448
// Iterate over comments correctly.
420449
if (*First != '/' || End - First < 2) {
421450
LastTokenPtr = First;
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// Exercises raw string literal handling in the dependency directives scanner:
// the scan-deps invocations at the end of this file expect header.h,
// header3.h and header4.h (and not header2.h) to be listed after tu.c.
// RUN: rm -rf %t
2+
// RUN: split-file %s %t
3+
// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json
4+
5+
//--- cdb.json.in
6+
[{
7+
"directory": "DIR",
8+
"command": "clang -c DIR/tu.c -o DIR/tu.o -IDIR/include",
9+
"file": "DIR/tu.c"
10+
}]
11+
//--- include/header.h
12+
//--- include/header2.h
13+
//--- include/header3.h
14+
//--- include/header4.h
15+
//--- tu.c
16+
#if 0
17+
R"x()x"
18+
#endif
19+
20+
#include "header.h"
21+
22+
#if 0
23+
R"y(";
24+
#endif
25+
#include "header2.h"
26+
27+
#if 0
28+
//")y"
29+
#endif
30+
31+
#if 0
32+
R"y(";
33+
R"z()y";
34+
#endif
35+
#include "header3.h"
36+
#if 0
37+
//")z"
38+
#endif
39+
40+
#if 0
41+
R\
42+
"y(";
43+
R"z()y";
44+
#endif
45+
#include "header4.h"
46+
#if 0
47+
//")z"
48+
#endif
49+
50+
// RUN: clang-scan-deps -compilation-database %t/cdb.json -mode preprocess | FileCheck %s
51+
// RUN: clang-scan-deps -compilation-database %t/cdb.json -mode preprocess-dependency-directives | FileCheck %s
52+
// CHECK: tu.c
53+
// CHECK-NEXT: header.h
54+
// CHECK-NEXT: header3.h
55+
// CHECK-NEXT: header4.h

0 commit comments

Comments
 (0)