Skip to content

Commit ae55481

Browse files
authored
Merge pull request swiftlang#19784 from rintaro/lexer-rdar20289969
[Lexer] Skip comments in interpolated expression in string literal
2 parents 16ec978 + 8f7254d commit ae55481

File tree

3 files changed: 105 additions and 49 deletions.

lib/Parse/Lexer.cpp

Lines changed: 90 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -382,44 +382,51 @@ static void diagnoseEmbeddedNul(DiagnosticEngine *Diags, const char *Ptr) {
382382
.fixItRemoveChars(NulLoc, NulEndLoc);
383383
}
384384

385-
void Lexer::skipToEndOfLine(bool EatNewline) {
385+
/// Advance \p CurPtr to the end of line or the end of file. Returns \c true
386+
/// if it stopped at the end of line, \c false if it stopped at the end of file.
/// On return \p CurPtr points at the newline character itself (when \c true)
/// or at \p BufferEnd (when \c false); the newline is never consumed here.
/// When \p Diags is null no diagnostics are emitted and bytes with the high
/// bit set are skipped without UTF-8 validation.
387+
static bool advanceToEndOfLine(const char *&CurPtr, const char *BufferEnd,
388+
const char *CodeCompletionPtr = nullptr,
389+
DiagnosticEngine *Diags = nullptr) {
386390
while (1) {
387391
switch (*CurPtr++) {
388392
case '\n':
389393
case '\r':
390-
if (EatNewline) {
391-
NextToken.setAtStartOfLine(true);
392-
} else {
393-
--CurPtr;
394-
}
395-
return; // If we found the end of the line, return.
394+
// Undo the post-increment from the switch so that CurPtr is left pointing
// at the newline; callers decide whether to step over it.
--CurPtr;
395+
return true; // If we found the end of the line, return.
396396
default:
397397
// If this is a "high" UTF-8 character, validate it.
398-
if ((signed char)(CurPtr[-1]) < 0) {
398+
// Validation only runs when a DiagnosticEngine was supplied; without one
// the byte falls through to the plain "eat it" path below.
if (Diags && (signed char)(CurPtr[-1]) < 0) {
399399
--CurPtr;
400400
const char *CharStart = CurPtr;
401401
if (validateUTF8CharacterAndAdvance(CurPtr, BufferEnd) == ~0U)
402-
diagnose(CharStart, diag::lex_invalid_utf8);
402+
Diags->diagnose(Lexer::getSourceLoc(CharStart),
403+
diag::lex_invalid_utf8);
403404
}
404405
break; // Otherwise, eat other characters.
405406
case 0:
406-
switch (getNulCharacterKind(CurPtr - 1)) {
407-
case NulCharacterKind::Embedded:
408-
// If this is a random nul character in the middle of a buffer, skip it
409-
// as whitespace.
410-
diagnoseEmbeddedNul(Diags, CurPtr-1);
411-
LLVM_FALLTHROUGH;
412-
case NulCharacterKind::CodeCompletion:
407+
// A nul that is not located at BufferEnd is skipped and scanning goes on.
// It is diagnosed only when Diags is set and the nul is not the one at
// CodeCompletionPtr.
if (CurPtr - 1 != BufferEnd) {
408+
if (Diags && CurPtr - 1 != CodeCompletionPtr) {
409+
// If this is a random nul character in the middle of a buffer, skip
410+
// it as whitespace.
411+
diagnoseEmbeddedNul(Diags, CurPtr - 1);
412+
}
413413
continue;
414-
case NulCharacterKind::BufferEnd:
415-
// Otherwise, the last line of the file does not have a newline.
416-
--CurPtr;
417-
return;
418414
}
415+
// Otherwise, the last line of the file does not have a newline.
416+
--CurPtr;
417+
return false;
419418
}
420419
}
421420
}
422421

422+
/// Advance the lexer's CurPtr to the end of the current line, or to the end
/// of the buffer, by delegating to advanceToEndOfLine with the lexer's
/// BufferEnd, CodeCompletionPtr and Diags.
/// If \p EatNewline is true and a newline was actually found, additionally
/// step past that one newline character and mark NextToken as being at the
/// start of a line. Nothing extra happens when the scan stopped at the end of
/// the buffer.
void Lexer::skipToEndOfLine(bool EatNewline) {
423+
bool isEOL = advanceToEndOfLine(CurPtr, BufferEnd, CodeCompletionPtr, Diags);
424+
if (EatNewline && isEOL) {
425+
// advanceToEndOfLine leaves CurPtr on the newline character; consume it.
++CurPtr;
426+
NextToken.setAtStartOfLine(true);
427+
}
428+
}
429+
423430
void Lexer::skipSlashSlashComment(bool EatNewline) {
424431
assert(CurPtr[-1] == '/' && CurPtr[0] == '/' && "Not a // comment");
425432
skipToEndOfLine(EatNewline);
@@ -431,26 +438,28 @@ void Lexer::skipHashbang(bool EatNewline) {
431438
skipToEndOfLine(EatNewline);
432439
}
433440

434-
/// skipSlashStarComment - /**/ comments are skipped (treated as whitespace).
435-
/// Note that (unlike in C) block comments can be nested.
436-
void Lexer::skipSlashStarComment() {
441+
static bool skipToEndOfSlashStarComment(const char *&CurPtr,
442+
const char *BufferEnd,
443+
const char *CodeCompletionPtr = nullptr,
444+
DiagnosticEngine *Diags = nullptr) {
437445
const char *StartPtr = CurPtr-1;
438446
assert(CurPtr[-1] == '/' && CurPtr[0] == '*' && "Not a /* comment");
439447
// Make sure to advance over the * so that we don't incorrectly handle /*/ as
440448
// the beginning and end of the comment.
441449
++CurPtr;
442-
450+
443451
// /**/ comments can be nested, keep track of how deep we've gone.
444452
unsigned Depth = 1;
445-
453+
bool isMultiline = false;
454+
446455
while (1) {
447456
switch (*CurPtr++) {
448457
case '*':
449458
// Check for a '*/'
450459
if (*CurPtr == '/') {
451460
++CurPtr;
452461
if (--Depth == 0)
453-
return;
462+
return isMultiline;
454463
}
455464
break;
456465
case '/':
@@ -463,48 +472,58 @@ void Lexer::skipSlashStarComment() {
463472

464473
case '\n':
465474
case '\r':
466-
NextToken.setAtStartOfLine(true);
475+
isMultiline = true;
467476
break;
468477

469478
default:
470479
// If this is a "high" UTF-8 character, validate it.
471-
if ((signed char)(CurPtr[-1]) < 0) {
480+
if (Diags && (signed char)(CurPtr[-1]) < 0) {
472481
--CurPtr;
473482
const char *CharStart = CurPtr;
474483
if (validateUTF8CharacterAndAdvance(CurPtr, BufferEnd) == ~0U)
475-
diagnose(CharStart, diag::lex_invalid_utf8);
484+
Diags->diagnose(Lexer::getSourceLoc(CharStart),
485+
diag::lex_invalid_utf8);
476486
}
477487

478488
break; // Otherwise, eat other characters.
479489
case 0:
480-
switch (getNulCharacterKind(CurPtr - 1)) {
481-
case NulCharacterKind::Embedded:
482-
// If this is a random nul character in the middle of a buffer, skip it
483-
// as whitespace.
484-
diagnoseEmbeddedNul(Diags, CurPtr - 1);
485-
LLVM_FALLTHROUGH;
486-
case NulCharacterKind::CodeCompletion:
490+
if (CurPtr - 1 != BufferEnd) {
491+
if (Diags && CurPtr - 1 != CodeCompletionPtr) {
492+
// If this is a random nul character in the middle of a buffer, skip
493+
// it as whitespace.
494+
diagnoseEmbeddedNul(Diags, CurPtr - 1);
495+
}
487496
continue;
488-
case NulCharacterKind::BufferEnd: {
489-
// Otherwise, we have an unterminated /* comment.
490-
--CurPtr;
497+
}
498+
// Otherwise, we have an unterminated /* comment.
499+
--CurPtr;
491500

501+
if (Diags) {
492502
// Count how many levels deep we are.
493503
llvm::SmallString<8> Terminator("*/");
494504
while (--Depth != 0)
495505
Terminator += "*/";
496-
497506
const char *EOL = (CurPtr[-1] == '\n') ? (CurPtr - 1) : CurPtr;
498-
diagnose(EOL, diag::lex_unterminated_block_comment)
499-
.fixItInsert(getSourceLoc(EOL), Terminator);
500-
diagnose(StartPtr, diag::lex_comment_start);
501-
return;
502-
}
507+
Diags
508+
->diagnose(Lexer::getSourceLoc(EOL),
509+
diag::lex_unterminated_block_comment)
510+
.fixItInsert(Lexer::getSourceLoc(EOL), Terminator);
511+
Diags->diagnose(Lexer::getSourceLoc(StartPtr), diag::lex_comment_start);
503512
}
513+
return isMultiline;
504514
}
505515
}
506516
}
507517

518+
/// skipSlashStarComment - /**/ comments are skipped (treated as whitespace).
519+
/// Note that (unlike in C) block comments can be nested.
/// The scanning itself is done by skipToEndOfSlashStarComment using the
/// lexer's CurPtr, BufferEnd, CodeCompletionPtr and Diags.
520+
void Lexer::skipSlashStarComment() {
521+
bool isMultiline =
522+
skipToEndOfSlashStarComment(CurPtr, BufferEnd, CodeCompletionPtr, Diags);
523+
// The helper returns true when it saw a '\n' or '\r' inside the comment; in
// that case the token following the comment begins a new line.
if (isMultiline)
524+
NextToken.setAtStartOfLine(true);
525+
}
526+
508527
static bool isValidIdentifierContinuationCodePoint(uint32_t c) {
509528
if (c < 0x80)
510529
return clang::isIdentifierBody(c, /*dollar*/true);
@@ -1537,6 +1556,29 @@ static const char *skipToEndOfInterpolatedExpression(const char *CurPtr,
15371556
assert(inStringLiteral());
15381557
continue;
15391558
}
1559+
case '/':
1560+
if (inStringLiteral())
1561+
continue;
1562+
1563+
if (*CurPtr == '*') {
1564+
auto CommentStart = CurPtr - 1;
1565+
bool isMultilineComment = skipToEndOfSlashStarComment(CurPtr, EndPtr);
1566+
if (isMultilineComment && !AllowNewline.back()) {
1567+
// Multiline comment is prohibited in string literal.
1568+
// Return the start of the comment.
1569+
return CommentStart;
1570+
}
1571+
} else if (*CurPtr == '/') {
1572+
if (!AllowNewline.back()) {
1573+
// '//' comment is impossible in single line string literal.
1574+
// Return the start of the comment.
1575+
return CurPtr - 1;
1576+
}
1577+
// Advance to the end of the comment.
1578+
if (/*isEOL=*/advanceToEndOfLine(CurPtr, EndPtr))
1579+
++CurPtr;
1580+
}
1581+
continue;
15401582
default:
15411583
// Normal token character.
15421584
continue;
@@ -1823,11 +1865,10 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) {
18231865
diagnose(CurPtr, diag::lex_unterminated_string);
18241866
wasErroneous = true;
18251867
continue;
1868+
} else {
1869+
diagnose(TokStart, diag::lex_unterminated_string);
1870+
return formToken(tok::unknown, TokStart);
18261871
}
1827-
1828-
// Being diagnosed below.
1829-
assert((*CurPtr == '\r' || *CurPtr == '\n' || CurPtr == BufferEnd) &&
1830-
"Returned at unexpected position");
18311872
}
18321873

18331874
// String literals cannot have \n or \r in them (unless multiline).

test/Parse/multiline_string.swift

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,9 +221,14 @@ _ = "hello\("""
221221
_ = """
222222
welcome
223223
\(
224+
/*
225+
')' or '"""' in comment.
226+
"""
227+
*/
224228
"to\("""
225229
Swift
226230
""")"
231+
// ) or """ in comment.
227232
)
228233
!
229234
"""

test/expr/expressions.swift

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -919,3 +919,13 @@ let _: Int64 = Int64(0xFFF_FFFF_FFFF_FFFF)
919919
let _: Int64 = 0xFFF_FFFF_FFFF_FFFF as Int64
920920
let _ = Int64(0xFFF_FFFF_FFFF_FFFF)
921921
let _ = 0xFFF_FFFF_FFFF_FFFF as Int64
922+
923+
// rdar://problem/20289969 - string interpolation with comment containing ')' or '"'
924+
let _ = "foo \(42 /* ) " ) */)"
925+
let _ = "foo \(foo // ) " // expected-error {{unterminated string literal}}
926+
let _ = "foo \(42 /*
927+
* multiline comment
928+
*/)end"
929+
// expected-error @-3 {{unterminated string literal}}
930+
// expected-error @-2 {{expected expression}}
931+
// expected-error @-3 {{unterminated string literal}}

0 commit comments

Comments
 (0)