@@ -382,44 +382,51 @@ static void diagnoseEmbeddedNul(DiagnosticEngine *Diags, const char *Ptr) {
382
382
.fixItRemoveChars (NulLoc, NulEndLoc);
383
383
}
384
384
385
- void Lexer::skipToEndOfLine (bool EatNewline) {
385
+ // / Advance \p CurPtr to the end of line or the end of file. Returns \c true
386
+ // / if it stopped at the end of line, \c false if it stopped at the end of file.
387
+ static bool advanceToEndOfLine (const char *&CurPtr, const char *BufferEnd,
388
+ const char *CodeCompletionPtr = nullptr ,
389
+ DiagnosticEngine *Diags = nullptr ) {
386
390
while (1 ) {
387
391
switch (*CurPtr++) {
388
392
case ' \n ' :
389
393
case ' \r ' :
390
- if (EatNewline) {
391
- NextToken.setAtStartOfLine (true );
392
- } else {
393
- --CurPtr;
394
- }
395
- return ; // If we found the end of the line, return.
394
+ --CurPtr;
395
+ return true ; // If we found the end of the line, return.
396
396
default :
397
397
// If this is a "high" UTF-8 character, validate it.
398
- if ((signed char )(CurPtr[-1 ]) < 0 ) {
398
+ if (Diags && (signed char )(CurPtr[-1 ]) < 0 ) {
399
399
--CurPtr;
400
400
const char *CharStart = CurPtr;
401
401
if (validateUTF8CharacterAndAdvance (CurPtr, BufferEnd) == ~0U )
402
- diagnose (CharStart, diag::lex_invalid_utf8);
402
+ Diags->diagnose (Lexer::getSourceLoc (CharStart),
403
+ diag::lex_invalid_utf8);
403
404
}
404
405
break ; // Otherwise, eat other characters.
405
406
case 0 :
406
- switch (getNulCharacterKind (CurPtr - 1 )) {
407
- case NulCharacterKind::Embedded:
408
- // If this is a random nul character in the middle of a buffer, skip it
409
- // as whitespace.
410
- diagnoseEmbeddedNul (Diags, CurPtr-1 );
411
- LLVM_FALLTHROUGH;
412
- case NulCharacterKind::CodeCompletion:
407
+ if (CurPtr - 1 != BufferEnd) {
408
+ if (Diags && CurPtr - 1 != CodeCompletionPtr) {
409
+ // If this is a random nul character in the middle of a buffer, skip
410
+ // it as whitespace.
411
+ diagnoseEmbeddedNul (Diags, CurPtr - 1 );
412
+ }
413
413
continue ;
414
- case NulCharacterKind::BufferEnd:
415
- // Otherwise, the last line of the file does not have a newline.
416
- --CurPtr;
417
- return ;
418
414
}
415
+ // Otherwise, the last line of the file does not have a newline.
416
+ --CurPtr;
417
+ return false ;
419
418
}
420
419
}
421
420
}
422
421
422
+ void Lexer::skipToEndOfLine (bool EatNewline) {
423
+ bool isEOL = advanceToEndOfLine (CurPtr, BufferEnd, CodeCompletionPtr, Diags);
424
+ if (EatNewline && isEOL) {
425
+ ++CurPtr;
426
+ NextToken.setAtStartOfLine (true );
427
+ }
428
+ }
429
+
423
430
void Lexer::skipSlashSlashComment (bool EatNewline) {
424
431
assert (CurPtr[-1 ] == ' /' && CurPtr[0 ] == ' /' && " Not a // comment" );
425
432
skipToEndOfLine (EatNewline);
@@ -431,26 +438,28 @@ void Lexer::skipHashbang(bool EatNewline) {
431
438
skipToEndOfLine (EatNewline);
432
439
}
433
440
434
- // / skipSlashStarComment - /**/ comments are skipped (treated as whitespace).
435
- // / Note that (unlike in C) block comments can be nested.
436
- void Lexer::skipSlashStarComment () {
441
+ static bool skipToEndOfSlashStarComment (const char *&CurPtr,
442
+ const char *BufferEnd,
443
+ const char *CodeCompletionPtr = nullptr ,
444
+ DiagnosticEngine *Diags = nullptr ) {
437
445
const char *StartPtr = CurPtr-1 ;
438
446
assert (CurPtr[-1 ] == ' /' && CurPtr[0 ] == ' *' && " Not a /* comment" );
439
447
// Make sure to advance over the * so that we don't incorrectly handle /*/ as
440
448
// the beginning and end of the comment.
441
449
++CurPtr;
442
-
450
+
443
451
// /**/ comments can be nested, keep track of how deep we've gone.
444
452
unsigned Depth = 1 ;
445
-
453
+ bool isMultiline = false ;
454
+
446
455
while (1 ) {
447
456
switch (*CurPtr++) {
448
457
case ' *' :
449
458
// Check for a '*/'
450
459
if (*CurPtr == ' /' ) {
451
460
++CurPtr;
452
461
if (--Depth == 0 )
453
- return ;
462
+ return isMultiline ;
454
463
}
455
464
break ;
456
465
case ' /' :
@@ -463,48 +472,58 @@ void Lexer::skipSlashStarComment() {
463
472
464
473
case ' \n ' :
465
474
case ' \r ' :
466
- NextToken. setAtStartOfLine ( true ) ;
475
+ isMultiline = true ;
467
476
break ;
468
477
469
478
default :
470
479
// If this is a "high" UTF-8 character, validate it.
471
- if ((signed char )(CurPtr[-1 ]) < 0 ) {
480
+ if (Diags && (signed char )(CurPtr[-1 ]) < 0 ) {
472
481
--CurPtr;
473
482
const char *CharStart = CurPtr;
474
483
if (validateUTF8CharacterAndAdvance (CurPtr, BufferEnd) == ~0U )
475
- diagnose (CharStart, diag::lex_invalid_utf8);
484
+ Diags->diagnose (Lexer::getSourceLoc (CharStart),
485
+ diag::lex_invalid_utf8);
476
486
}
477
487
478
488
break ; // Otherwise, eat other characters.
479
489
case 0 :
480
- switch (getNulCharacterKind (CurPtr - 1 )) {
481
- case NulCharacterKind::Embedded:
482
- // If this is a random nul character in the middle of a buffer, skip it
483
- // as whitespace.
484
- diagnoseEmbeddedNul (Diags, CurPtr - 1 );
485
- LLVM_FALLTHROUGH;
486
- case NulCharacterKind::CodeCompletion:
490
+ if (CurPtr - 1 != BufferEnd) {
491
+ if (Diags && CurPtr - 1 != CodeCompletionPtr) {
492
+ // If this is a random nul character in the middle of a buffer, skip
493
+ // it as whitespace.
494
+ diagnoseEmbeddedNul (Diags, CurPtr - 1 );
495
+ }
487
496
continue ;
488
- case NulCharacterKind::BufferEnd: {
489
- // Otherwise, we have an unterminated /* comment.
490
- --CurPtr;
497
+ }
498
+ // Otherwise, we have an unterminated /* comment.
499
+ --CurPtr;
491
500
501
+ if (Diags) {
492
502
// Count how many levels deep we are.
493
503
llvm::SmallString<8 > Terminator (" */" );
494
504
while (--Depth != 0 )
495
505
Terminator += " */" ;
496
-
497
506
const char *EOL = (CurPtr[-1 ] == ' \n ' ) ? (CurPtr - 1 ) : CurPtr;
498
- diagnose (EOL, diag::lex_unterminated_block_comment)
499
- . fixItInsert ( getSourceLoc (EOL), Terminator);
500
- diagnose (StartPtr, diag::lex_comment_start);
501
- return ;
502
- }
507
+ Diags
508
+ -> diagnose ( Lexer:: getSourceLoc (EOL),
509
+ diag::lex_unterminated_block_comment)
510
+ . fixItInsert ( Lexer::getSourceLoc (EOL), Terminator) ;
511
+ Diags-> diagnose ( Lexer::getSourceLoc (StartPtr), diag::lex_comment_start);
503
512
}
513
+ return isMultiline;
504
514
}
505
515
}
506
516
}
507
517
518
+ // / skipSlashStarComment - /**/ comments are skipped (treated as whitespace).
519
+ // / Note that (unlike in C) block comments can be nested.
520
+ void Lexer::skipSlashStarComment () {
521
+ bool isMultiline =
522
+ skipToEndOfSlashStarComment (CurPtr, BufferEnd, CodeCompletionPtr, Diags);
523
+ if (isMultiline)
524
+ NextToken.setAtStartOfLine (true );
525
+ }
526
+
508
527
static bool isValidIdentifierContinuationCodePoint (uint32_t c) {
509
528
if (c < 0x80 )
510
529
return clang::isIdentifierBody (c, /* dollar*/ true );
@@ -1537,6 +1556,29 @@ static const char *skipToEndOfInterpolatedExpression(const char *CurPtr,
1537
1556
assert (inStringLiteral ());
1538
1557
continue ;
1539
1558
}
1559
+ case ' /' :
1560
+ if (inStringLiteral ())
1561
+ continue ;
1562
+
1563
+ if (*CurPtr == ' *' ) {
1564
+ auto CommentStart = CurPtr - 1 ;
1565
+ bool isMultilineComment = skipToEndOfSlashStarComment (CurPtr, EndPtr);
1566
+ if (isMultilineComment && !AllowNewline.back ()) {
1567
+ // Multiline comment is prohibited in string literal.
1568
+ // Return the start of the comment.
1569
+ return CommentStart;
1570
+ }
1571
+ } else if (*CurPtr == ' /' ) {
1572
+ if (!AllowNewline.back ()) {
1573
+ // '//' comment is impossible in single line string literal.
1574
+ // Return the start of the comment.
1575
+ return CurPtr - 1 ;
1576
+ }
1577
+ // Advance to the end of the comment.
1578
+ if (/* isEOL=*/ advanceToEndOfLine (CurPtr, EndPtr))
1579
+ ++CurPtr;
1580
+ }
1581
+ continue ;
1540
1582
default :
1541
1583
// Normal token character.
1542
1584
continue ;
@@ -1823,11 +1865,10 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) {
1823
1865
diagnose (CurPtr, diag::lex_unterminated_string);
1824
1866
wasErroneous = true ;
1825
1867
continue ;
1868
+ } else {
1869
+ diagnose (TokStart, diag::lex_unterminated_string);
1870
+ return formToken (tok::unknown, TokStart);
1826
1871
}
1827
-
1828
- // Being diagnosed below.
1829
- assert ((*CurPtr == ' \r ' || *CurPtr == ' \n ' || CurPtr == BufferEnd) &&
1830
- " Returned at unexpected position" );
1831
1872
}
1832
1873
1833
1874
// String literals cannot have \n or \r in them (unless multiline).
0 commit comments