diff --git a/clang/include/clang/Lex/TokenLexer.h b/clang/include/clang/Lex/TokenLexer.h
index 0456dd961fc30..29eee0f671791 100644
--- a/clang/include/clang/Lex/TokenLexer.h
+++ b/clang/include/clang/Lex/TokenLexer.h
@@ -222,14 +222,6 @@ class TokenLexer {
   /// macro expansion source location entry.
   SourceLocation getExpansionLocForMacroDefLoc(SourceLocation loc) const;
 
-  /// Creates SLocEntries and updates the locations of macro argument
-  /// tokens to their new expanded locations.
-  ///
-  /// \param ArgIdSpellLoc the location of the macro argument id inside the
-  /// macro definition.
-  void updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
-                                  Token *begin_tokens, Token *end_tokens);
-
   /// Remove comma ahead of __VA_ARGS__, if present, according to compiler
   /// dialect settings. Returns true if the comma is removed.
   bool MaybeRemoveCommaBeforeVaArgs(SmallVectorImpl<Token> &ResultToks,
diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index 9e046633328d7..32e2b8cdcf4c6 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -306,7 +306,7 @@ bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo,
       *OS << '\n';
       StartedNewLine = true;
     } else if (!DisableLineMarkers) {
-      if (LineNo - CurLine <= 8) {
+      if (LineNo >= CurLine && LineNo - CurLine <= 8) {
         const char *NewLines = "\n\n\n\n\n\n\n\n";
         OS->write(NewLines, LineNo - CurLine);
       } else {
diff --git a/clang/lib/Lex/MacroArgs.cpp b/clang/lib/Lex/MacroArgs.cpp
index 548df16c59f6b..43e1ba9146c24 100644
--- a/clang/lib/Lex/MacroArgs.cpp
+++ b/clang/lib/Lex/MacroArgs.cpp
@@ -65,8 +65,16 @@ MacroArgs *MacroArgs::create(const MacroInfo *MI,
                   "assume trivial copyability if copying into the "
                   "uninitialized array (as opposed to reusing a cached "
                   "MacroArgs)");
-    std::copy(UnexpArgTokens.begin(), UnexpArgTokens.end(),
-              Result->getTrailingObjects());
+    Token *Tokens = Result->getTrailingObjects();
+    std::copy(UnexpArgTokens.begin(), UnexpArgTokens.end(), Tokens);
+
+    // Clear StartOfLine flag on all argument tokens. These tokens are being
+    // used in a macro expansion context where their original line position
+    // is not relevant. Keeping this flag causes incorrect spacing during
+    // stringification (e.g., "a/ b" instead of "a/b").
+    for (unsigned i = 0; i < UnexpArgTokens.size(); ++i) {
+      Tokens[i].clearFlag(Token::StartOfLine);
+    }
   }
 
   return Result;
diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp
index 47f4134fb1465..61fb1406a7185 100644
--- a/clang/lib/Lex/TokenLexer.cpp
+++ b/clang/lib/Lex/TokenLexer.cpp
@@ -477,12 +477,6 @@ void TokenLexer::ExpandFunctionArguments() {
         if (Tok.is(tok::hashhash))
           Tok.setKind(tok::unknown);
 
-      if(ExpandLocStart.isValid()) {
-        updateLocForMacroArgTokens(CurTok.getLocation(),
-                                   ResultToks.begin()+FirstResult,
-                                   ResultToks.end());
-      }
-
       // If any tokens were substituted from the argument, the whitespace
       // before the first token should match the whitespace of the arg
       // identifier.
@@ -534,11 +528,6 @@ void TokenLexer::ExpandFunctionArguments() {
           Tok.setKind(tok::unknown);
       }
 
-      if (ExpandLocStart.isValid()) {
-        updateLocForMacroArgTokens(CurTok.getLocation(),
-                                   ResultToks.end()-NumToks, ResultToks.end());
-      }
-
       // Transfer the leading whitespace information from the token
       // (the macro argument) onto the first token of the
       // expansion. Note that we don't do this for the GNU
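A note on the "a/ b" vs "a/b" comment in the MacroArgs.cpp hunk above: when an argument is stringified, the stringifier decides token by token whether to emit a separating space, and a token flagged as starting a line (or as having leading whitespace) gets a space printed in front of it, so a stale StartOfLine flag on an argument token changes the result of #x. The sketch below is a minimal standalone model of that decision in plain C++; it is illustrative only and is not clang's actual MacroArgs::StringifyArgument, whose exact whitespace condition may differ.

    #include <cstdio>
    #include <string>
    #include <vector>

    // Toy stand-ins for the two whitespace flags clang keeps on a Token.
    struct TokModel {
      std::string Spelling;
      bool StartOfLine = false;
      bool LeadingSpace = false;
    };

    // Stringify the token list, inserting a space before any token that
    // claims to be preceded by whitespace, as the patch comments describe.
    static std::string stringify(const std::vector<TokModel> &Toks) {
      std::string Result;
      for (const TokModel &T : Toks) {
        if (!Result.empty() && (T.LeadingSpace || T.StartOfLine))
          Result += ' ';
        Result += T.Spelling;
      }
      return Result;
    }

    int main() {
      std::vector<TokModel> Arg = {{"a"}, {"/"}, {"b"}};
      std::printf("%s\n", stringify(Arg).c_str()); // prints "a/b"
      Arg[2].StartOfLine = true;                   // a stale flag on 'b'
      std::printf("%s\n", stringify(Arg).c_str()); // prints "a/ b"
      return 0;
    }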
@@ -600,6 +589,23 @@ void TokenLexer::ExpandFunctionArguments() {
     assert(!OwnsTokens && "This would leak if we already own the token list");
     // This is deleted in the dtor.
     NumTokens = ResultToks.size();
+
+    // Set StartOfLine flags for all tokens based on their line numbers. This
+    // ensures HandleWhitespaceBeforeTok calls MoveToLine for tokens on
+    // different lines.
+    SourceManager &SM = PP.getSourceManager();
+    unsigned PrevLine = 0;
+    for (size_t i = 0; i < ResultToks.size(); ++i) {
+      Token &Tok = ResultToks[i];
+      SourceLocation Loc = Tok.getLocation();
+      unsigned CurLine = SM.getPresumedLoc(Loc).getLine();
+      if (i == 0 || (CurLine != PrevLine && CurLine > 0)) {
+        // First token or token on different line - mark as start of line
+        Tok.setFlag(Token::StartOfLine);
+      }
+      PrevLine = CurLine;
+    }
+
     // The tokens will be added to Preprocessor's cache and will be removed
     // when this TokenLexer finishes lexing them.
     Tokens = PP.cacheMacroExpandedTokens(this, ResultToks);
@@ -678,7 +684,31 @@ bool TokenLexer::Lex(Token &Tok) {
                                       ExpandLocEnd,
                                       Tok.getLength());
     } else {
-      instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation());
+      // Check if this token is from a macro argument.
+      // Macro argument tokens have FileID locations (not MacroID) because we
+      // no longer remap them via updateLocForMacroArgTokens. We need to check
+      // if the token is from the macro definition or from an argument.
+      if (SM.isInSLocAddrSpace(Tok.getLocation(), MacroDefStart,
+                               MacroDefLength)) {
+        // Token is from macro definition - remap to expansion location
+        instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation());
+      } else {
+        // Token is from macro argument - wrap FileID in MacroID for diagnostic
+        // suppression while preserving line numbers. Use self-referencing
+        // createMacroArgExpansionLoc where both spelling and expansion point
+        // to the same FileID. This ensures isMacroID() returns true (for
+        // diagnostic suppression) while getDecomposedExpansionLoc() resolves
+        // to the original FileID (for line number preservation).
+        instLoc = SM.createMacroArgExpansionLoc(
+            Tok.getLocation(), Tok.getLocation(), Tok.getLength());
+
+        // Clear the StartOfLine flag for macro argument tokens. These tokens
+        // are being inserted into a macro expansion context, and their original
+        // line position from the source is not relevant. Keeping this flag
+        // would cause incorrect spacing during stringification (e.g., "a/ b"
+        // instead of "a/b").
+        Tok.clearFlag(Token::StartOfLine);
+      }
     }
 
     Tok.setLocation(instLoc);
@@ -687,12 +717,15 @@ bool TokenLexer::Lex(Token &Tok) {
   // If this is the first token, set the lexical properties of the token to
   // match the lexical properties of the macro identifier.
   if (isFirstToken) {
-    Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
+    // Preserve StartOfLine flag if already set (e.g., for macro arguments)
+    if (!Tok.isAtStartOfLine()) {
+      Tok.setFlagValue(Token::StartOfLine, AtStartOfLine);
+    }
     Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
   } else {
     // If this is not the first token, we may still need to pass through
     // leading whitespace if we've expanded a macro.
-    if (AtStartOfLine) Tok.setFlag(Token::StartOfLine);
+    if (AtStartOfLine || Tok.isAtStartOfLine()) Tok.setFlag(Token::StartOfLine);
     if (HasLeadingSpace) Tok.setFlag(Token::LeadingSpace);
   }
   AtStartOfLine = false;
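The StartOfLine flags set in the ExpandFunctionArguments hunk above are what the preprocessed-output printer reacts to: per the comment in that hunk, HandleWhitespaceBeforeTok in PrintPreprocessedOutput.cpp calls MoveToLine for a token that starts a new line, and MoveToLine catches the output up either with a few literal newlines or with a line marker. The sketch below is a simplified standalone model of that choice, including the LineNo >= CurLine guard added to MoveToLine in this patch; the 8-line threshold mirrors the real code, everything else (names, file name) is illustrative.

    #include <cstdio>
    #include <string>

    // Emit whatever is needed to move the output from CurLine to LineNo: a
    // handful of literal newlines for small forward gaps, otherwise a
    // GNU-style line marker. The LineNo >= CurLine check keeps the unsigned
    // subtraction from being taken for a backward jump.
    static std::string moveToLine(unsigned CurLine, unsigned LineNo,
                                  const std::string &FileName) {
      if (LineNo >= CurLine && LineNo - CurLine <= 8)
        return std::string(LineNo - CurLine, '\n');
      return "# " + std::to_string(LineNo) + " \"" + FileName + "\"\n";
    }

    int main() {
      std::fputs(moveToLine(3, 5, "t.c").c_str(), stdout);  // two newlines
      std::fputs(moveToLine(10, 4, "t.c").c_str(), stdout); // backward jump: line marker
      return 0;
    }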
@@ -900,16 +933,33 @@ bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream,
   // expanded from the full ## expression. Pull this information together into
   // a new SourceLocation that captures all of this.
   SourceManager &SM = PP.getSourceManager();
-  if (StartLoc.isFileID())
+  // Only convert FileID to expansion loc if it's from the macro definition.
+  // Macro argument tokens have FileID locations that are NOT from
+  // MacroDefStart.
+  if (StartLoc.isFileID() &&
+      SM.isInSLocAddrSpace(StartLoc, MacroDefStart, MacroDefLength))
     StartLoc = getExpansionLocForMacroDefLoc(StartLoc);
-  if (EndLoc.isFileID())
+  if (EndLoc.isFileID() &&
+      SM.isInSLocAddrSpace(EndLoc, MacroDefStart, MacroDefLength))
     EndLoc = getExpansionLocForMacroDefLoc(EndLoc);
 
   FileID MacroFID = SM.getFileID(MacroExpansionStart);
-  while (SM.getFileID(StartLoc) != MacroFID)
+  // Only walk expansion ranges for MacroID locations.
+  // FileID locations (from macro arguments) don't have expansion ranges.
+  // Must check isMacroID() in the loop condition because
+  // getImmediateExpansionRange might return a FileID location, which would fail
+  // on the next iteration.
+  while (StartLoc.isMacroID() && SM.getFileID(StartLoc) != MacroFID)
     StartLoc = SM.getImmediateExpansionRange(StartLoc).getBegin();
-  while (SM.getFileID(EndLoc) != MacroFID)
+  while (EndLoc.isMacroID() && SM.getFileID(EndLoc) != MacroFID)
     EndLoc = SM.getImmediateExpansionRange(EndLoc).getEnd();
+  // If we exited the loop with FileID locations (from macro arguments),
+  // fall back to using MacroExpansionStart.
+  if (StartLoc.isFileID() && SM.getFileID(StartLoc) != MacroFID)
+    StartLoc = MacroExpansionStart;
+  if (EndLoc.isFileID() && SM.getFileID(EndLoc) != MacroFID)
+    EndLoc = MacroExpansionStart;
+
 
   LHSTok.setLocation(SM.createExpansionLoc(LHSTok.getLocation(), StartLoc, EndLoc,
                                            LHSTok.getLength()));
@@ -985,118 +1035,6 @@ TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const {
   return MacroExpansionStart.getLocWithOffset(relativeOffset);
 }
 
-/// Finds the tokens that are consecutive (from the same FileID)
-/// creates a single SLocEntry, and assigns SourceLocations to each token that
-/// point to that SLocEntry. e.g for
-///   assert(foo == bar);
-/// There will be a single SLocEntry for the "foo == bar" chunk and locations
-/// for the 'foo', '==', 'bar' tokens will point inside that chunk.
-///
-/// \arg begin_tokens will be updated to a position past all the found
-/// consecutive tokens.
-static void updateConsecutiveMacroArgTokens(SourceManager &SM,
-                                            SourceLocation ExpandLoc,
-                                            Token *&begin_tokens,
-                                            Token * end_tokens) {
-  assert(begin_tokens + 1 < end_tokens);
-  SourceLocation BeginLoc = begin_tokens->getLocation();
-  llvm::MutableArrayRef<Token> All(begin_tokens, end_tokens);
-  llvm::MutableArrayRef<Token> Partition;
-
-  auto NearLast = [&, Last = BeginLoc](SourceLocation Loc) mutable {
-    // The maximum distance between two consecutive tokens in a partition.
-    // This is an important trick to avoid using too much SourceLocation address
-    // space!
-    static constexpr SourceLocation::IntTy MaxDistance = 50;
-    auto Distance = Loc.getRawEncoding() - Last.getRawEncoding();
-    Last = Loc;
-    return Distance <= MaxDistance;
-  };
-
-  // Partition the tokens by their FileID.
-  // This is a hot function, and calling getFileID can be expensive, the
-  // implementation is optimized by reducing the number of getFileID.
-  if (BeginLoc.isFileID()) {
-    // Consecutive tokens not written in macros must be from the same file.
-    // (Neither #include nor eof can occur inside a macro argument.)
-    Partition = All.take_while([&](const Token &T) {
-      return T.getLocation().isFileID() && NearLast(T.getLocation());
-    });
-  } else {
-    // Call getFileID once to calculate the bounds, and use the cheaper
-    // sourcelocation-against-bounds comparison.
-    FileID BeginFID = SM.getFileID(BeginLoc);
-    SourceLocation Limit =
-        SM.getComposedLoc(BeginFID, SM.getFileIDSize(BeginFID));
-    Partition = All.take_while([&](const Token &T) {
-      // NOTE: the Limit is included! The lexer recovery only ever inserts a
-      // single token past the end of the FileID, specifically the ) when a
-      // macro-arg containing a comma should be guarded by parentheses.
-      //
-      // It is safe to include the Limit here because SourceManager allocates
-      // FileSize + 1 for each SLocEntry.
-      //
-      // See https://github.com/llvm/llvm-project/issues/60722.
-      return T.getLocation() >= BeginLoc && T.getLocation() <= Limit
-        && NearLast(T.getLocation());
-    });
-  }
-  assert(!Partition.empty());
-
-  // For the consecutive tokens, find the length of the SLocEntry to contain
-  // all of them.
-  SourceLocation::UIntTy FullLength =
-      Partition.back().getEndLoc().getRawEncoding() -
-      Partition.front().getLocation().getRawEncoding();
-  // Create a macro expansion SLocEntry that will "contain" all of the tokens.
-  SourceLocation Expansion =
-      SM.createMacroArgExpansionLoc(BeginLoc, ExpandLoc, FullLength);
-
-#ifdef EXPENSIVE_CHECKS
-  assert(llvm::all_of(Partition.drop_front(),
-                      [&SM, ID = SM.getFileID(Partition.front().getLocation())](
-                          const Token &T) {
-                        return ID == SM.getFileID(T.getLocation());
-                      }) &&
-         "Must have the same FIleID!");
-#endif
-  // Change the location of the tokens from the spelling location to the new
-  // expanded location.
-  for (Token& T : Partition) {
-    SourceLocation::IntTy RelativeOffset =
-        T.getLocation().getRawEncoding() - BeginLoc.getRawEncoding();
-    T.setLocation(Expansion.getLocWithOffset(RelativeOffset));
-  }
-  begin_tokens = &Partition.back() + 1;
-}
-
-/// Creates SLocEntries and updates the locations of macro argument
-/// tokens to their new expanded locations.
-///
-/// \param ArgIdSpellLoc the location of the macro argument id inside the macro
-/// definition.
-void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
-                                            Token *begin_tokens,
-                                            Token *end_tokens) {
-  SourceManager &SM = PP.getSourceManager();
-
-  SourceLocation ExpandLoc =
-      getExpansionLocForMacroDefLoc(ArgIdSpellLoc);
-
-  while (begin_tokens < end_tokens) {
-    // If there's only one token just create a SLocEntry for it.
-    if (end_tokens - begin_tokens == 1) {
-      Token &Tok = *begin_tokens;
-      Tok.setLocation(SM.createMacroArgExpansionLoc(Tok.getLocation(),
-                                                    ExpandLoc,
-                                                    Tok.getLength()));
-      return;
-    }
-
-    updateConsecutiveMacroArgTokens(SM, ExpandLoc, begin_tokens, end_tokens);
-  }
-}
-
 void TokenLexer::PropagateLineStartLeadingSpaceInfo(Token &Result) {
   AtStartOfLine = Result.isAtStartOfLine();
   HasLeadingSpace = Result.hasLeadingSpace();
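The pasteTokens hunk above changes the location fix-up into the following shape: walk each endpoint of the ## range up through its immediate expansion ranges until it lands in the FileID of the current macro expansion, stop as soon as the location is a plain file location (macro-argument tokens now keep such locations), and in that case fall back to MacroExpansionStart. The sketch below models only that walk-and-fallback over a toy location type; it is not clang's SourceManager and skips the isInSLocAddrSpace pre-check.

    #include <cassert>

    // A toy location: either a plain file location (Parent == nullptr) or a
    // "macro" location that knows which file it lives in and where it was
    // expanded from.
    struct Loc {
      int FID;           // file the location belongs to
      const Loc *Parent; // immediate expansion location, if a macro location
      bool isMacroID() const { return Parent != nullptr; }
    };

    // Follow expansion parents only while the location is still a macro
    // location; if the walk ends on a file location outside MacroFID, fall
    // back to the expansion start, as the patched loops do.
    static const Loc *resolve(const Loc *L, int MacroFID,
                              const Loc *ExpansionStart) {
      while (L->isMacroID() && L->FID != MacroFID)
        L = L->Parent;
      if (!L->isMacroID() && L->FID != MacroFID)
        L = ExpansionStart; // macro-argument token: use the expansion itself
      return L;
    }

    int main() {
      Loc File{1, nullptr};      // token spelled in file 1 (a macro argument)
      Loc Expansion{2, nullptr}; // the macro expansion's own file
      Loc InExpansion{2, &File}; // macro location inside that expansion
      assert(resolve(&InExpansion, 2, &Expansion)->FID == 2);
      assert(resolve(&File, 2, &Expansion) == &Expansion); // fallback path
      return 0;
    }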