From f7e79ed934bcb05548b6a4d303d6ed6284648f35 Mon Sep 17 00:00:00 2001 From: Mythreya Kuricheti Date: Fri, 1 Aug 2025 01:19:18 -0700 Subject: [PATCH 1/5] [libtooling] Insert headers in global module fragment --- .../lib/Tooling/Inclusions/HeaderIncludes.cpp | 22 ++++- .../unittests/Tooling/HeaderIncludesTest.cpp | 81 +++++++++++++++++++ 2 files changed, 102 insertions(+), 1 deletion(-) diff --git a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp index 2b5a293b35841..2ad0c8b1ff135 100644 --- a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp +++ b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp @@ -74,6 +74,15 @@ void skipComments(Lexer &Lex, Token &Tok) { return; } +bool checkAndConsumeModuleDecl(const SourceManager &SM, Lexer &Lex, + Token &Tok) { + bool Matched = Tok.is(tok::raw_identifier) && + Tok.getRawIdentifier() == "module" && + !Lex.LexFromRawLexer(Tok) && Tok.is(tok::semi) && + !Lex.LexFromRawLexer(Tok); + return Matched; +} + // Returns the offset after header guard directives and any comments // before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no // header guard is present in the code, this will return the offset after @@ -95,7 +104,17 @@ unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName, return std::max(InitialOffset, Consume(SM, Lex, Tok)); }); }; - return std::max( + + auto ModuleDecl = ConsumeHeaderGuardAndComment( + [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned { + if (checkAndConsumeModuleDecl(SM, Lex, Tok)) { + skipComments(Lex, Tok); + return SM.getFileOffset(Tok.getLocation()); + } + return 0; + }); + + auto HeaderAndPPOffset = std::max( // #ifndef/#define ConsumeHeaderGuardAndComment( [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned { @@ -115,6 +134,7 @@ unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName, return SM.getFileOffset(Tok.getLocation()); return 0; })); + return std::max(HeaderAndPPOffset, ModuleDecl); } // Check if a sequence of tokens is like diff --git a/clang/unittests/Tooling/HeaderIncludesTest.cpp b/clang/unittests/Tooling/HeaderIncludesTest.cpp index 929156a11d0d9..9308513225d9a 100644 --- a/clang/unittests/Tooling/HeaderIncludesTest.cpp +++ b/clang/unittests/Tooling/HeaderIncludesTest.cpp @@ -594,6 +594,87 @@ TEST_F(HeaderIncludesTest, CanDeleteAfterCode) { EXPECT_EQ(Expected, remove(Code, "\"b.h\"")); } +TEST_F(HeaderIncludesTest, InsertInGlobalModuleFragment) { + // Ensure header insertions go only in the global module fragment + std::string Code = R"cpp(// comments + +// more comments + +module; +export module foo; + +int main() { + std::vector ints {}; +})cpp"; + std::string Expected = R"cpp(// comments + +// more comments + +module; +#include +export module foo; + +int main() { + std::vector ints {}; +})cpp"; + + auto InsertedCode = insert(Code, ""); + EXPECT_EQ(Expected, insert(Code, "")); +} + +TEST_F(HeaderIncludesTest, InsertInGlobalModuleFragmentWithPP) { + // Ensure header insertions go only in the global module fragment + std::string Code = R"cpp(// comments + +// more comments + +// some more comments + +module; + +#ifndef MACRO_NAME +#define MACRO_NAME +#endif + +// comment + +#ifndef MACRO_NAME +#define MACRO_NAME +#endif + +// more comment + +int main() { + std::vector ints {}; +})cpp"; + std::string Expected = R"cpp(// comments + +// more comments + +// some more comments + +module; + +#include +#ifndef MACRO_NAME +#define MACRO_NAME +#endif + +// comment + +#ifndef MACRO_NAME +#define MACRO_NAME +#endif + +// more comment + +int main() { + std::vector ints {}; +})cpp"; + + EXPECT_EQ(Expected, insert(Code, "")); +} + } // namespace } // namespace tooling } // namespace clang From 973ace64a6af7c8664c3f1b48660989e3b871b2c Mon Sep 17 00:00:00 2001 From: Mythreya Kuricheti Date: Sat, 2 Aug 2025 00:03:51 -0700 Subject: [PATCH 2/5] code review --- .../unittests/Tooling/HeaderIncludesTest.cpp | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/clang/unittests/Tooling/HeaderIncludesTest.cpp b/clang/unittests/Tooling/HeaderIncludesTest.cpp index 9308513225d9a..befe4a3dd5a8a 100644 --- a/clang/unittests/Tooling/HeaderIncludesTest.cpp +++ b/clang/unittests/Tooling/HeaderIncludesTest.cpp @@ -675,6 +675,67 @@ int main() { EXPECT_EQ(Expected, insert(Code, "")); } +TEST_F(HeaderIncludesTest, InsertInGlobalModuleFragmentWithPPIncludes) { + // Ensure header insertions go only in the global module fragment + std::string Code = R"cpp(// comments + +// more comments + +// some more comments + +module; + +#include "header.h" + +#include + +#ifndef MACRO_NAME +#define MACRO_NAME +#endif + +// comment + +#ifndef MACRO_NAME +#define MACRO_NAME +#endif + +// more comment + +int main() { + std::vector ints {}; +})cpp"; + std::string Expected = R"cpp(// comments + +// more comments + +// some more comments + +module; + +#include "header.h" + +#include +#include + +#ifndef MACRO_NAME +#define MACRO_NAME +#endif + +// comment + +#ifndef MACRO_NAME +#define MACRO_NAME +#endif + +// more comment + +int main() { + std::vector ints {}; +})cpp"; + + EXPECT_EQ(Expected, insert(Code, "")); +} + } // namespace } // namespace tooling } // namespace clang From e0449388177ee0340886248f2b393b6ebd7a926d Mon Sep 17 00:00:00 2001 From: Mythreya Kuricheti Date: Sat, 2 Aug 2025 22:12:37 -0700 Subject: [PATCH 3/5] code review --- .../lib/Tooling/Inclusions/HeaderIncludes.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp index 2ad0c8b1ff135..a9fc09411dfbf 100644 --- a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp +++ b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp @@ -83,13 +83,14 @@ bool checkAndConsumeModuleDecl(const SourceManager &SM, Lexer &Lex, return Matched; } -// Returns the offset after header guard directives and any comments -// before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no -// header guard is present in the code, this will return the offset after -// skipping all comments from the start of the code. -unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName, - StringRef Code, - const IncludeStyle &Style) { +// If file does not use modules, returns the offset after header guard +// directives and any comments before/after header guards (e.g. +// `#ifndef/#define` pair, `#pragma once`). If no header guard is present in the +// code, this will return the offset after skipping all comments from the start +// of the code. If modules are in use, the offset will be in the global module +// fragment. +unsigned getMinHeaderInsertionOffset(StringRef FileName, StringRef Code, + const IncludeStyle &Style) { // \p Consume returns location after header guard or 0 if no header guard is // found. auto ConsumeHeaderGuardAndComment = @@ -300,8 +301,7 @@ const llvm::Regex HeaderIncludes::IncludeRegex(IncludeRegexPattern); HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code, const IncludeStyle &Style) : FileName(FileName), Code(Code), FirstIncludeOffset(-1), - MinInsertOffset( - getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)), + MinInsertOffset(getMinHeaderInsertionOffset(FileName, Code, Style)), MaxInsertOffset(MinInsertOffset + getMaxHeaderInsertionOffset( FileName, Code.drop_front(MinInsertOffset), Style)), From dc275e95b554f2822419c2704771cc9101aa7393 Mon Sep 17 00:00:00 2001 From: Mythreya Kuricheti Date: Sat, 2 Aug 2025 22:38:35 -0700 Subject: [PATCH 4/5] update comment for `getMinHeaderInsertionOffset` --- clang/lib/Tooling/Inclusions/HeaderIncludes.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp index a9fc09411dfbf..18266f96c3d38 100644 --- a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp +++ b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp @@ -83,12 +83,11 @@ bool checkAndConsumeModuleDecl(const SourceManager &SM, Lexer &Lex, return Matched; } -// If file does not use modules, returns the offset after header guard -// directives and any comments before/after header guards (e.g. -// `#ifndef/#define` pair, `#pragma once`). If no header guard is present in the -// code, this will return the offset after skipping all comments from the start -// of the code. If modules are in use, the offset will be in the global module -// fragment. +// Determines the minimum offset into the file where we want to insert header +// includes. This will be put (when available): +// - after `#pragma once` +// - after header guards (`#ifdef` and `#define`) +// - after opening global module (`module;`) unsigned getMinHeaderInsertionOffset(StringRef FileName, StringRef Code, const IncludeStyle &Style) { // \p Consume returns location after header guard or 0 if no header guard is From 908392a23a3797b78171d3c69f3502ece71f4d39 Mon Sep 17 00:00:00 2001 From: Mythreya Kuricheti Date: Sat, 2 Aug 2025 23:20:13 -0700 Subject: [PATCH 5/5] Add comment from code review --- clang/lib/Tooling/Inclusions/HeaderIncludes.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp index 18266f96c3d38..e11319e99ba6a 100644 --- a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp +++ b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp @@ -88,6 +88,8 @@ bool checkAndConsumeModuleDecl(const SourceManager &SM, Lexer &Lex, // - after `#pragma once` // - after header guards (`#ifdef` and `#define`) // - after opening global module (`module;`) +// - after any comments at the start of the file or immediately following one of +// the above constructs unsigned getMinHeaderInsertionOffset(StringRef FileName, StringRef Code, const IncludeStyle &Style) { // \p Consume returns location after header guard or 0 if no header guard is