From 9cc149df92f035e025fd237662815a2ca28cc2f9 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Thu, 2 Nov 2023 14:35:30 -0700 Subject: [PATCH 1/2] [clang][modules] Track included files per submodule (cherry picked from commit 9debc58d5135fbde51967dfb076d0ec5d954df38) Conflicts: clang/include/clang/Lex/Preprocessor.h clang/lib/Serialization/ASTReader.cpp clang/lib/Serialization/ASTReaderInternals.h clang/lib/Serialization/ASTWriter.cpp --- clang/include/clang/Basic/Module.h | 2 + clang/include/clang/Lex/Preprocessor.h | 38 ++++++++++++--- clang/lib/Serialization/ASTReader.cpp | 33 ++++++++----- clang/lib/Serialization/ASTWriter.cpp | 46 ++++++++++++++---- clang/test/Modules/per-submodule-includes.m | 53 +++++++++++++++++++++ 5 files changed, 145 insertions(+), 27 deletions(-) create mode 100644 clang/test/Modules/per-submodule-includes.m diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h index 69a1de6f79b35..02552ed221036 100644 --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -498,6 +498,8 @@ class alignas(8) Module { /// to import but didn't because they are not direct uses. llvm::SmallSetVector UndeclaredUses; + llvm::DenseSet Includes; + /// A library or framework to link against when an entity from this /// module is used. struct LinkLibrary { diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index b1c648e647f41..8789febe04081 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -1021,6 +1021,8 @@ class Preprocessor { /// The set of modules that are visible within the submodule. VisibleModuleSet VisibleModules; + /// The files that have been included. + IncludedFilesSet IncludedFiles; // FIXME: CounterValue? // FIXME: PragmaPushMacroInfo? }; @@ -1033,8 +1035,8 @@ class Preprocessor { /// in a submodule. SubmoduleState *CurSubmoduleState; - /// The files that have been included. - IncludedFilesSet IncludedFiles; + /// The files that have been included outside of (sub)modules. + IncludedFilesSet Includes; /// The set of top-level modules that affected preprocessing, but were not /// imported. @@ -1516,19 +1518,41 @@ class Preprocessor { /// Mark the file as included. /// Returns true if this is the first time the file was included. bool markIncluded(FileEntryRef File) { + bool AlreadyIncluded = alreadyIncluded(File); HeaderInfo.getFileInfo(File).IsLocallyIncluded = true; - return IncludedFiles.insert(File).second; + CurSubmoduleState->IncludedFiles.insert(File); + if (!BuildingSubmoduleStack.empty()) + BuildingSubmoduleStack.back().M->Includes.insert(File); + else if (Module *M = getCurrentModule()) + M->Includes.insert(File); + else + Includes.insert(File); + return !AlreadyIncluded; } /// Return true if this header has already been included. bool alreadyIncluded(FileEntryRef File) const { HeaderInfo.getFileInfo(File); - return IncludedFiles.count(File); + if (CurSubmoduleState->IncludedFiles.contains(File)) + return true; + // TODO: Do this more efficiently. + for (const auto &[Name, M] : HeaderInfo.getModuleMap().modules()) + if (CurSubmoduleState->VisibleModules.isVisible(M)) + if (M->Includes.contains(File)) + return true; + return false; + } + + void markIncludedOnTopLevel(const FileEntry *File) { + Includes.insert(File); + CurSubmoduleState->IncludedFiles.insert(File); + } + + void markIncludedInModule(Module *M, const FileEntry *File) { + M->Includes.insert(File); } - /// Get the set of included files. - IncludedFilesSet &getIncludedFiles() { return IncludedFiles; } - const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; } + const IncludedFilesSet &getTopLevelIncludes() const { return Includes; } /// Return the name of the macro defined before \p Loc that has /// spelling \p Tokens. If there are multiple macros with same spelling, diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 55c52154c4113..37fa3d6006d8d 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -2346,14 +2346,6 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d, HeaderFileInfo HFI; unsigned Flags = *d++; - OptionalFileEntryRef FE; - bool Included = (Flags >> 6) & 0x01; - if (Included) - if ((FE = getFile(key))) - // Not using \c Preprocessor::markIncluded(), since that would attempt to - // deserialize this header file info again. - Reader.getPreprocessor().getIncludedFiles().insert(*FE); - // FIXME: Refactor with mergeHeaderFileInfo in HeaderSearch.cpp. HFI.isImport |= (Flags >> 5) & 0x01; HFI.isPragmaOnce |= (Flags >> 4) & 0x01; @@ -2361,6 +2353,27 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d, HFI.LazyControllingMacro = Reader.getGlobalIdentifierID( M, endian::readNext(d)); + auto FE = getFile(key); + Preprocessor &PP = Reader.getPreprocessor(); + ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap(); + + unsigned IncludedCount = + endian::readNext(d); + for (unsigned I = 0; I < IncludedCount; ++I) { + uint32_t LocalSMID = + endian::readNext(d); + if (!FE) + continue; + + if (LocalSMID == 0) { + PP.markIncludedOnTopLevel(*FE); + } else { + SubmoduleID GlobalSMID = Reader.getGlobalSubmoduleID(M, LocalSMID); + Module *Mod = Reader.getSubmodule(GlobalSMID); + PP.markIncludedInModule(Mod, *FE); + } + } + assert((End - d) % 4 == 0 && "Wrong data length in HeaderFileInfo deserialization"); while (d != End) { @@ -2373,10 +2386,8 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d, // implicit module import. SubmoduleID GlobalSMID = Reader.getGlobalSubmoduleID(M, LocalSMID); Module *Mod = Reader.getSubmodule(GlobalSMID); - ModuleMap &ModMap = - Reader.getPreprocessor().getHeaderSearchInfo().getModuleMap(); - if (FE || (FE = getFile(key))) { + if (FE) { // FIXME: NameAsWritten Module::Header H = {std::string(key.Filename), "", *FE}; ModMap.addHeader(Mod, H, HeaderRole, /*Imported=*/true); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 547497cbd87d9..c0d5d75f5c0fa 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -2094,14 +2094,15 @@ namespace { llvm::PointerIntPair; struct data_type { - data_type(const HeaderFileInfo &HFI, bool AlreadyIncluded, + data_type(const HeaderFileInfo &HFI, + const std::vector &Includers, ArrayRef KnownHeaders, UnresolvedModule Unresolved) - : HFI(HFI), AlreadyIncluded(AlreadyIncluded), - KnownHeaders(KnownHeaders), Unresolved(Unresolved) {} + : HFI(HFI), Includers(Includers), KnownHeaders(KnownHeaders), + Unresolved(Unresolved) {} HeaderFileInfo HFI; - bool AlreadyIncluded; + std::vector Includers; SmallVector KnownHeaders; UnresolvedModule Unresolved; }; @@ -2124,6 +2125,12 @@ namespace { EmitKeyDataLength(raw_ostream& Out, key_type_ref key, data_type_ref Data) { unsigned KeyLen = key.Filename.size() + 1 + 8 + 8; unsigned DataLen = 1 + sizeof(IdentifierID); + + DataLen += 4; + for (const Module *M : Data.Includers) + if (!M || Writer.getLocalOrImportedSubmoduleID(M)) + DataLen += 4; + for (auto ModInfo : Data.KnownHeaders) if (Writer.getLocalOrImportedSubmoduleID(ModInfo.getModule())) DataLen += 4; @@ -2150,8 +2157,7 @@ namespace { endian::Writer LE(Out, llvm::endianness::little); uint64_t Start = Out.tell(); (void)Start; - unsigned char Flags = (Data.AlreadyIncluded << 6) - | (Data.HFI.isImport << 5) + unsigned char Flags = (Data.HFI.isImport << 5) | (Writer.isWritingStdCXXNamedModules() ? 0 : Data.HFI.isPragmaOnce << 4) | (Data.HFI.DirInfo << 1); @@ -2163,6 +2169,14 @@ namespace { LE.write( Writer.getIdentifierRef(Data.HFI.LazyControllingMacro.getPtr())); + LE.write(Data.Includers.size()); + for (const Module *M : Data.Includers) { + if (!M) + LE.write(0); + else if (uint32_t ModID = Writer.getLocalOrImportedSubmoduleID(M)) + LE.write(ModID); + } + auto EmitModule = [&](Module *M, ModuleMap::ModuleHeaderRole Role) { if (uint32_t ModID = Writer.getLocalOrImportedSubmoduleID(M)) { uint32_t Value = (ModID << 3) | (unsigned)Role; @@ -2234,7 +2248,7 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) { HeaderFileInfoTrait::key_type Key = { FilenameDup, *U.Size, IncludeTimestamps ? *U.ModTime : 0}; HeaderFileInfoTrait::data_type Data = { - Empty, false, {}, {M, ModuleMap::headerKindToRole(U.Kind)}}; + Empty, {}, {}, {M, ModuleMap::headerKindToRole(U.Kind)}}; // FIXME: Deal with cases where there are multiple unresolved header // directives in different submodules for the same header. Generator.insert(Key, Data, GeneratorTrait); @@ -2274,13 +2288,27 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) { SavedStrings.push_back(Filename.data()); } - bool Included = HFI->IsLocallyIncluded || PP->alreadyIncluded(*File); + std::vector Includers; + if (WritingModule) { + llvm::DenseSet Seen; + std::function Visit = [&](const Module *M) { + if (!Seen.insert(M).second) + return; + if (M->Includes.contains(*File)) + Includers.push_back(M); + for (const Module *SubM : M->submodules()) + Visit(SubM); + }; + Visit(WritingModule); + } else if (PP->getTopLevelIncludes().contains(*File)) { + Includers.push_back(nullptr); + } HeaderFileInfoTrait::key_type Key = { Filename, File->getSize(), getTimestampForOutput(*File) }; HeaderFileInfoTrait::data_type Data = { - *HFI, Included, HS.getModuleMap().findResolvedModulesForHeader(*File), {} + *HFI, Includers, HS.getModuleMap().findResolvedModulesForHeader(*File), {} }; Generator.insert(Key, Data, GeneratorTrait); ++NumHeaderSearchEntries; diff --git a/clang/test/Modules/per-submodule-includes.m b/clang/test/Modules/per-submodule-includes.m new file mode 100644 index 0000000000000..39fd255e8e93e --- /dev/null +++ b/clang/test/Modules/per-submodule-includes.m @@ -0,0 +1,53 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t + +//--- frameworks/Textual.framework/Headers/Header.h +static int symbol; + +//--- frameworks/FW.framework/Modules/module.modulemap +framework module FW { + umbrella header "FW.h" + export * + module * { export * } +} +//--- frameworks/FW.framework/Headers/FW.h +#import +#import +//--- frameworks/FW.framework/Headers/Sub1.h +//--- frameworks/FW.framework/Headers/Sub2.h +#import + +//--- pch.modulemap +module __PCH { + header "pch.h" + export * +} +//--- pch.h +#import + +//--- tu.m +#import +int fn() { return symbol; } + +// Compilation using the PCH regularly succeeds. The import of FW/Sub1.h in the +// PCH is treated textually due to -fmodule-name=FW. +// +// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fimplicit-module-maps -F %t/frameworks -fmodule-name=FW \ +// RUN: -emit-pch -x objective-c %t/pch.h -o %t/pch.h.gch +// +// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fimplicit-module-maps -F %t/frameworks -fmodule-name=FW \ +// RUN: -include-pch %t/pch.h.gch -fsyntax-only %t/tu.m + +// Compilation using the PCH as precompiled module fails. The import of FW/Sub1.h +// in the PCH is translated to an import. Nothing is preventing that now that +// -fmodule-name=FW has been replaced with -fmodule-name=__PCH. +// +// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fimplicit-module-maps -F %t/frameworks \ +// RUN: -emit-module -fmodule-name=__PCH -x objective-c %t/pch.modulemap -o %t/pch.h.pcm +// +// Loading FW.pcm marks Textual/Header.h as imported (because it is imported in +// FW.Sub2), so the TU does not import it again. It's contents remain invisible, +// though. +// +// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fimplicit-module-maps -F %t/frameworks \ +// RUN: -include %t/pch.h -fmodule-map-file=%t/pch.modulemap -fsyntax-only %t/tu.m From bfa1e02a58f9ecf6077a748f34b9b0d9a6eca692 Mon Sep 17 00:00:00 2001 From: Qiongsi Wu Date: Mon, 1 Dec 2025 14:07:07 -0800 Subject: [PATCH 2/2] Move the included vector into HeaderFileInfoTrait::data_type and add a test case. --- clang/lib/Serialization/ASTWriter.cpp | 21 +++--- .../Modules/import-submodule-visibility.c | 64 +++++++++++++++++++ 2 files changed, 76 insertions(+), 9 deletions(-) create mode 100644 clang/test/Modules/import-submodule-visibility.c diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index c0d5d75f5c0fa..fd48b8bbe033a 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -2095,11 +2095,11 @@ namespace { struct data_type { data_type(const HeaderFileInfo &HFI, - const std::vector &Includers, + std::vector &&Includers, ArrayRef KnownHeaders, UnresolvedModule Unresolved) - : HFI(HFI), Includers(Includers), KnownHeaders(KnownHeaders), - Unresolved(Unresolved) {} + : HFI(HFI), Includers(std::move(Includers)), + KnownHeaders(KnownHeaders), Unresolved(Unresolved) {} HeaderFileInfo HFI; std::vector Includers; @@ -2157,10 +2157,11 @@ namespace { endian::Writer LE(Out, llvm::endianness::little); uint64_t Start = Out.tell(); (void)Start; - unsigned char Flags = (Data.HFI.isImport << 5) - | (Writer.isWritingStdCXXNamedModules() ? 0 : - Data.HFI.isPragmaOnce << 4) - | (Data.HFI.DirInfo << 1); + unsigned char Flags = + (Data.HFI.isImport << 5) | + (Writer.isWritingStdCXXNamedModules() ? 0 + : Data.HFI.isPragmaOnce << 4) | + (Data.HFI.DirInfo << 1); LE.write(Flags); if (Data.HFI.LazyControllingMacro.isID()) @@ -2308,8 +2309,10 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) { Filename, File->getSize(), getTimestampForOutput(*File) }; HeaderFileInfoTrait::data_type Data = { - *HFI, Includers, HS.getModuleMap().findResolvedModulesForHeader(*File), {} - }; + *HFI, + std::move(Includers), + HS.getModuleMap().findResolvedModulesForHeader(*File), + {}}; Generator.insert(Key, Data, GeneratorTrait); ++NumHeaderSearchEntries; } diff --git a/clang/test/Modules/import-submodule-visibility.c b/clang/test/Modules/import-submodule-visibility.c new file mode 100644 index 0000000000000..3491494ea7cc0 --- /dev/null +++ b/clang/test/Modules/import-submodule-visibility.c @@ -0,0 +1,64 @@ +// This test checks that imports of headers that appeared in a different submodule than +// what is imported by the current TU don't affect the compilation. + +// RUN: rm -rf %t +// RUN: split-file %s %t + +//--- C/C.h +#include "Textual.h" +//--- C/module.modulemap +module C { header "C.h" } + +//--- D/D1.h +#include "Textual.h" +//--- D/D2.h +//--- D/module.modulemap +module D { + module D1 { header "D1.h" } + module D2 { header "D2.h" } +} + +//--- E/E1.h +#include "E2.h" +//--- E/E2.h +#include "Textual.h" +//--- E/module.modulemap +module E { + module E1 { header "E1.h" } + module E2 { header "E2.h" } +} + +//--- Textual.h +#define MACRO_TEXTUAL 1 + +//--- test_top.c +#import "Textual.h" +static int x = MACRO_TEXTUAL; + +//--- test_sub.c +#import "D/D2.h" +#import "Textual.h" +static int x = MACRO_TEXTUAL; + +//--- test_transitive.c +#import "E/E1.h" +#import "Textual.h" +static int x = MACRO_TEXTUAL; + +// Simply loading a PCM file containing top-level module including a header does +// not prevent inclusion of that header in the TU. +// +// RUN: %clang_cc1 -fmodules -I %t -emit-module %t/C/module.modulemap -fmodule-name=C -o %t/C.pcm +// RUN: %clang_cc1 -fmodules -I %t -fsyntax-only %t/test_top.c -fmodule-file=%t/C.pcm + +// Loading a PCM file and importing its empty submodule does not prevent +// inclusion of headers included by invisible sibling submodules. +// +// RUN: %clang_cc1 -fmodules -I %t -emit-module %t/D/module.modulemap -fmodule-name=D -o %t/D.pcm +// RUN: %clang_cc1 -fmodules -I %t -fsyntax-only %t/test_sub.c -fmodule-file=%t/D.pcm + +// Loading a PCM file and importing a submodule does not prevent inclusion of +// headers included by some of its transitive un-exported dependencies. +// +// RUN: %clang_cc1 -fmodules -I %t -emit-module %t/E/module.modulemap -fmodule-name=E -o %t/E.pcm +// RUN: %clang_cc1 -fmodules -I %t -fsyntax-only %t/test_transitive.c -fmodule-file=%t/E.pcm