diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 198dd01b8d07a..fe7a17b8d2d09 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -499,6 +499,9 @@ class ASTWriter : public ASTDeserializationListener, /// Mapping from a source location entry to whether it is affecting or not. llvm::BitVector IsSLocAffecting; + /// Mapping from a source location entry to whether it must be included as + /// input file. + llvm::BitVector IsSLocFileEntryAffecting; /// Mapping from \c FileID to an index into the FileID adjustment table. std::vector NonAffectingFileIDs; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 99e492ee4f7e1..e38d3ead67ba0 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -5866,6 +5866,12 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, } CurrentModule->Kind = Kind; + // Note that we may be rewriting an existing location and it is important + // to keep doing that. In particular, we would like to prefer a + // `DefinitionLoc` loaded from the module file instead of the location + // created in the current source manager, because it allows the new + // location to be marked as "unaffecting" when writing and avoid creating + // duplicate locations for the same module map file. CurrentModule->DefinitionLoc = DefinitionLoc; CurrentModule->Signature = F.Signature; CurrentModule->IsFromModuleFile = true; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 3b174cb539ebd..72b0ace7a896c 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -38,6 +38,7 @@ #include "clang/AST/TypeLocVisitor.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/DiagnosticOptions.h" +#include "clang/Basic/FileEntry.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/FileSystemOptions.h" #include "clang/Basic/IdentifierTable.h" @@ -81,6 +82,7 @@ #include "llvm/ADT/APSInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" @@ -166,7 +168,12 @@ static TypeCode getTypeCodeForTypeClass(Type::TypeClass id) { namespace { -std::optional> +struct AffectingModuleMaps { + llvm::DenseSet DefinitionFileIDs; + llvm::DenseSet DefinitionFiles; +}; + +std::optional GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) { if (!PP.getHeaderSearchInfo() .getHeaderSearchOpts() @@ -174,10 +181,12 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) { return std::nullopt; const HeaderSearch &HS = PP.getHeaderSearchInfo(); + const SourceManager &SM = PP.getSourceManager(); const ModuleMap &MM = HS.getModuleMap(); - std::set ModuleMaps; - std::set ProcessedModules; + llvm::DenseSet ModuleMaps; + + llvm::DenseSet ProcessedModules; auto CollectModuleMapsForHierarchy = [&](const Module *M) { M = M->getTopLevelModule(); @@ -192,13 +201,13 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) { // The containing module map is affecting, because it's being pointed // into by Module::DefinitionLoc. - if (auto FE = MM.getContainingModuleMapFile(Mod)) - ModuleMaps.insert(*FE); + if (auto F = MM.getContainingModuleMapFileID(Mod); F.isValid()) + ModuleMaps.insert(F); // For inferred modules, the module map that allowed inferring is not // related to the virtual containing module map file. It did affect the // compilation, though. - if (auto FE = MM.getModuleMapFileForUniquing(Mod)) - ModuleMaps.insert(*FE); + if (auto UniqF = MM.getModuleMapFileIDForUniquing(Mod); UniqF.isValid()) + ModuleMaps.insert(UniqF); for (auto *SubM : Mod->submodules()) Q.push(SubM); @@ -268,7 +277,16 @@ GetAffectingModuleMaps(const Preprocessor &PP, Module *RootModule) { // just ban module map hierarchies where module map defining a (sub)module X // includes a module map defining a module that's not a submodule of X. - return ModuleMaps; + llvm::DenseSet ModuleFileEntries; + for (FileID MM : ModuleMaps) { + if (auto *FE = SM.getFileEntryForID(MM)) + ModuleFileEntries.insert(FE); + } + + AffectingModuleMaps R; + R.DefinitionFileIDs = std::move(ModuleMaps); + R.DefinitionFiles = std::move(ModuleFileEntries); + return std::move(R); } class ASTTypeWriter { @@ -1773,14 +1791,17 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr, continue; // Do not emit input files that do not affect current module. - if (!IsSLocAffecting[I]) + if (!IsSLocFileEntryAffecting[I]) continue; InputFileEntry Entry(*Cache->OrigEntry); Entry.IsSystemFile = isSystem(File.getFileCharacteristic()); Entry.IsTransient = Cache->IsTransient; Entry.BufferOverridden = Cache->BufferOverridden; - Entry.IsTopLevel = getAffectingIncludeLoc(SourceMgr, File).isInvalid(); + + FileID IncludeFileID = SourceMgr.getFileID(File.getIncludeLoc()); + Entry.IsTopLevel = IncludeFileID.isInvalid() || IncludeFileID.ID < 0 || + !IsSLocFileEntryAffecting[IncludeFileID.ID]; Entry.IsModuleMap = isModuleMap(File.getFileCharacteristic()); uint64_t ContentHash = 0; @@ -4925,6 +4946,7 @@ void ASTWriter::computeNonAffectingInputFiles() { unsigned N = SrcMgr.local_sloc_entry_size(); IsSLocAffecting.resize(N, true); + IsSLocFileEntryAffecting.resize(N, true); if (!WritingModule) return; @@ -4961,10 +4983,12 @@ void ASTWriter::computeNonAffectingInputFiles() { continue; // Don't prune module maps that are affecting. - if (llvm::is_contained(*AffectingModuleMaps, *Cache->OrigEntry)) + if (AffectingModuleMaps->DefinitionFileIDs.contains(FID)) continue; IsSLocAffecting[I] = false; + IsSLocFileEntryAffecting[I] = + AffectingModuleMaps->DefinitionFiles.contains(*Cache->OrigEntry); FileIDAdjustment += 1; // Even empty files take up one element in the offset table. diff --git a/clang/test/Modules/prune-non-affecting-module-map-repeated.cpp b/clang/test/Modules/prune-non-affecting-module-map-repeated.cpp new file mode 100644 index 0000000000000..2c09bc4b01b02 --- /dev/null +++ b/clang/test/Modules/prune-non-affecting-module-map-repeated.cpp @@ -0,0 +1,85 @@ +// Check that the same module map file passed to -fmodule-map-file *and* +// available from one of the `-fmodule-file` does not allocate extra source +// location space. This optimization is important for using module maps in +// large codebases to avoid running out of source location space. + +// RUN: rm -rf %t && mkdir %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules -fmodule-map-file=%t/base.map -fmodule-name=base -emit-module %t/base.map -o %t/base.pcm +// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules \ +// RUN: -fmodule-map-file=%t/base.map -fmodule-map-file=%t/mod1.map \ +// RUN: -fmodule-file=%t/base.pcm \ +// RUN: -fmodule-name=mod1 -emit-module %t/mod1.map -o %t/mod1.pcm +// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules \ +// RUN: -fmodule-map-file=%t/base.map -fmodule-map-file=%t/mod1.map -fmodule-map-file=%t/mod2.map \ +// RUN: -fmodule-file=%t/mod1.pcm \ +// RUN: -fmodule-name=mod2 -emit-module %t/mod2.map -o %t/mod2.pcm +// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules \ +// RUN: -fmodule-map-file=%t/base.map -fmodule-map-file=%t/mod2.map -fmodule-map-file=%t/mod3.map \ +// RUN: -fmodule-file=%t/mod2.pcm \ +// RUN: -fmodule-name=mod3 -emit-module %t/mod3.map -o %t/mod3.pcm +// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules \ +// RUN: -fmodule-map-file=%t/base.map -fmodule-map-file=%t/mod3.map -fmodule-map-file=%t/mod4.map \ +// RUN: -fmodule-file=%t/mod3.pcm \ +// RUN: -fmodule-name=mod4 -emit-module %t/mod4.map -o %t/mod4.pcm +// RUN: %clang_cc1 -xc++ -fmodules -fno-implicit-modules -fmodule-map-file=%t/base.map -fmodule-map-file=%t/mod4.map -fmodule-file=%t/mod4.pcm -fsyntax-only -verify %t/check_slocs.cc + +//--- base.map +module base { header "vector.h" } +//--- mod1.map +module mod1 { header "mod1.h" } +//--- mod2.map +module mod2 { header "mod2.h" } +//--- mod3.map +module mod3 { header "mod3.h" } +//--- mod4.map +module mod4 { header "mod4.h" } +//--- check_slocs.cc +#include "mod4.h" +#pragma clang __debug sloc_usage // expected-remark {{source manager location address space usage}} +// expected-note@* {{% of available space}} + +// Module map files files that were specified on the command line are entered twice (once when parsing command-line, once loaded from the .pcm) +// Those that not specified on the command line must be entered once. + +// expected-note@base.map:1 {{file entered 2 times}} +// expected-note@mod4.map:1 {{file entered 2 times}} +// expected-note@mod1.map:1 {{file entered 1 time}} +// expected-note@mod2.map:1 {{file entered 1 time}} +// expected-note@mod3.map:1 {{file entered 1 time}} +// expected-note@* + {{file entered}} + + +//--- vector.h +#ifndef VECTOR_H +#define VECTOR_H +#endif + +//--- mod1.h +#ifndef MOD1 +#define MOD1 +#include "vector.h" +int mod1(); +#endif +//--- mod2.h +#ifndef MOD2 +#define MOD2 +#include "vector.h" +#include "mod1.h" +int mod2(); +#endif +//--- mod3.h +#ifndef MOD3 +#define MOD3 +#include "vector.h" +#include "mod2.h" +int mod3(); +#endif +//--- mod4.h +#ifndef MOD4 +#define MOD4 +#include "vector.h" +#include "mod3.h" +int mod4(); +#endif