From 6550eecf945d5a8537242646ef17b49b49eff859 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Tue, 15 Oct 2024 16:29:37 -0700 Subject: [PATCH 1/2] [clang][modules] Timestamp PCM files when writing Clang uses timestamp files to track the last time an implicitly-built PCM file was verified to be up-to-date with regard to its inputs. With `-fbuild-session-{file,timestamp}=` and `-fmodules-validate-once-per-build-session` this reduces the number of times a PCM file is checked per "build session". The behavior I'm seeing with the current scheme is that when lots of Clang instances wait for the same PCM to be built, they race to validate it as soon as the file lock gets released, causing lots of concurrent IO. This patch makes it so that the timestamp is written by the same Clang instance responsible for building the PCM while still holding the lock. This makes it so that whenever a PCM file gets compiled, it's never re-validated in the same build session. I believe this is as sound as the current scheme. One thing to be aware of is that there might be a time interval between accessing input file N and writing the timestamp file, where changes to input files 0..(D); } + +void serialization::updateModuleTimestamp(StringRef ModuleFilename) { + // Overwrite the timestamp file contents so that file's mtime changes. + std::error_code EC; + llvm::raw_fd_ostream OS(ModuleFile::getTimestampFilename(ModuleFilename), EC, + llvm::sys::fs::OF_TextWithCRLF); + if (EC) + return; + OS << "Timestamp file\n"; + OS.close(); + OS.clear_error(); // Avoid triggering a fatal error. +} diff --git a/clang/lib/Serialization/ASTCommon.h b/clang/lib/Serialization/ASTCommon.h index 0230908d3e052..2a765eafe0895 100644 --- a/clang/lib/Serialization/ASTCommon.h +++ b/clang/lib/Serialization/ASTCommon.h @@ -15,6 +15,7 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/DeclFriend.h" +#include "clang/Basic/LLVM.h" #include "clang/Serialization/ASTBitCodes.h" namespace clang { @@ -100,6 +101,8 @@ inline bool isPartOfPerModuleInitializer(const Decl *D) { return false; } +void updateModuleTimestamp(StringRef ModuleFilename); + } // namespace serialization } // namespace clang diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 60b708067dc59..7d9170e7f0b47 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -4416,19 +4416,6 @@ bool ASTReader::isGlobalIndexUnavailable() const { !hasGlobalIndex() && TriedLoadingGlobalIndex; } -static void updateModuleTimestamp(ModuleFile &MF) { - // Overwrite the timestamp file contents so that file's mtime changes. - std::string TimestampFilename = MF.getTimestampFilename(); - std::error_code EC; - llvm::raw_fd_ostream OS(TimestampFilename, EC, - llvm::sys::fs::OF_TextWithCRLF); - if (EC) - return; - OS << "Timestamp file\n"; - OS.close(); - OS.clear_error(); // Avoid triggering a fatal error. -} - /// Given a cursor at the start of an AST file, scan ahead and drop the /// cursor into the start of the given block ID, returning false on success and /// true on failure. @@ -4707,7 +4694,7 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName, ModuleKind Type, ImportedModule &M = Loaded[I]; if (M.Mod->Kind == MK_ImplicitModule && M.Mod->InputFilesValidationTimestamp < HSOpts.BuildSessionTimestamp) - updateModuleTimestamp(*M.Mod); + updateModuleTimestamp(M.Mod->FileName); } } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 938d7b525cb95..c0fb7a0591be0 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -4905,6 +4905,10 @@ ASTFileSignature ASTWriter::WriteAST(Sema &SemaRef, StringRef OutputFile, this->BaseDirectory.clear(); WritingAST = false; + + if (WritingModule) + updateModuleTimestamp(OutputFile); + if (ShouldCacheASTInMemory) { // Construct MemoryBuffer and update buffer manager. ModuleCache.addBuiltPCM(OutputFile, diff --git a/clang/lib/Serialization/ModuleManager.cpp b/clang/lib/Serialization/ModuleManager.cpp index e74a16b636802..ba78c9ef5af67 100644 --- a/clang/lib/Serialization/ModuleManager.cpp +++ b/clang/lib/Serialization/ModuleManager.cpp @@ -170,7 +170,8 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type, NewModule->InputFilesValidationTimestamp = 0; if (NewModule->Kind == MK_ImplicitModule) { - std::string TimestampFilename = NewModule->getTimestampFilename(); + std::string TimestampFilename = + ModuleFile::getTimestampFilename(NewModule->FileName); llvm::vfs::Status Status; // A cached stat value would be fine as well. if (!FileMgr.getNoncachedStatValue(TimestampFilename, Status)) From 27abdf7dff2032d904af9baf0a9d073e8210b47c Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Tue, 22 Oct 2024 09:02:58 -0700 Subject: [PATCH 2/2] Only write timestamp file with `-fmodules-validate-once-per-build-session` --- clang/lib/Serialization/ASTWriter.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index c0fb7a0591be0..494890284d2f2 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -4906,7 +4906,9 @@ ASTFileSignature ASTWriter::WriteAST(Sema &SemaRef, StringRef OutputFile, WritingAST = false; - if (WritingModule) + if (WritingModule && SemaRef.PP.getHeaderSearchInfo() + .getHeaderSearchOpts() + .ModulesValidateOncePerBuildSession) updateModuleTimestamp(OutputFile); if (ShouldCacheASTInMemory) {