diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h index 4a343f2872d8d..f002f8645d3f6 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h @@ -55,15 +55,18 @@ enum class ScanningOptimizations { HeaderSearch = 1, /// Remove warnings from system modules. - SystemWarnings = 2, + SystemWarnings = (1 << 1), /// Remove unused -ivfsoverlay arguments. - VFS = 4, + VFS = (1 << 2), /// Canonicalize -D and -U options. - Macros = 8, + Macros = (1 << 3), - DSS_LAST_BITMASK_ENUM(Macros), + /// Ignore the compiler's working directory if it is safe. + IgnoreCWD = (1 << 4), + + DSS_LAST_BITMASK_ENUM(IgnoreCWD), Default = All }; diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index 2e97cac0796ce..73ed2ed09a43f 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -397,9 +397,91 @@ void ModuleDepCollector::applyDiscoveredDependencies(CompilerInvocation &CI) { } } +static bool isSafeToIgnoreCWD(const CowCompilerInvocation &CI) { + // Check if the command line input uses relative paths. + // It is not safe to ignore the current working directory if any of the + // command line inputs use relative paths. +#define IF_RELATIVE_RETURN_FALSE(PATH) \ + do { \ + if (!PATH.empty() && !llvm::sys::path::is_absolute(PATH)) \ + return false; \ + } while (0) + +#define IF_ANY_RELATIVE_RETURN_FALSE(PATHS) \ + do { \ + if (llvm::any_of(PATHS, [](const auto &P) { \ + return !P.empty() && !llvm::sys::path::is_absolute(P); \ + })) \ + return false; \ + } while (0) + + // Header search paths. 
+ const auto &HeaderSearchOpts = CI.getHeaderSearchOpts(); + IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.Sysroot); + for (auto &Entry : HeaderSearchOpts.UserEntries) + if (Entry.IgnoreSysRoot) + IF_RELATIVE_RETURN_FALSE(Entry.Path); + IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ResourceDir); + IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ModuleCachePath); + IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ModuleUserBuildPath); + for (auto I = HeaderSearchOpts.PrebuiltModuleFiles.begin(), + E = HeaderSearchOpts.PrebuiltModuleFiles.end(); + I != E;) { + auto Current = I++; + IF_RELATIVE_RETURN_FALSE(Current->second); + } + IF_ANY_RELATIVE_RETURN_FALSE(HeaderSearchOpts.PrebuiltModulePaths); + IF_ANY_RELATIVE_RETURN_FALSE(HeaderSearchOpts.VFSOverlayFiles); + + // Preprocessor options. + const auto &PPOpts = CI.getPreprocessorOpts(); + IF_ANY_RELATIVE_RETURN_FALSE(PPOpts.MacroIncludes); + IF_ANY_RELATIVE_RETURN_FALSE(PPOpts.Includes); + IF_RELATIVE_RETURN_FALSE(PPOpts.ImplicitPCHInclude); + + // Frontend options. + const auto &FrontendOpts = CI.getFrontendOpts(); + for (const FrontendInputFile &Input : FrontendOpts.Inputs) { + if (Input.isBuffer()) + continue; // FIXME: Can this happen when parsing command-line? + + IF_RELATIVE_RETURN_FALSE(Input.getFile()); + } + IF_RELATIVE_RETURN_FALSE(FrontendOpts.CodeCompletionAt.FileName); + IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModuleMapFiles); + IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModuleFiles); + IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModulesEmbedFiles); + IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ASTMergeFiles); + IF_RELATIVE_RETURN_FALSE(FrontendOpts.OverrideRecordLayoutsFile); + IF_RELATIVE_RETURN_FALSE(FrontendOpts.StatsFile); + + // Filesystem options. + const auto &FileSystemOpts = CI.getFileSystemOpts(); + IF_RELATIVE_RETURN_FALSE(FileSystemOpts.WorkingDir); + + // Codegen options. 
+ const auto &CodeGenOpts = CI.getCodeGenOpts(); + IF_RELATIVE_RETURN_FALSE(CodeGenOpts.DebugCompilationDir); + IF_RELATIVE_RETURN_FALSE(CodeGenOpts.CoverageCompilationDir); + + // Sanitizer options. + IF_ANY_RELATIVE_RETURN_FALSE(CI.getLangOpts().NoSanitizeFiles); + + // Coverage mappings. + IF_RELATIVE_RETURN_FALSE(CodeGenOpts.ProfileInstrumentUsePath); + IF_RELATIVE_RETURN_FALSE(CodeGenOpts.SampleProfileFile); + IF_RELATIVE_RETURN_FALSE(CodeGenOpts.ProfileRemappingFile); + + // Dependency output options. + for (auto &ExtraDep : CI.getDependencyOutputOpts().ExtraDeps) + IF_RELATIVE_RETURN_FALSE(ExtraDep.first); + + return true; +} + static std::string getModuleContextHash(const ModuleDeps &MD, const CowCompilerInvocation &CI, - bool EagerLoadModules, + bool EagerLoadModules, bool IgnoreCWD, llvm::vfs::FileSystem &VFS) { llvm::HashBuilder<llvm::TruncatedBLAKE3<16>, llvm::endianness::native> HashBuilder; @@ -410,8 +492,11 @@ static std::string getModuleContextHash(const ModuleDeps &MD, HashBuilder.add(getClangFullRepositoryVersion()); HashBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR); llvm::ErrorOr<std::string> CWD = VFS.getCurrentWorkingDirectory(); - if (CWD) + auto &FSOpts = const_cast<FileSystemOptions &>(CI.getFileSystemOpts()); + if (CWD && !IgnoreCWD) HashBuilder.add(*CWD); + else + FSOpts.WorkingDir.clear(); // Hash the BuildInvocation without any input files. 
SmallString<0> ArgVec; @@ -443,8 +528,11 @@ static std::string getModuleContextHash(const ModuleDeps &MD, void ModuleDepCollector::associateWithContextHash( const CowCompilerInvocation &CI, ModuleDeps &Deps) { - Deps.ID.ContextHash = getModuleContextHash( - Deps, CI, EagerLoadModules, ScanInstance.getVirtualFileSystem()); + bool IgnoreCWD = any(OptimizeArgs & ScanningOptimizations::IgnoreCWD) && + isSafeToIgnoreCWD(CI); + Deps.ID.ContextHash = + getModuleContextHash(Deps, CI, EagerLoadModules, IgnoreCWD, + ScanInstance.getVirtualFileSystem()); bool Inserted = ModuleDepsByID.insert({Deps.ID, &Deps}).second; (void)Inserted; assert(Inserted && "duplicate module mapping"); diff --git a/clang/test/ClangScanDeps/modules-context-hash-cwd.c b/clang/test/ClangScanDeps/modules-context-hash-cwd.c new file mode 100644 index 0000000000000..459d2c90debe6 --- /dev/null +++ b/clang/test/ClangScanDeps/modules-context-hash-cwd.c @@ -0,0 +1,188 @@ +// Test current directory pruning when computing the context hash. + +// REQUIRES: shell + +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed -e "s|DIR|%/t|g" %t/cdb0.json.in > %t/cdb0.json +// RUN: sed -e "s|DIR|%/t|g" %t/cdb1.json.in > %t/cdb1.json +// RUN: sed -e "s|DIR|%/t|g" %t/cdb3.json.in > %t/cdb3.json +// RUN: sed -e "s|DIR|%/t|g" %t/cdb4.json.in > %t/cdb4.json +// RUN: sed -e "s|DIR|%/t|g" %t/cdb5.json.in > %t/cdb5.json +// RUN: clang-scan-deps -compilation-database %t/cdb0.json -format experimental-full > %t/result0.json +// RUN: clang-scan-deps -compilation-database %t/cdb1.json -format experimental-full > %t/result1.json +// It is not a typo to use cdb1.json for result2. We intend to use the same +// compilation database, but different clang-scan-deps optimize-args options. 
+// RUN: clang-scan-deps -compilation-database %t/cdb1.json -format experimental-full -optimize-args=header-search,system-warnings,vfs,canonicalize-macros > %t/result2.json +// RUN: clang-scan-deps -compilation-database %t/cdb3.json -format experimental-full > %t/result3.json +// RUN: clang-scan-deps -compilation-database %t/cdb4.json -format experimental-full > %t/result4.json +// RUN: clang-scan-deps -compilation-database %t/cdb5.json -format experimental-full > %t/result5.json +// RUN: cat %t/result0.json %t/result1.json | FileCheck %s +// RUN: cat %t/result0.json %t/result2.json | FileCheck %s -check-prefix=SKIPOPT +// RUN: cat %t/result3.json %t/result4.json | FileCheck %s -check-prefix=RELPATH +// RUN: cat %t/result0.json %t/result5.json | FileCheck %s + +//--- cdb0.json.in +[{ + "directory": "DIR", + "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -o DIR/tu.o", + "file": "DIR/tu.c" +}] + +//--- cdb1.json.in +[{ + "directory": "DIR/a", + "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -o DIR/tu.o", + "file": "DIR/tu.c" +}] + +// cdb2 is skipped because we reuse cdb1. 
+ +//--- cdb3.json.in +[{ + "directory": "DIR", + "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -fprebuilt-module-path=.././module -IDIR/include/ -o DIR/tu.o ", + "file": "DIR/tu.c" +}] + +//--- cdb4.json.in +[{ + "directory": "DIR/a/", + "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -fprebuilt-module-path=.././module -IDIR/include/ -o DIR/tu.o ", + "file": "DIR/tu.c" +}] + +//--- cdb5.json.in +[{ + "directory": "DIR", + "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -Xclang -working-directory=DIR/a/ -o DIR/tu.o", + "file": "DIR/tu.c" +}] + +//--- include/module.modulemap +module mod { + header "mod.h" +} + +//--- include/mod.h + +//--- tu.c +#include "mod.h" + +// Check that result0 and result1/result5 compute the same hash with +// optimization on. The only difference between result0 and result1/result5 is +// the compiler's working directory. +// CHECK: { +// CHECK-NEXT: "modules": [ +// CHECK-NEXT: { +// CHECK-NEXT: "clang-module-deps": [], +// CHECK: "context-hash": "[[HASH:.*]]", +// CHECK: } +// CHECK: "translation-units": [ +// CHECK: { +// CHECK: "commands": [ +// CHECK: { +// CHECK-NEXT: "clang-context-hash": "{{.*}}", +// CHECK-NEXT: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "[[HASH]]", +// CHECK-NEXT: "module-name": "mod" +// CHECK: } +// CHECK: ], +// CHECK: { +// CHECK-NEXT: "modules": [ +// CHECK-NEXT: { +// CHECK-NEXT: "clang-module-deps": [], +// CHECK: "context-hash": "[[HASH]]", +// CHECK: } +// CHECK: "translation-units": [ +// CHECK: { +// CHECK: "commands": [ +// CHECK: { +// CHECK-NEXT: "clang-context-hash": "{{.*}}", +// CHECK-NEXT: "clang-module-deps": [ +// CHECK-NEXT: { +// CHECK-NEXT: "context-hash": "[[HASH]]", +// CHECK-NEXT: "module-name": "mod" +// CHECK: } +// CHECK: ], + +// Check that result0 and result2 compute different hashes because +// the working directory optimization is turned off for result2. 
+// SKIPOPT: { +// SKIPOPT-NEXT: "modules": [ +// SKIPOPT-NEXT: { +// SKIPOPT-NEXT: "clang-module-deps": [], +// SKIPOPT: "context-hash": "[[HASH0:.*]]", +// SKIPOPT: } +// SKIPOPT: "translation-units": [ +// SKIPOPT: { +// SKIPOPT: "commands": [ +// SKIPOPT: { +// SKIPOPT-NEXT: "clang-context-hash": "{{.*}}", +// SKIPOPT-NEXT: "clang-module-deps": [ +// SKIPOPT-NEXT: { +// SKIPOPT-NEXT: "context-hash": "[[HASH0]]", +// SKIPOPT-NEXT: "module-name": "mod" +// SKIPOPT: } +// SKIPOPT: ], +// SKIPOPT: { +// SKIPOPT-NEXT: "modules": [ +// SKIPOPT-NEXT: { +// SKIPOPT-NEXT: "clang-module-deps": [], +// SKIPOPT-NOT: "context-hash": "[[HASH0]]", +// SKIPOPT: "context-hash": "[[HASH2:.*]]", +// SKIPOPT: } +// SKIPOPT: "translation-units": [ +// SKIPOPT: { +// SKIPOPT: "commands": [ +// SKIPOPT: { +// SKIPOPT-NEXT: "clang-context-hash": "{{.*}}", +// SKIPOPT-NEXT: "clang-module-deps": [ +// SKIPOPT-NEXT: { +// SKIPOPT-NOT: "context-hash": "[[HASH0]]", +// SKIPOPT-NEXT: "context-hash": "[[HASH2]]" +// SKIPOPT-NEXT: "module-name": "mod" +// SKIPOPT: } +// SKIPOPT: ], + +// Check that result3 and result4 contain different hashes because +// both have the same relative path as a command line input, and +// they are produced using different compiler working directories. 
+// RELPATH: { +// RELPATH-NEXT: "modules": [ +// RELPATH-NEXT: { +// RELPATH-NEXT: "clang-module-deps": [], +// RELPATH: "context-hash": "[[HASH3:.*]]", +// RELPATH: } +// RELPATH: "translation-units": [ +// RELPATH: { +// RELPATH: "commands": [ +// RELPATH: { +// RELPATH-NEXT: "clang-context-hash": "{{.*}}", +// RELPATH-NEXT: "clang-module-deps": [ +// RELPATH-NEXT: { +// RELPATH-NEXT: "context-hash": "[[HASH3]]", +// RELPATH-NEXT: "module-name": "mod" +// RELPATH: } +// RELPATH: ], +// RELPATH: { +// RELPATH-NEXT: "modules": [ +// RELPATH-NEXT: { +// RELPATH-NEXT: "clang-module-deps": [], +// RELPATH-NOT: "context-hash": "[[HASH3]]", +// RELPATH: "context-hash": "[[HASH4:.*]]", +// RELPATH: } +// RELPATH: "translation-units": [ +// RELPATH: { +// RELPATH: "commands": [ +// RELPATH: { +// RELPATH-NEXT: "clang-context-hash": "{{.*}}", +// RELPATH-NEXT: "clang-module-deps": [ +// RELPATH-NEXT: { +// RELPATH-NOT: "context-hash": "[[HASH3]]", +// RELPATH-NEXT: "context-hash": "[[HASH4]]" +// RELPATH-NEXT: "module-name": "mod" +// RELPATH: } +// RELPATH: ], + diff --git a/clang/test/ClangScanDeps/working-dir.m b/clang/test/ClangScanDeps/working-dir.m index a04f8c2486b98..c6b7b1988d3cf 100644 --- a/clang/test/ClangScanDeps/working-dir.m +++ b/clang/test/ClangScanDeps/working-dir.m @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s|DIR|%/t|g" %t/build/compile-commands.json.in > %t/build/compile-commands.json // RUN: clang-scan-deps -compilation-database %t/build/compile-commands.json \ -// RUN: -j 1 -format experimental-full --optimize-args=all > %t/deps.db +// RUN: -j 1 -format experimental-full --optimize-args=header-search,system-warnings,vfs,canonicalize-macros > %t/deps.db // RUN: cat %t/deps.db | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t // Check that there are two separate modules hashes. One for each working dir. 
diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index 709dc513be281..8d429534a2007 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -164,6 +164,8 @@ static void ParseArgs(int argc, char **argv) { .Case("system-warnings", ScanningOptimizations::SystemWarnings) .Case("vfs", ScanningOptimizations::VFS) .Case("canonicalize-macros", ScanningOptimizations::Macros) + .Case("ignore-current-working-dir", + ScanningOptimizations::IgnoreCWD) .Case("all", ScanningOptimizations::All) .Default(std::nullopt); if (!Optimization) {