[Comgr][Cache] Cache source-code->bitcode actions (llvm#1664)

jmmartinez · web-flow · commit 9ff03719e5e2 · 2025-07-30T17:36:08.000+02:00
diff --git a/amd/comgr/src/comgr-clang-command.cpp b/amd/comgr/src/comgr-clang-command.cpp
@@ -91,9 +91,6 @@ SmallVector<StringRef, 1> getInputFiles(driver::Command &Command) {
   return Paths;
 }
 
-bool isSourceCodeInput(const driver::InputInfo &II) {
-  return driver::types::isSrcFile(II.getType());
-}
 } // namespace
 ClangCommand::ClangCommand(driver::Command &Command,
                            DiagnosticOptions &DiagOpts, vfs::FileSystem &VFS,
@@ -150,13 +147,7 @@ bool ClangCommand::canCache() const {
   bool HasOneOutput = Command.getOutputFilenames().size() == 1;
   bool IsPreprocessorCommand = getClass() == driver::Action::PreprocessJobClass;
 
-  // This reduces the applicability of the cache, but it helps us deliver
-  // something now and deal with the PCH issues later. The cache would still
-  // help for spirv compilation (e.g. bitcode->asm) and for intermediate
-  // compilation steps
-  bool HasSourceCodeInput = any_of(Command.getInputInfos(), isSourceCodeInput);
-
-  return HasOneOutput && !IsPreprocessorCommand && !HasSourceCodeInput &&
+  return HasOneOutput && !IsPreprocessorCommand &&
          !hasDebugOrProfileInfo(Command.getArguments());
 }
 
diff --git a/amd/comgr/src/comgr-compiler.cpp b/amd/comgr/src/comgr-compiler.cpp
@@ -977,7 +977,8 @@ amd_comgr_status_t AMDGPUCompiler::addIncludeFlags() {
   if (none_of(InSet->DataObjects, needsPreprocessing))
     return AMD_COMGR_STATUS_SUCCESS;
 
-  switch (ActionInfo->Language) {
+  amd_comgr_language_t Language = ActionInfo->Language;
+  switch (Language) {
   case AMD_COMGR_LANGUAGE_OPENCL_1_2:
   case AMD_COMGR_LANGUAGE_OPENCL_2_0: {
     SmallString<128> OpenCLCBasePath = IncludeDir;
@@ -1019,6 +1020,26 @@ amd_comgr_status_t AMDGPUCompiler::addIncludeFlags() {
     Args.push_back("-fno-validate-pch");
   }
 
+  bool CacheEnabled = CommandCache::get(LogS) != nullptr;
+  if (PrecompiledHeaders.empty() && CacheEnabled) {
+    // The -no-integrated-cpp option splits the preprocessing stage from the
+    // rest of the compilation jobs. The cache doesn't handle source-code input,
+    // but can handle preprocessed input (to avoid dealing with includes).
+    Args.push_back("-no-integrated-cpp");
+    // The -dD option is used to keep the #define directives in the preprocessed
+    // output. When -fdeclare-opencl-builtins is used, the OpenCL builtin
+    // semantic analysis queries the preprocessor for macro definitions that
+    // signal that an OpenCL feature is enabled. After preprocessing, these
+    // #defines are gone, so the semantic analysis during the compilation stage
+    // fails. This flag keeps them so that they are present during the
+    // compilation stage.
+    // Additionally, we need to keep the definitions for #pragma directives.
+    // The preprocessor doesn't expand macro identifiers in #pragmas, and if we
+    // do not pass -dD the definitions would be missing when clang parses the
+    // code.
+    Args.push_back("-dD");
+  }
+
   return AMD_COMGR_STATUS_SUCCESS;
 }
 
diff --git a/amd/comgr/test-lit/cache-tests/compile-minimal-test-cached.cl b/amd/comgr/test-lit/cache-tests/compile-minimal-test-cached.cl
@@ -24,9 +24,9 @@
 // RUN: llvm-objdump -d %t_a.bin | FileCheck %S/../compile-minimal-test.cl
 // RUN: COUNT_BEFORE=$(ls "%t.cache" | wc -l)
 
-// COM: One element for the tag, one for bc->obj another for obj->exec. No
-// COM:    elements for src->bc since we currently not support it.
-// RUN: [ 3 -eq $COUNT_BEFORE ]
+// COM: One element for the tag, one for cli->bc, one for bc->obj, and another
+// COM: for obj->exec. No elements for src->cli since this is not supported.
+// RUN: [ 4 -eq $COUNT_BEFORE ]
 //
 // RUN: AMD_COMGR_CACHE_DIR=%t.cache compile-opencl-minimal \
 // RUN:    %S/../compile-minimal-test.cl %t_b.bin 1.2