diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index e1a3cbf6b1ade..540d589bd270b 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -7557,6 +7557,10 @@ let Visibility = [SYCLRTCOnlyOption] in {
let Group = sycl_rtc_only_Group in {
def auto_pch : Flag<["--"], "auto-pch">,
HelpText<"Enable Auto-PCH for SYCL RTC Compilation">;
+ def persistent_auto_pch_EQ
+ : Joined<["--"], "persistent-auto-pch=">,
+ HelpText<"Use Persistent Auto-PCH cache located at
for SYCL "
+ "RTC Compilation">;
} // let Group = sycl_rtc_only_Group
} // let Visibility = [SYCLRTCOnlyOption]
diff --git a/clang/include/clang/Frontend/PrecompiledPreamble.h b/clang/include/clang/Frontend/PrecompiledPreamble.h
index 4b3935abea411..9558cc9cb5259 100644
--- a/clang/include/clang/Frontend/PrecompiledPreamble.h
+++ b/clang/include/clang/Frontend/PrecompiledPreamble.h
@@ -132,6 +132,8 @@ class PrecompiledPreamble {
IntrusiveRefCntPtr &VFS,
llvm::MemoryBuffer *MainFileBuffer) const;
+ llvm::StringRef memoryContents() const;
+
private:
PrecompiledPreamble(std::unique_ptr Storage,
std::vector PreambleBytes,
diff --git a/clang/lib/Frontend/PrecompiledPreamble.cpp b/clang/lib/Frontend/PrecompiledPreamble.cpp
index f53bf24adc76c..92fe2690e4fbb 100644
--- a/clang/lib/Frontend/PrecompiledPreamble.cpp
+++ b/clang/lib/Frontend/PrecompiledPreamble.cpp
@@ -727,6 +727,10 @@ void PrecompiledPreamble::OverridePreamble(
configurePreamble(Bounds, CI, VFS, MainFileBuffer);
}
+llvm::StringRef PrecompiledPreamble::memoryContents() const {
+ return Storage->memoryContents();
+}
+
PrecompiledPreamble::PrecompiledPreamble(
std::unique_ptr Storage, std::vector PreambleBytes,
bool PreambleEndsAtStartOfLine,
diff --git a/clang/test/Driver/sycl-unsupported.cpp b/clang/test/Driver/sycl-unsupported.cpp
index 7caf761e05f91..a2c09f615209a 100644
--- a/clang/test/Driver/sycl-unsupported.cpp
+++ b/clang/test/Driver/sycl-unsupported.cpp
@@ -64,14 +64,20 @@
// UNSUPPORTED_OPT-NOT: clang{{.*}} "-fsycl-is-device"{{.*}} "[[OPT_CC1]]{{.*}}"
// UNSUPPORTED_OPT: clang{{.*}} "-fsycl-is-host"{{.*}} "[[OPT_CC1]]{{.*}}"
-// "--auto-pch" should only be enabled for SYCL RTC compilations, regular driver
-// shouldn't know about it:
+// Options that should only be enabled for SYCL RTC compilations, regular driver
+// shouldn't know about them:
//
// RUN: not %clangxx -### %s --auto-pch 2>&1 | FileCheck %s --check-prefix AUTO_PCH
// RUN: not %clangxx -fsycl-device-only -### %s --auto-pch 2>&1 | FileCheck %s --check-prefix AUTO_PCH
// RUN: not %clangxx -fsycl -### %s --auto-pch 2>&1 | FileCheck %s --check-prefix AUTO_PCH
//
// AUTO_PCH: error: unknown argument: '--auto-pch'
+//
+// RUN: not %clangxx -### %s --persistent-auto-pch="%t.dir" 2>&1 | FileCheck %s --check-prefix PERSISTENT_AUTO_PCH
+// RUN: not %clangxx -fsycl-device-only -### %s --persistent-auto-pch="%t.dir" 2>&1 | FileCheck %s --check-prefix PERSISTENT_AUTO_PCH
+// RUN: not %clangxx -fsycl -### %s --persistent-auto-pch="%t.dir" 2>&1 | FileCheck %s --check-prefix PERSISTENT_AUTO_PCH
+//
+// PERSISTENT_AUTO_PCH: error: unknown argument: '--persistent-auto-pch={{.*}}'
// FPGA support has been removed, usage of any FPGA specific options and any
// options that have FPGA specific arguments should emit a specific error
diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp
index 95180b10e1e9c..439196063bca8 100644
--- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp
+++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp
@@ -46,6 +46,10 @@
#include
#include
#include
+#include
+#include
+#include
+#include
#include
#include
#include
@@ -67,7 +71,100 @@ using namespace llvm::vfs;
using namespace jit_compiler;
namespace {
+struct AutoPCHError : public ErrorInfo {
+public:
+ static char ID;
+
+ std::error_code convertToErrorCode() const override {
+ assert(false && "AutoPCHError doesn't support convertToErrorCode!");
+ return {};
+ }
+
+ void log(raw_ostream &OS) const override { OS << "auto-pch error"; }
+};
+
+char AutoPCHError::ID = 0;
+
+// This key is the same for both in-memory and persistent auto-pch.
+struct auto_pch_key {
+ std::string Opts;
+ std::string Preamble;
+
+ void update_hasher(BLAKE3 &Hasher) const {
+ Hasher.update(Opts);
+ Hasher.update(Preamble);
+ }
+
+ Error write(raw_pwrite_stream &OS) const {
+ AppendingBinaryByteStream Stream(llvm::endianness::little);
+ BinaryStreamWriter Writer(Stream);
+ if (auto Error = Writer.writeInteger(Opts.size()))
+ return Error;
+ if (auto Error = Writer.writeFixedString(Opts))
+ return Error;
+ if (auto Error = Writer.writeInteger(Preamble.size()))
+ return Error;
+ if (auto Error = Writer.writeFixedString(Preamble))
+ return Error;
+
+ OS.SetBuffered();
+ for (uint8_t x : Stream.data())
+ OS << x;
+ return Error::success();
+ }
+
+ Error read(llvm::BinaryStreamReader &Reader) {
+ (void)AutoPCHError::ID;
+ auto ReadStr = [&](std::string &Out) -> Error {
+ std::string::size_type StrLen;
+
+ if (auto Err = Reader.readInteger(StrLen))
+ return Err;
+
+ if (StrLen >= std::numeric_limits::max())
+ return make_error();
+
+ StringRef Str;
+ if (auto Err = Reader.readFixedString(Str, (uint32_t)StrLen))
+ return Err;
+
+ Out = Str.str();
+ return Error::success();
+ };
+
+ if (auto Err = ReadStr(Opts))
+ return Err;
+
+ return ReadStr(Preamble);
+ }
+
+ friend bool operator==(const auto_pch_key &lhs, const auto_pch_key &rhs) {
+ return std::tie(lhs.Opts, lhs.Preamble) == std::tie(rhs.Opts, rhs.Preamble);
+ }
+ friend bool operator!=(const auto_pch_key &lhs, const auto_pch_key &rhs) {
+ return !(lhs == rhs);
+ }
+ friend bool operator<(const auto_pch_key &lhs, const auto_pch_key &rhs) {
+ return std::tie(lhs.Opts, lhs.Preamble) < std::tie(rhs.Opts, rhs.Preamble);
+ }
+};
+} // namespace
+
+template <> struct std::hash {
+ size_t operator()(const auto_pch_key &key) const {
+ BLAKE3 Hasher;
+ key.update_hasher(Hasher);
+
+ // No `std::bit_cast` in c++17, emulate:
+ auto Hash = Hasher.result();
+ size_t Result;
+ static_assert(sizeof(Hash) == sizeof(size_t));
+ std::memcpy(&Result, &Hash, sizeof(size_t));
+ return Result;
+ }
+};
+namespace {
class SYCLToolchain {
// TODO: For some reason, moving this to a data member of the single instance
// of SYCLToolchain results in some data races leading to memory corruption
@@ -89,14 +186,13 @@ class SYCLToolchain {
SYCLToolchain() = default;
struct PrecompiledPreambles {
- using key = std::pair;
std::mutex Mutex;
- std::map> PreamblesMap;
+ std::map> PreamblesMap;
};
// Similar to FrontendActionFactory, but we don't take ownership of
- // `FrontendAction`, nor do we create copies of it as we only perform a single
- // `ToolInvocation`.
+ // `FrontendAction`, nor do we create copies of it as we only perform a
+ // single `ToolInvocation`.
class Action : public ToolAction {
FrontendAction &FEAction;
@@ -139,9 +235,9 @@ class SYCLToolchain {
DerivedArgList DAL{UserArgList};
const auto &OptTable = getDriverOptTable();
DAL.AddFlagArg(nullptr, OptTable.getOption(OPT_fsycl_device_only));
- // User args may contain options not intended for the frontend, but we can't
- // claim them here to tell the driver they're used later. Hence, suppress
- // the unused argument warning.
+ // User args may contain options not intended for the frontend, but we
+ // can't claim them here to tell the driver they're used later. Hence,
+ // suppress the unused argument warning.
DAL.AddFlagArg(nullptr, OptTable.getOption(OPT_Qunused_arguments));
if (Format == BinaryFormat::PTX || Format == BinaryFormat::AMDGCN) {
@@ -175,12 +271,45 @@ class SYCLToolchain {
return CommandLine;
}
+ template
class ActionWithPCHPreamble : public Action {
std::string CmdLineOpts;
+ std::string PersistentPCHDir; // Empty if !Persistent.
+
+ static void addImplicitPersistentPreamble(
+ std::unique_ptr PrecompiledPreamble,
+ const PreambleBounds &Bounds, CompilerInvocation &CI,
+ IntrusiveRefCntPtr &VFS) {
+
+ // Processing similar to PrecompiledPreamble::configurePreamble.
+
+ auto &PreprocessorOpts = CI.getPreprocessorOpts();
+ PreprocessorOpts.PrecompiledPreambleBytes.first = Bounds.Size;
+ PreprocessorOpts.PrecompiledPreambleBytes.second =
+ Bounds.PreambleEndsAtStartOfLine;
+ PreprocessorOpts.DisablePCHOrModuleValidation =
+ DisableValidationForModuleKind::PCH;
+
+ std::string PCHPath = (SYCLToolchain::instance().getPrefix() +
+ "/remapped_persistent_preamble")
+ .str();
+ PreprocessorOpts.ImplicitPCHInclude = PCHPath;
+
+ auto PCHFS = llvm::makeIntrusiveRefCnt();
+ PCHFS->addFile(PCHPath, 0, std::move(PrecompiledPreamble));
+ auto OverlayFS =
+ llvm::makeIntrusiveRefCnt(VFS);
+ OverlayFS->pushOverlay(PCHFS);
+ VFS = std::move(OverlayFS);
+ }
public:
- ActionWithPCHPreamble(FrontendAction &FEAction, std::string &&CmdLineOpts)
- : Action(FEAction), CmdLineOpts(std::move(CmdLineOpts)) {}
+ ActionWithPCHPreamble(FrontendAction &FEAction, std::string &&CmdLineOpts,
+ std::string PersistentPCHDir = {})
+ : Action(FEAction), CmdLineOpts(std::move(CmdLineOpts)),
+ PersistentPCHDir(std::move(PersistentPCHDir)) {
+ assert(this->PersistentPCHDir.empty() || Persistent);
+ }
bool runInvocation(std::shared_ptr Invocation,
FileManager *Files,
@@ -193,62 +322,195 @@ class SYCLToolchain {
PreambleBounds Bounds = ComputePreambleBounds(
Invocation->getLangOpts(), **MainFileBuffer, 100 /* MaxLines */);
- PrecompiledPreambles::key key{
+ auto_pch_key key{
std::move(CmdLineOpts),
(*MainFileBuffer)->getBuffer().substr(0, Bounds.Size).str()};
- std::shared_ptr Preamble;
- {
- PrecompiledPreambles &Preambles = SYCLToolchain::instance().Preambles;
- std::lock_guard Lock{Preambles.Mutex};
- auto [It, Inserted] = Preambles.PreamblesMap.try_emplace(key);
-
- if (Inserted) {
- PreambleCallbacks Callbacks;
- auto DiagIds = llvm::makeIntrusiveRefCnt();
- auto DiagOpts = Invocation->getDiagnosticOpts();
- auto Diags = llvm::makeIntrusiveRefCnt(
- DiagIds, DiagOpts, DiagConsumer, false);
-
- static std::string StoragePath =
- (SYCLToolchain::instance().getPrefix() + "/preambles").str();
- llvm::ErrorOr NewPreamble =
- PrecompiledPreamble::Build(
- *Invocation, MainFileBuffer->get(), Bounds, Diags,
- Files->getVirtualFileSystemPtr(), PCHContainerOps,
- /*StorePreamblesInMemory*/ true, StoragePath, Callbacks,
- /*AllowASTWithErrors=*/false);
-
- if (!NewPreamble)
- return false;
-
- It->second = std::make_shared(
- std::move(NewPreamble.get()));
+ // In-memory for both `Persistent` and not because PrecompiledPreamble's
+ // `StorePreamblesInMemory==false` would create a *temporary* pch file
+ // on the file system, it will still be removed once preamble object
+ // dies.
+ auto BuildPreamble = [&]() {
+ PreambleCallbacks Callbacks;
+ auto DiagIds = llvm::makeIntrusiveRefCnt();
+ auto DiagOpts = Invocation->getDiagnosticOpts();
+ auto Diags = llvm::makeIntrusiveRefCnt(
+ DiagIds, DiagOpts, DiagConsumer, false);
+
+ static std::string StoragePath =
+ (SYCLToolchain::instance().getPrefix() + "/preambles").str();
+ return PrecompiledPreamble::Build(
+ *Invocation, MainFileBuffer->get(), Bounds, Diags,
+ Files->getVirtualFileSystemPtr(), PCHContainerOps,
+ /*StorePreamblesInMemory*/ true, StoragePath, Callbacks,
+ /*AllowASTWithErrors=*/false);
+ };
+
+ if constexpr (Persistent) {
+ BLAKE3 Hasher;
+ key.update_hasher(Hasher);
+
+ std::string EncodedHash = encodeBase64(Hasher.result());
+ // Make the encoding filesystem-friendly.
+ std::replace(EncodedHash.begin(), EncodedHash.end(), '/', '-');
+
+ // `llvm::localCache`'s API uses a callback to process cached data and
+ // the callback's return value (if any) is effectively ignored, so we
+ // need this extra `Success` variable to be able to properly return
+ // compilation status.
+ bool Success = false;
+ auto RunWithoutPCH = [&]() -> bool {
+ // Run original invocation:
+ Success =
+ Action::runInvocation(std::move(Invocation), Files,
+ std::move(PCHContainerOps), DiagConsumer);
+ return Success;
+ };
+
+ auto UseCachedPreamble = [&](StringRef PCHContent) {
+ std::unique_ptr PCHMemBuf =
+ MemoryBuffer::getMemBufferCopy(PCHContent);
+
+ auto VFS = Files->getVirtualFileSystemPtr();
+ addImplicitPersistentPreamble(std::move(PCHMemBuf), Bounds,
+ *Invocation, VFS);
+
+ auto NewFiles = makeIntrusiveRefCnt(
+ Files->getFileSystemOpts(), std::move(VFS));
+
+ Success =
+ Action::runInvocation(std::move(Invocation), NewFiles.get(),
+ std::move(PCHContainerOps), DiagConsumer);
+ return Success;
+ };
+
+ // `llvm::localCache` calls the callback on either succesful cache read
+ // or during "commit" if an entry is being created. The problem is that
+ // commit might fail and the callback won't be called at all. It's
+ // easier to just don't rely on it on cache miss and perform compilation
+ // with newly generated preamble ourselves.
+ bool CacheHit = true;
+
+ auto CacheCallback = [&](size_t, const Twine &,
+ std::unique_ptr MB) -> void {
+ if (!CacheHit)
+ return; // See above.
+
+ llvm::MemoryBufferByteStream MemBufStream{std::move(MB),
+ llvm::endianness::little};
+ llvm::BinaryStreamReader Reader(MemBufStream);
+
+ auto_pch_key persistent_key;
+ // In case of any errors reading the cache, treat it as a hash
+ // collision and just compile without using PCH.
+ if (errorToBool(persistent_key.read(Reader)))
+ return (void)RunWithoutPCH();
+
+ // Hash collision, **very** unlikely.
+ if (key != persistent_key)
+ return (void)RunWithoutPCH();
+
+ StringRef PCHStorage;
+
+ // This restriction is simply due to the `BinaryStreamReader|Writer`
+ // APIs. Pre-compiled preambles in tests seem to be low double digits
+ // megabytes which is well under 4GB limit imposed here.
+ if (Reader.bytesRemaining() >= std::numeric_limits::max())
+ return (void)RunWithoutPCH();
+ if (errorToBool(Reader.readFixedString(
+ PCHStorage, static_cast(Reader.bytesRemaining()))))
+ return (void)RunWithoutPCH();
+
+ return (void)UseCachedPreamble(PCHStorage);
+ };
+
+ auto CacheOrErr =
+ llvm::localCache("SYCL RTC Persistent Preambles", "syclrtc-tmp-",
+ PersistentPCHDir, CacheCallback);
+
+ assert(CacheOrErr && "Don't see any code path returning Error");
+ auto AddStreamOrErr = (*CacheOrErr)(0, EncodedHash, "");
+ if (!AddStreamOrErr) {
+ // Not a hit, but we won't be able to store the data in the cache, so
+ // no need to generate precompiled preamble.
+ consumeError(AddStreamOrErr.takeError());
+ return RunWithoutPCH();
}
+ auto &AddStream = *AddStreamOrErr;
+ if (!AddStream) {
+ // UseCachedPreamble was called by the cache after successfully
+ // reading persistent auto-pch file.
+ return Success;
+ }
+ CacheHit = false;
+
+ llvm::ErrorOr NewPreamble = BuildPreamble();
+ if (!NewPreamble) {
+ return false;
+ }
+
+ // We could have used `NewPreamble`'s `AddImplicitPreamble` (i.e., as on
+ // the in-memory/non-persistent path) here but I think it's better to
+ // use the same code on cache read/miss:
+ UseCachedPreamble(NewPreamble->memoryContents());
+
+ // Any errors updating the persistent preambles cache won't affect
+ // current compilation, so ignore any error below:
+
+ auto FileOrErr = AddStream(1, "");
+ if (!FileOrErr)
+ return Success;
+
+ llvm::CachedFileStream *CFS = FileOrErr->get();
+ raw_pwrite_stream &OS = *CFS->OS;
+ consumeError(key.write(OS));
+
+ OS << NewPreamble->memoryContents();
+
+ consumeError(CFS->commit());
- Preamble = It->second;
- } // End lock
-
- assert(Preamble);
- assert(Preamble->CanReuse(*Invocation, **MainFileBuffer, Bounds,
- Files->getVirtualFileSystem()));
-
- assert(Invocation->getPreprocessorOpts().RetainRemappedFileBuffers ==
- false);
- // `PreprocessorOptions::RetainRemappedFileBuffers` defaults to false, so
- // MemoryBuffer will be cleaned up by the CompilerInstance, thus
- // `std::unique_ptr::release`.
- auto Buf = llvm::MemoryBuffer::getMemBufferCopy(
- (*MainFileBuffer)->getBuffer(), MainFilePath)
- .release();
-
- auto VFS = Files->getVirtualFileSystemPtr();
- Preamble->AddImplicitPreamble(*Invocation, VFS, Buf);
- auto NewFiles = makeIntrusiveRefCnt(
- Files->getFileSystemOpts(), std::move(VFS));
-
- return Action::runInvocation(std::move(Invocation), NewFiles.get(),
- std::move(PCHContainerOps), DiagConsumer);
+ return Success;
+ } else {
+ std::shared_ptr Preamble;
+ {
+ PrecompiledPreambles &Preambles = SYCLToolchain::instance().Preambles;
+ std::lock_guard Lock{Preambles.Mutex};
+ auto [It, Inserted] = Preambles.PreamblesMap.try_emplace(key);
+
+ if (Inserted) {
+ llvm::ErrorOr NewPreamble = BuildPreamble();
+
+ if (!NewPreamble)
+ return false;
+
+ It->second = std::make_shared(
+ std::move(NewPreamble.get()));
+ }
+
+ Preamble = It->second;
+ } // End lock
+
+ assert(Preamble);
+ assert(Preamble->CanReuse(*Invocation, **MainFileBuffer, Bounds,
+ Files->getVirtualFileSystem()));
+
+ assert(Invocation->getPreprocessorOpts().RetainRemappedFileBuffers ==
+ false);
+ // `PreprocessorOptions::RetainRemappedFileBuffers` defaults to false,
+ // so MemoryBuffer will be cleaned up by the CompilerInstance, thus
+ // `std::unique_ptr::release`.
+ auto Buf = llvm::MemoryBuffer::getMemBufferCopy(
+ (*MainFileBuffer)->getBuffer(), MainFilePath)
+ .release();
+
+ auto VFS = Files->getVirtualFileSystemPtr();
+ Preamble->AddImplicitPreamble(*Invocation, VFS, Buf);
+ auto NewFiles = makeIntrusiveRefCnt(
+ Files->getFileSystemOpts(), std::move(VFS));
+
+ return Action::runInvocation(std::move(Invocation), NewFiles.get(),
+ std::move(PCHContainerOps), DiagConsumer);
+ }
}
};
@@ -262,7 +524,7 @@ class SYCLToolchain {
const char *SourceFilePath, FrontendAction &FEAction,
IntrusiveRefCntPtr FSOverlay = nullptr,
DiagnosticConsumer *DiagConsumer = nullptr,
- bool UseAutoPCH = false) {
+ bool EnableAutoPCHOpts = false) {
std::vector CommandLine =
createCommandLine(UserArgList, Format, SourceFilePath);
@@ -275,24 +537,41 @@ class SYCLToolchain {
auto Files = llvm::makeIntrusiveRefCnt(
clang::FileSystemOptions{"." /* WorkingDir */}, FS);
- Action Normal{FEAction};
+ auto Run = [&](auto &Action) {
+ ToolInvocation TI{std::move(CommandLine), &Action, Files.get(),
+ std::make_shared()};
- // User compilation options must be part of the key in the preambles map. We
- // can either use "raw" user options or the "processed" from
- // `createCommandLine` as long as we're consistent in what we're using.
- // Current internal APIs pass `InputArgList` around instead of a single
- // `std::string`, so it's easier to use `CommandLine`. Just make sure to
- // drop `rtc_N.cpp` that is always different:
- ActionWithPCHPreamble WithPreamble{FEAction,
- join(drop_end(CommandLine, 1), " ")};
- ToolInvocation TI{std::move(CommandLine),
- UseAutoPCH ? static_cast(&WithPreamble)
- : &Normal,
- Files.get(), std::make_shared()};
+ TI.setDiagnosticConsumer(DiagConsumer ? DiagConsumer : &IgnoreDiag);
- TI.setDiagnosticConsumer(DiagConsumer ? DiagConsumer : &IgnoreDiag);
+ return TI.run();
+ };
- return TI.run();
+ if (!EnableAutoPCHOpts) {
+ Action A{FEAction};
+ return Run(A);
+ }
+ if (UserArgList.hasArg(OPT_auto_pch)) {
+ // User compilation options must be part of the key in the preambles map.
+ // We can either use "raw" user options or the "processed" from
+ // `createCommandLine` as long as we're consistent in what we're using.
+ // Current internal APIs pass `InputArgList` around instead of a single
+ // `std::string`, so it's easier to use `CommandLine`. Just make sure to
+ // drop `rtc_N.cpp` that is always different:
+ ActionWithPCHPreamble WithPreamble{
+ FEAction, join(drop_end(CommandLine, 1), " ")};
+ return Run(WithPreamble);
+ }
+ if (UserArgList.hasArg(OPT_persistent_auto_pch_EQ)) {
+ // The comment above applies here as well.
+ ActionWithPCHPreamble WithPreamble{
+ FEAction, join(drop_end(CommandLine, 1), " "),
+ UserArgList.getLastArgValue(OPT_persistent_auto_pch_EQ).str()};
+ return Run(WithPreamble);
+ }
+
+ // Auto-PCH allowed for this FEAction but not requested by the user:
+ Action A{FEAction};
+ return Run(A);
}
Expected loadBitcodeLibrary(StringRef LibPath,
@@ -460,11 +739,10 @@ Expected jit_compiler::compileDeviceCode(
DiagnosticOptions DiagOpts;
ClangDiagnosticWrapper Wrapper(BuildLog, &DiagOpts);
- bool AutoPCH = UserArgList.hasArg(OPT_auto_pch);
-
if (SYCLToolchain::instance().run(UserArgList, Format, SourceFile.Path, ELOA,
getInMemoryFS(SourceFile, IncludeFiles),
- Wrapper.consumer(), AutoPCH)) {
+ Wrapper.consumer(),
+ true /* EnableAutoPCHOpts */)) {
return ELOA.takeModule();
} else {
return createStringError(BuildLog);
@@ -944,6 +1222,11 @@ jit_compiler::parseUserArgs(View UserArgs) {
UnsupportedArg->getAsString(AL).c_str());
}
+ if (AL.hasArg(OPT_auto_pch) && AL.hasArg(OPT_persistent_auto_pch_EQ)) {
+ return createStringError(
+ "--auto-pch and --persistent-auto-pch= cannot be used together");
+ }
+
return std::move(AL);
}
diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc
index 1d57f445c2b3f..553ae6dd219be 100644
--- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc
+++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc
@@ -1131,11 +1131,10 @@ forward-declarable.
The first time this option is passed, the compiler finds the initial set of
preprocessor directives (e.g., `#define`/`#include`) and comments in the
-compiled source string (the preamble) and pre-compiles it. Essentialy, it
-behaves like a precompiled header containing that preamble. On subsequent
-compilations, if the compiled source string has the same preamble and the same
-compilation options are used, the precompiled preamble is used, which speeds up
-compilation.
+compiled source string (the preamble) and precompiles it. Essentialy, it behaves
+like a precompiled header containing that preamble. On subsequent compilations,
+if the compiled source string has the same preamble and the same compilation
+options are used, the precompiled preamble is used, which speeds up compilation.
If the compiled source string has a different preamble or compilation options
differ, a new precompiled preamble is generated, and that preamble can also be
@@ -1181,6 +1180,32 @@ generated precompiled preamble can be used:
and `-Werror=pch-date-time`, which cause the compiler to diagnose a warning
or error in this scenario.
+==== `--persistent-auto-pch=`
+
+Similar to `--auto-pch` but the precompiled preamble is stored on the filesystem
+at the location ``. That allows an application to precompile the preamble
+once and reuse it across multiple invocation of the application which isn't
+possible with `--auto-pch` that stores the precompiled preamble in-memory. This
+option is incompatible with `--auto-pch`.
+
+Some notes about the current behavior:
+
+* Threads only communicate via the persistent cache on the filesystem with no
+ other inter-thread communication to avoid extra synchronization overhead. In
+ an unlikely event that a preamble is missing from the cache and multiple
+ threads need it, each thread will build the preamble.
+
+* In the **very** unlikely event of a hash collision the precompiled preamble
+ from the persistent cache is ignored and the source is compiled without it.
+
+* Unlike `--auto-pch`, the `--persistent-auto-pch` option indexes the
+ precompiled preamble by a hash of the preamble content/compilation options, as
+ opposed to using those directly as a key. As a result, there is a highly
+ unlikely possibility that two different preambles will produce the same hash
+ value. Even if this occurs, the compilation won't fail. However, a compilation
+ using the conflicting hash would proceed without pre-compiled preamble support
+ as if this option wasn't enabled.
+
=== Known issues and limitations when the language is `sycl`
==== Changing the compiler action or output
diff --git a/sycl/test-e2e/KernelCompiler/conflicting_auto_pch_opts.cpp b/sycl/test-e2e/KernelCompiler/conflicting_auto_pch_opts.cpp
new file mode 100644
index 0000000000000..ab2e1500e9103
--- /dev/null
+++ b/sycl/test-e2e/KernelCompiler/conflicting_auto_pch_opts.cpp
@@ -0,0 +1,47 @@
+// RUN: %{build} -o %t.out
+// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out | FileCheck %s
+
+// UNSUPPORTED: target-native_cpu
+// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/20142
+
+#include
+#include
+
+using namespace std::string_view_literals;
+namespace syclexp = sycl::ext::oneapi::experimental;
+
+int main(int argc, char **argv) {
+
+ sycl::queue q;
+ auto props = syclexp::properties{syclexp::build_options{
+ std::vector{"--auto-pch", "--persistent-auto-pch=/tmp"}}};
+
+ try {
+ std::string src = R"""(
+#include
+#include
+
+namespace syclext = sycl::ext::oneapi;
+namespace syclexp = sycl::ext::oneapi::experimental;
+
+extern "C"
+SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
+void iota(float start, float *ptr) {
+ size_t id = syclext::this_work_item::get_nd_item<1>().get_global_linear_id();
+ ptr[id] = start + static_cast(id);
+}
+)""";
+
+ // Error when generating PCH:
+ // CHECK-LABEL: Parsing of user arguments failed
+ // CHECK-NEXT: Detailed information:
+ // CHECK-NEXT: --auto-pch and --persistent-auto-pch= cannot be used together
+
+ auto kb_src = syclexp::create_kernel_bundle_from_source(
+ q.get_context(), syclexp::source_language::sycl, src);
+ auto kb_exe = syclexp::build(kb_src, props);
+ return 1;
+ } catch (sycl::exception &e) {
+ std::cout << e.what() << std::endl;
+ }
+}
diff --git a/sycl/test-e2e/KernelCompiler/multi_threaded_rtc.cpp b/sycl/test-e2e/KernelCompiler/multi_threaded_rtc.cpp
index 1546498cad838..3ead976025b09 100644
--- a/sycl/test-e2e/KernelCompiler/multi_threaded_rtc.cpp
+++ b/sycl/test-e2e/KernelCompiler/multi_threaded_rtc.cpp
@@ -1,4 +1,4 @@
-// RUN: %{build} -o %t.out
+// RUN: %{build} '-DPCH_DIR="%/t.dir"' -o %t.out
// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out
// UNSUPPORTED: target-native_cpu
@@ -9,10 +9,13 @@
#include
#include
+#include
#include
namespace syclexp = sycl::ext::oneapi::experimental;
int main() {
+ std::error_code ec;
+ std::filesystem::remove_all(PCH_DIR, ec); // noexcept overload
sycl::queue q;
constexpr int N = 16;
std::string src_str = R"""(
@@ -48,4 +51,12 @@ void foo(int *p) {
t = std::thread{Run, auto_pch};
for (auto &t : threads)
t.join();
+
+ auto persistent_auto_pch = syclexp::properties{syclexp::build_options{
+ std::vector{"--persistent-auto-pch=" PCH_DIR}}};
+
+ for (auto &t : threads)
+ t = std::thread{Run, persistent_auto_pch};
+ for (auto &t : threads)
+ t.join();
}
diff --git a/sycl/test-e2e/KernelCompiler/persistent_auto_pch_cache_collision.cpp b/sycl/test-e2e/KernelCompiler/persistent_auto_pch_cache_collision.cpp
new file mode 100644
index 0000000000000..ef0d9740e767b
--- /dev/null
+++ b/sycl/test-e2e/KernelCompiler/persistent_auto_pch_cache_collision.cpp
@@ -0,0 +1,84 @@
+// We want to use %{run-unfiltered-devices} for this test, it's easier if it's
+// limited to SPIR-V target.
+// REQUIRES: target-spir
+
+// %t.out arguments:
+// * path to the persistent pch cache
+// * VAL variable set in the a.hpp from which the precompiled preamble is
+// compiled
+// * `-DOPT_TO_AFFECT_HASH=` to affect the produced hash of the preamble+opts
+
+// RUN: %{build} -o %t.out
+// RUN: %{run-aux} rm -rf %t.cache1 %t.cache2
+
+// Two normal persistent-auto-pch gen/use runs
+// RUN: %{run-unfiltered-devices} %t.out %t.cache1 42 1 | FileCheck %s -DVAL=42
+// RUN: %{run-unfiltered-devices} %t.out %t.cache1 42 1 | FileCheck %s -DVAL=42
+// RUN: %{run-unfiltered-devices} %t.out %t.cache2 43 2 | FileCheck %s -DVAL=43
+// RUN: %{run-unfiltered-devices} %t.out %t.cache2 43 2 | FileCheck %s -DVAL=43
+
+// Content of the a.hpp changes, but auto-pch doesn't track that, so the
+// precompiled preamble from the cache is reused:
+// RUN: %{run-unfiltered-devices} %t.out %t.cache1 44 1 | FileCheck %s -DVAL=42
+
+// Simulate collision - actual compilation opts stored on disk have
+// "-DOPT_TO_AFFECT_HASH=2" while the hash encoded in the filename is still
+// "-DOPT_TO_AFFECT_HASH=1". The "/*" below is the primary reason we're using
+// %{run-unfiltered-devices} as we need to reference the cached file of which we
+// don't know the name.
+// RUN: %{run-aux} cp %t.cache2/* %t.cache1/*
+// PCH on disk is ignored due to cache collision:
+// RUN: %{run-unfiltered-devices} %t.out %t.cache1 44 1 | FileCheck %s -DVAL=44
+
+// CHECK: Result: [[VAL]]
+
+#include
+#include
+#include
+
+namespace syclexp = sycl::ext::oneapi::experimental;
+
+std::string getInclude(int val) {
+ return
+ R"""(
+#include
+#include
+inline constexpr int VAL = )""" +
+ std::to_string(val) + ";\n";
+}
+
+const std::string src = R"""(
+#include "a.hpp"
+
+namespace syclext = sycl::ext::oneapi;
+namespace syclexp = sycl::ext::oneapi::experimental;
+extern "C"
+SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel))
+void foo(int *p) {
+ *p = VAL;
+}
+)""";
+
+int main(int argc, char *argv[]) {
+ sycl::queue q;
+ assert(argc == 4);
+ std::string pch_dir = argv[1];
+ int value = std::atoi(argv[2]);
+ int hash_def = std::atoi(argv[3]);
+ auto kb_src = syclexp::create_kernel_bundle_from_source(
+ q.get_context(), syclexp::source_language::sycl, src,
+ syclexp::properties{syclexp::include_files{"a.hpp", getInclude(value)}});
+ auto kb_exe = syclexp::build(
+ kb_src,
+ syclexp::properties{syclexp::build_options{std::vector{
+ "--persistent-auto-pch=" + pch_dir,
+ "-DOPT_TO_AFFECT_HASH=" + std::to_string(hash_def)}}});
+ sycl::kernel krn = kb_exe.ext_oneapi_get_kernel("foo");
+ auto *p = sycl::malloc_shared(1, q);
+ q.submit([&](sycl::handler &cgh) {
+ cgh.set_args(p);
+ cgh.single_task(krn);
+ }).wait();
+ std::cout << "Result: " << *p << std::endl;
+ sycl::free(p, q);
+}
diff --git a/sycl/test-e2e/KernelCompiler/persistent_auto_pch_read_error.cpp b/sycl/test-e2e/KernelCompiler/persistent_auto_pch_read_error.cpp
new file mode 100644
index 0000000000000..6b6c66de71e85
--- /dev/null
+++ b/sycl/test-e2e/KernelCompiler/persistent_auto_pch_read_error.cpp
@@ -0,0 +1,50 @@
+// We want to use %{run-unfiltered-devices} for this test, it's easier if it's
+// limited to SPIR-V target.
+// REQUIRES: target-spir
+
+// PCH_DIR needs to be the same between build/run, so use %{run-aux}
+// extensively.
+
+// RUN: %{run-aux} %{build} '-DPCH_DIR="%/t.dir"' -o %t.out
+// RUN: %{run-aux} rm -rf %t.dir
+
+// Generate:
+// RUN: %{run-unfiltered-devices} %t.out
+
+// Use:
+// RUN: %{run-unfiltered-devices} %t.out
+
+// File too small:
+// RUN: %{run-aux} echo "1" > %t.dir/*
+// RUN: %{run-unfiltered-devices} %t.out
+
+// Cache file has garbage:
+// RUN: %{run-aux} cp %s %t.dir/*
+// RUN: %{run-unfiltered-devices} %t.out
+
+#include
+#include
+
+namespace syclexp = sycl::ext::oneapi::experimental;
+
+const std::string src = R"""(
+#include
+#include
+
+namespace syclext = sycl::ext::oneapi;
+namespace syclexp = sycl::ext::oneapi::experimental;
+extern "C"
+SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel))
+void foo(int *p) {
+ *p = 42;
+}
+)""";
+
+int main() {
+ sycl::queue q;
+ auto kb_src = syclexp::create_kernel_bundle_from_source(
+ q.get_context(), syclexp::source_language::sycl, src);
+ auto kb_exe = syclexp::build(
+ kb_src, syclexp::properties{syclexp::build_options{
+ std::vector{"--persistent-auto-pch=" PCH_DIR}}});
+}
diff --git a/sycl/test-e2e/KernelCompiler/persistent_auto_pch_stress_deletion.cpp b/sycl/test-e2e/KernelCompiler/persistent_auto_pch_stress_deletion.cpp
new file mode 100644
index 0000000000000..44d4d920d1692
--- /dev/null
+++ b/sycl/test-e2e/KernelCompiler/persistent_auto_pch_stress_deletion.cpp
@@ -0,0 +1,67 @@
+// RUN: %{build} '-DPCH_DIR="%/t.dir"' -O3 -o %t.out
+// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out
+
+// UNSUPPORTED: target-native_cpu
+// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/20142
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace std::string_view_literals;
+namespace syclexp = sycl::ext::oneapi::experimental;
+int main() {
+ sycl::queue q;
+ constexpr int N = 16;
+
+ auto Run = [&](int i) {
+ std::string preamble = R"""(
+#include
+#include
+)""";
+
+ std::string body = R"""(
+namespace syclext = sycl::ext::oneapi;
+namespace syclexp = sycl::ext::oneapi::experimental;
+
+extern "C"
+SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
+void iota(float start, float *ptr) {
+ size_t id = syclext::this_work_item::get_nd_item<1>().get_global_linear_id();
+ ptr[id] = start + static_cast(id) + VAL;
+}
+)""";
+
+ std::string src_str = preamble +
+ "inline constexpr int VAL = " + std::to_string(i) +
+ ";\n" + body;
+ auto kb_src = syclexp::create_kernel_bundle_from_source(
+ q.get_context(), syclexp::source_language::sycl, src_str);
+ auto kb_exe = syclexp::build(
+ kb_src,
+ syclexp::properties{syclexp::build_options{
+ std::vector{"--persistent-auto-pch=" PCH_DIR}}});
+ };
+
+ std::thread threads[N];
+
+ for (int i = 0; i < N; ++i) {
+ // Use noexcept overload to avoid exception if PCH_DIR doesn't exist:
+ std::error_code ec;
+ std::filesystem::remove_all(PCH_DIR, ec);
+
+ threads[i] = std::thread{Run, i};
+ using namespace std::chrono_literals;
+ std::this_thread::sleep_for(100ms);
+ }
+
+ for (auto &t : threads)
+ if (t.joinable())
+ t.join();
+}
diff --git a/sycl/test-e2e/PerformanceTests/KernelCompiler/auto-pch.cpp b/sycl/test-e2e/PerformanceTests/KernelCompiler/auto-pch.cpp
index c37d8febb5acb..ca70efa9b2db4 100644
--- a/sycl/test-e2e/PerformanceTests/KernelCompiler/auto-pch.cpp
+++ b/sycl/test-e2e/PerformanceTests/KernelCompiler/auto-pch.cpp
@@ -1,4 +1,4 @@
-// RUN: %{build} -O3 -o %t.out
+// RUN: %{build} '-DPCH_DIR="%/t.dir"' -O3 -o %t.out
// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out
// UNSUPPORTED: target-native_cpu
@@ -8,6 +8,7 @@
#include
#include
+#include
#include
#include
#include
@@ -77,14 +78,22 @@ void iota(float start, float *ptr) {
std::cout << "| ";
Run(syclexp::properties{
syclexp::build_options{std::vector{"--auto-pch"}}});
+ std::error_code ec;
+ std::filesystem::remove_all(PCH_DIR, ec);
+
+ std::cout << "| ";
+ Run(syclexp::properties{syclexp::build_options{
+ std::vector{"--persistent-auto-pch=" PCH_DIR}}});
std::cout << std::endl;
}
int main(int argc, char **argv) {
// So that output could be copy-pasted into GH comments and rendered as a
// table:
- std::cout << "Extra Headers | Without PCH | With auto-PCH" << std::endl;
- std::cout << "-|-|-" << std::endl;
+ std::cout << "Extra Headers | Without PCH | With Auto-PCH | With Persistent "
+ "Auto-PCH"
+ << std::endl;
+ std::cout << "-|-|-|-" << std::endl;
run({});
run({"sycl/half_type.hpp"});
run({"sycl/ext/oneapi/bfloat16.hpp"});
@@ -92,4 +101,5 @@ int main(int argc, char **argv) {
run({"sycl/vector.hpp"});
run({"sycl/multi_ptr.hpp"});
run({"sycl/builtins.hpp"});
+ run({"sycl/ext/oneapi/matrix/matrix.hpp"});
}