diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index e1a3cbf6b1ade..540d589bd270b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -7557,6 +7557,10 @@ let Visibility = [SYCLRTCOnlyOption] in { let Group = sycl_rtc_only_Group in { def auto_pch : Flag<["--"], "auto-pch">, HelpText<"Enable Auto-PCH for SYCL RTC Compilation">; + def persistent_auto_pch_EQ + : Joined<["--"], "persistent-auto-pch=">, + HelpText<"Use Persistent Auto-PCH cache located at for SYCL " + "RTC Compilation">; } // let Group = sycl_rtc_only_Group } // let Visibility = [SYCLRTCOnlyOption] diff --git a/clang/include/clang/Frontend/PrecompiledPreamble.h b/clang/include/clang/Frontend/PrecompiledPreamble.h index 4b3935abea411..9558cc9cb5259 100644 --- a/clang/include/clang/Frontend/PrecompiledPreamble.h +++ b/clang/include/clang/Frontend/PrecompiledPreamble.h @@ -132,6 +132,8 @@ class PrecompiledPreamble { IntrusiveRefCntPtr &VFS, llvm::MemoryBuffer *MainFileBuffer) const; + llvm::StringRef memoryContents() const; + private: PrecompiledPreamble(std::unique_ptr Storage, std::vector PreambleBytes, diff --git a/clang/lib/Frontend/PrecompiledPreamble.cpp b/clang/lib/Frontend/PrecompiledPreamble.cpp index f53bf24adc76c..92fe2690e4fbb 100644 --- a/clang/lib/Frontend/PrecompiledPreamble.cpp +++ b/clang/lib/Frontend/PrecompiledPreamble.cpp @@ -727,6 +727,10 @@ void PrecompiledPreamble::OverridePreamble( configurePreamble(Bounds, CI, VFS, MainFileBuffer); } +llvm::StringRef PrecompiledPreamble::memoryContents() const { + return Storage->memoryContents(); +} + PrecompiledPreamble::PrecompiledPreamble( std::unique_ptr Storage, std::vector PreambleBytes, bool PreambleEndsAtStartOfLine, diff --git a/clang/test/Driver/sycl-unsupported.cpp b/clang/test/Driver/sycl-unsupported.cpp index 7caf761e05f91..a2c09f615209a 100644 --- a/clang/test/Driver/sycl-unsupported.cpp +++ b/clang/test/Driver/sycl-unsupported.cpp @@ -64,14 +64,20 @@ // UNSUPPORTED_OPT-NOT: clang{{.*}} "-fsycl-is-device"{{.*}} "[[OPT_CC1]]{{.*}}" // UNSUPPORTED_OPT: clang{{.*}} "-fsycl-is-host"{{.*}} "[[OPT_CC1]]{{.*}}" -// "--auto-pch" should only be enabled for SYCL RTC compilations, regular driver -// shouldn't know about it: +// Options that should only be enabled for SYCL RTC compilations, regular driver +// shouldn't know about them: // // RUN: not %clangxx -### %s --auto-pch 2>&1 | FileCheck %s --check-prefix AUTO_PCH // RUN: not %clangxx -fsycl-device-only -### %s --auto-pch 2>&1 | FileCheck %s --check-prefix AUTO_PCH // RUN: not %clangxx -fsycl -### %s --auto-pch 2>&1 | FileCheck %s --check-prefix AUTO_PCH // // AUTO_PCH: error: unknown argument: '--auto-pch' +// +// RUN: not %clangxx -### %s --persistent-auto-pch="%t.dir" 2>&1 | FileCheck %s --check-prefix PERSISTENT_AUTO_PCH +// RUN: not %clangxx -fsycl-device-only -### %s --persistent-auto-pch="%t.dir" 2>&1 | FileCheck %s --check-prefix PERSISTENT_AUTO_PCH +// RUN: not %clangxx -fsycl -### %s --persistent-auto-pch="%t.dir" 2>&1 | FileCheck %s --check-prefix PERSISTENT_AUTO_PCH +// +// PERSISTENT_AUTO_PCH: error: unknown argument: '--persistent-auto-pch={{.*}}' // FPGA support has been removed, usage of any FPGA specific options and any // options that have FPGA specific arguments should emit a specific error diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index 95180b10e1e9c..439196063bca8 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -46,6 +46,10 @@ #include #include #include +#include +#include +#include +#include #include #include #include @@ -67,7 +71,100 @@ using namespace llvm::vfs; using namespace jit_compiler; namespace { +struct AutoPCHError : public ErrorInfo { +public: + static char ID; + + std::error_code convertToErrorCode() const override { + assert(false && "AutoPCHError doesn't support convertToErrorCode!"); + return {}; + } + + void log(raw_ostream &OS) const override { OS << "auto-pch error"; } +}; + +char AutoPCHError::ID = 0; + +// This key is the same for both in-memory and persistent auto-pch. +struct auto_pch_key { + std::string Opts; + std::string Preamble; + + void update_hasher(BLAKE3 &Hasher) const { + Hasher.update(Opts); + Hasher.update(Preamble); + } + + Error write(raw_pwrite_stream &OS) const { + AppendingBinaryByteStream Stream(llvm::endianness::little); + BinaryStreamWriter Writer(Stream); + if (auto Error = Writer.writeInteger(Opts.size())) + return Error; + if (auto Error = Writer.writeFixedString(Opts)) + return Error; + if (auto Error = Writer.writeInteger(Preamble.size())) + return Error; + if (auto Error = Writer.writeFixedString(Preamble)) + return Error; + + OS.SetBuffered(); + for (uint8_t x : Stream.data()) + OS << x; + return Error::success(); + } + + Error read(llvm::BinaryStreamReader &Reader) { + (void)AutoPCHError::ID; + auto ReadStr = [&](std::string &Out) -> Error { + std::string::size_type StrLen; + + if (auto Err = Reader.readInteger(StrLen)) + return Err; + + if (StrLen >= std::numeric_limits::max()) + return make_error(); + + StringRef Str; + if (auto Err = Reader.readFixedString(Str, (uint32_t)StrLen)) + return Err; + + Out = Str.str(); + return Error::success(); + }; + + if (auto Err = ReadStr(Opts)) + return Err; + + return ReadStr(Preamble); + } + + friend bool operator==(const auto_pch_key &lhs, const auto_pch_key &rhs) { + return std::tie(lhs.Opts, lhs.Preamble) == std::tie(rhs.Opts, rhs.Preamble); + } + friend bool operator!=(const auto_pch_key &lhs, const auto_pch_key &rhs) { + return !(lhs == rhs); + } + friend bool operator<(const auto_pch_key &lhs, const auto_pch_key &rhs) { + return std::tie(lhs.Opts, lhs.Preamble) < std::tie(rhs.Opts, rhs.Preamble); + } +}; +} // namespace + +template <> struct std::hash { + size_t operator()(const auto_pch_key &key) const { + BLAKE3 Hasher; + key.update_hasher(Hasher); + + // No `std::bit_cast` in c++17, emulate: + auto Hash = Hasher.result(); + size_t Result; + static_assert(sizeof(Hash) == sizeof(size_t)); + std::memcpy(&Result, &Hash, sizeof(size_t)); + return Result; + } +}; +namespace { class SYCLToolchain { // TODO: For some reason, moving this to a data member of the single instance // of SYCLToolchain results in some data races leading to memory corruption @@ -89,14 +186,13 @@ class SYCLToolchain { SYCLToolchain() = default; struct PrecompiledPreambles { - using key = std::pair; std::mutex Mutex; - std::map> PreamblesMap; + std::map> PreamblesMap; }; // Similar to FrontendActionFactory, but we don't take ownership of - // `FrontendAction`, nor do we create copies of it as we only perform a single - // `ToolInvocation`. + // `FrontendAction`, nor do we create copies of it as we only perform a + // single `ToolInvocation`. class Action : public ToolAction { FrontendAction &FEAction; @@ -139,9 +235,9 @@ class SYCLToolchain { DerivedArgList DAL{UserArgList}; const auto &OptTable = getDriverOptTable(); DAL.AddFlagArg(nullptr, OptTable.getOption(OPT_fsycl_device_only)); - // User args may contain options not intended for the frontend, but we can't - // claim them here to tell the driver they're used later. Hence, suppress - // the unused argument warning. + // User args may contain options not intended for the frontend, but we + // can't claim them here to tell the driver they're used later. Hence, + // suppress the unused argument warning. DAL.AddFlagArg(nullptr, OptTable.getOption(OPT_Qunused_arguments)); if (Format == BinaryFormat::PTX || Format == BinaryFormat::AMDGCN) { @@ -175,12 +271,45 @@ class SYCLToolchain { return CommandLine; } + template class ActionWithPCHPreamble : public Action { std::string CmdLineOpts; + std::string PersistentPCHDir; // Empty if !Persistent. + + static void addImplicitPersistentPreamble( + std::unique_ptr PrecompiledPreamble, + const PreambleBounds &Bounds, CompilerInvocation &CI, + IntrusiveRefCntPtr &VFS) { + + // Processing similar to PrecompiledPreamble::configurePreamble. + + auto &PreprocessorOpts = CI.getPreprocessorOpts(); + PreprocessorOpts.PrecompiledPreambleBytes.first = Bounds.Size; + PreprocessorOpts.PrecompiledPreambleBytes.second = + Bounds.PreambleEndsAtStartOfLine; + PreprocessorOpts.DisablePCHOrModuleValidation = + DisableValidationForModuleKind::PCH; + + std::string PCHPath = (SYCLToolchain::instance().getPrefix() + + "/remapped_persistent_preamble") + .str(); + PreprocessorOpts.ImplicitPCHInclude = PCHPath; + + auto PCHFS = llvm::makeIntrusiveRefCnt(); + PCHFS->addFile(PCHPath, 0, std::move(PrecompiledPreamble)); + auto OverlayFS = + llvm::makeIntrusiveRefCnt(VFS); + OverlayFS->pushOverlay(PCHFS); + VFS = std::move(OverlayFS); + } public: - ActionWithPCHPreamble(FrontendAction &FEAction, std::string &&CmdLineOpts) - : Action(FEAction), CmdLineOpts(std::move(CmdLineOpts)) {} + ActionWithPCHPreamble(FrontendAction &FEAction, std::string &&CmdLineOpts, + std::string PersistentPCHDir = {}) + : Action(FEAction), CmdLineOpts(std::move(CmdLineOpts)), + PersistentPCHDir(std::move(PersistentPCHDir)) { + assert(this->PersistentPCHDir.empty() || Persistent); + } bool runInvocation(std::shared_ptr Invocation, FileManager *Files, @@ -193,62 +322,195 @@ class SYCLToolchain { PreambleBounds Bounds = ComputePreambleBounds( Invocation->getLangOpts(), **MainFileBuffer, 100 /* MaxLines */); - PrecompiledPreambles::key key{ + auto_pch_key key{ std::move(CmdLineOpts), (*MainFileBuffer)->getBuffer().substr(0, Bounds.Size).str()}; - std::shared_ptr Preamble; - { - PrecompiledPreambles &Preambles = SYCLToolchain::instance().Preambles; - std::lock_guard Lock{Preambles.Mutex}; - auto [It, Inserted] = Preambles.PreamblesMap.try_emplace(key); - - if (Inserted) { - PreambleCallbacks Callbacks; - auto DiagIds = llvm::makeIntrusiveRefCnt(); - auto DiagOpts = Invocation->getDiagnosticOpts(); - auto Diags = llvm::makeIntrusiveRefCnt( - DiagIds, DiagOpts, DiagConsumer, false); - - static std::string StoragePath = - (SYCLToolchain::instance().getPrefix() + "/preambles").str(); - llvm::ErrorOr NewPreamble = - PrecompiledPreamble::Build( - *Invocation, MainFileBuffer->get(), Bounds, Diags, - Files->getVirtualFileSystemPtr(), PCHContainerOps, - /*StorePreamblesInMemory*/ true, StoragePath, Callbacks, - /*AllowASTWithErrors=*/false); - - if (!NewPreamble) - return false; - - It->second = std::make_shared( - std::move(NewPreamble.get())); + // In-memory for both `Persistent` and not because PrecompiledPreamble's + // `StorePreamblesInMemory==false` would create a *temporary* pch file + // on the file system, it will still be removed once preamble object + // dies. + auto BuildPreamble = [&]() { + PreambleCallbacks Callbacks; + auto DiagIds = llvm::makeIntrusiveRefCnt(); + auto DiagOpts = Invocation->getDiagnosticOpts(); + auto Diags = llvm::makeIntrusiveRefCnt( + DiagIds, DiagOpts, DiagConsumer, false); + + static std::string StoragePath = + (SYCLToolchain::instance().getPrefix() + "/preambles").str(); + return PrecompiledPreamble::Build( + *Invocation, MainFileBuffer->get(), Bounds, Diags, + Files->getVirtualFileSystemPtr(), PCHContainerOps, + /*StorePreamblesInMemory*/ true, StoragePath, Callbacks, + /*AllowASTWithErrors=*/false); + }; + + if constexpr (Persistent) { + BLAKE3 Hasher; + key.update_hasher(Hasher); + + std::string EncodedHash = encodeBase64(Hasher.result()); + // Make the encoding filesystem-friendly. + std::replace(EncodedHash.begin(), EncodedHash.end(), '/', '-'); + + // `llvm::localCache`'s API uses a callback to process cached data and + // the callback's return value (if any) is effectively ignored, so we + // need this extra `Success` variable to be able to properly return + // compilation status. + bool Success = false; + auto RunWithoutPCH = [&]() -> bool { + // Run original invocation: + Success = + Action::runInvocation(std::move(Invocation), Files, + std::move(PCHContainerOps), DiagConsumer); + return Success; + }; + + auto UseCachedPreamble = [&](StringRef PCHContent) { + std::unique_ptr PCHMemBuf = + MemoryBuffer::getMemBufferCopy(PCHContent); + + auto VFS = Files->getVirtualFileSystemPtr(); + addImplicitPersistentPreamble(std::move(PCHMemBuf), Bounds, + *Invocation, VFS); + + auto NewFiles = makeIntrusiveRefCnt( + Files->getFileSystemOpts(), std::move(VFS)); + + Success = + Action::runInvocation(std::move(Invocation), NewFiles.get(), + std::move(PCHContainerOps), DiagConsumer); + return Success; + }; + + // `llvm::localCache` calls the callback on either succesful cache read + // or during "commit" if an entry is being created. The problem is that + // commit might fail and the callback won't be called at all. It's + // easier to just don't rely on it on cache miss and perform compilation + // with newly generated preamble ourselves. + bool CacheHit = true; + + auto CacheCallback = [&](size_t, const Twine &, + std::unique_ptr MB) -> void { + if (!CacheHit) + return; // See above. + + llvm::MemoryBufferByteStream MemBufStream{std::move(MB), + llvm::endianness::little}; + llvm::BinaryStreamReader Reader(MemBufStream); + + auto_pch_key persistent_key; + // In case of any errors reading the cache, treat it as a hash + // collision and just compile without using PCH. + if (errorToBool(persistent_key.read(Reader))) + return (void)RunWithoutPCH(); + + // Hash collision, **very** unlikely. + if (key != persistent_key) + return (void)RunWithoutPCH(); + + StringRef PCHStorage; + + // This restriction is simply due to the `BinaryStreamReader|Writer` + // APIs. Pre-compiled preambles in tests seem to be low double digits + // megabytes which is well under 4GB limit imposed here. + if (Reader.bytesRemaining() >= std::numeric_limits::max()) + return (void)RunWithoutPCH(); + if (errorToBool(Reader.readFixedString( + PCHStorage, static_cast(Reader.bytesRemaining())))) + return (void)RunWithoutPCH(); + + return (void)UseCachedPreamble(PCHStorage); + }; + + auto CacheOrErr = + llvm::localCache("SYCL RTC Persistent Preambles", "syclrtc-tmp-", + PersistentPCHDir, CacheCallback); + + assert(CacheOrErr && "Don't see any code path returning Error"); + auto AddStreamOrErr = (*CacheOrErr)(0, EncodedHash, ""); + if (!AddStreamOrErr) { + // Not a hit, but we won't be able to store the data in the cache, so + // no need to generate precompiled preamble. + consumeError(AddStreamOrErr.takeError()); + return RunWithoutPCH(); } + auto &AddStream = *AddStreamOrErr; + if (!AddStream) { + // UseCachedPreamble was called by the cache after successfully + // reading persistent auto-pch file. + return Success; + } + CacheHit = false; + + llvm::ErrorOr NewPreamble = BuildPreamble(); + if (!NewPreamble) { + return false; + } + + // We could have used `NewPreamble`'s `AddImplicitPreamble` (i.e., as on + // the in-memory/non-persistent path) here but I think it's better to + // use the same code on cache read/miss: + UseCachedPreamble(NewPreamble->memoryContents()); + + // Any errors updating the persistent preambles cache won't affect + // current compilation, so ignore any error below: + + auto FileOrErr = AddStream(1, ""); + if (!FileOrErr) + return Success; + + llvm::CachedFileStream *CFS = FileOrErr->get(); + raw_pwrite_stream &OS = *CFS->OS; + consumeError(key.write(OS)); + + OS << NewPreamble->memoryContents(); + + consumeError(CFS->commit()); - Preamble = It->second; - } // End lock - - assert(Preamble); - assert(Preamble->CanReuse(*Invocation, **MainFileBuffer, Bounds, - Files->getVirtualFileSystem())); - - assert(Invocation->getPreprocessorOpts().RetainRemappedFileBuffers == - false); - // `PreprocessorOptions::RetainRemappedFileBuffers` defaults to false, so - // MemoryBuffer will be cleaned up by the CompilerInstance, thus - // `std::unique_ptr::release`. - auto Buf = llvm::MemoryBuffer::getMemBufferCopy( - (*MainFileBuffer)->getBuffer(), MainFilePath) - .release(); - - auto VFS = Files->getVirtualFileSystemPtr(); - Preamble->AddImplicitPreamble(*Invocation, VFS, Buf); - auto NewFiles = makeIntrusiveRefCnt( - Files->getFileSystemOpts(), std::move(VFS)); - - return Action::runInvocation(std::move(Invocation), NewFiles.get(), - std::move(PCHContainerOps), DiagConsumer); + return Success; + } else { + std::shared_ptr Preamble; + { + PrecompiledPreambles &Preambles = SYCLToolchain::instance().Preambles; + std::lock_guard Lock{Preambles.Mutex}; + auto [It, Inserted] = Preambles.PreamblesMap.try_emplace(key); + + if (Inserted) { + llvm::ErrorOr NewPreamble = BuildPreamble(); + + if (!NewPreamble) + return false; + + It->second = std::make_shared( + std::move(NewPreamble.get())); + } + + Preamble = It->second; + } // End lock + + assert(Preamble); + assert(Preamble->CanReuse(*Invocation, **MainFileBuffer, Bounds, + Files->getVirtualFileSystem())); + + assert(Invocation->getPreprocessorOpts().RetainRemappedFileBuffers == + false); + // `PreprocessorOptions::RetainRemappedFileBuffers` defaults to false, + // so MemoryBuffer will be cleaned up by the CompilerInstance, thus + // `std::unique_ptr::release`. + auto Buf = llvm::MemoryBuffer::getMemBufferCopy( + (*MainFileBuffer)->getBuffer(), MainFilePath) + .release(); + + auto VFS = Files->getVirtualFileSystemPtr(); + Preamble->AddImplicitPreamble(*Invocation, VFS, Buf); + auto NewFiles = makeIntrusiveRefCnt( + Files->getFileSystemOpts(), std::move(VFS)); + + return Action::runInvocation(std::move(Invocation), NewFiles.get(), + std::move(PCHContainerOps), DiagConsumer); + } } }; @@ -262,7 +524,7 @@ class SYCLToolchain { const char *SourceFilePath, FrontendAction &FEAction, IntrusiveRefCntPtr FSOverlay = nullptr, DiagnosticConsumer *DiagConsumer = nullptr, - bool UseAutoPCH = false) { + bool EnableAutoPCHOpts = false) { std::vector CommandLine = createCommandLine(UserArgList, Format, SourceFilePath); @@ -275,24 +537,41 @@ class SYCLToolchain { auto Files = llvm::makeIntrusiveRefCnt( clang::FileSystemOptions{"." /* WorkingDir */}, FS); - Action Normal{FEAction}; + auto Run = [&](auto &Action) { + ToolInvocation TI{std::move(CommandLine), &Action, Files.get(), + std::make_shared()}; - // User compilation options must be part of the key in the preambles map. We - // can either use "raw" user options or the "processed" from - // `createCommandLine` as long as we're consistent in what we're using. - // Current internal APIs pass `InputArgList` around instead of a single - // `std::string`, so it's easier to use `CommandLine`. Just make sure to - // drop `rtc_N.cpp` that is always different: - ActionWithPCHPreamble WithPreamble{FEAction, - join(drop_end(CommandLine, 1), " ")}; - ToolInvocation TI{std::move(CommandLine), - UseAutoPCH ? static_cast(&WithPreamble) - : &Normal, - Files.get(), std::make_shared()}; + TI.setDiagnosticConsumer(DiagConsumer ? DiagConsumer : &IgnoreDiag); - TI.setDiagnosticConsumer(DiagConsumer ? DiagConsumer : &IgnoreDiag); + return TI.run(); + }; - return TI.run(); + if (!EnableAutoPCHOpts) { + Action A{FEAction}; + return Run(A); + } + if (UserArgList.hasArg(OPT_auto_pch)) { + // User compilation options must be part of the key in the preambles map. + // We can either use "raw" user options or the "processed" from + // `createCommandLine` as long as we're consistent in what we're using. + // Current internal APIs pass `InputArgList` around instead of a single + // `std::string`, so it's easier to use `CommandLine`. Just make sure to + // drop `rtc_N.cpp` that is always different: + ActionWithPCHPreamble WithPreamble{ + FEAction, join(drop_end(CommandLine, 1), " ")}; + return Run(WithPreamble); + } + if (UserArgList.hasArg(OPT_persistent_auto_pch_EQ)) { + // The comment above applies here as well. + ActionWithPCHPreamble WithPreamble{ + FEAction, join(drop_end(CommandLine, 1), " "), + UserArgList.getLastArgValue(OPT_persistent_auto_pch_EQ).str()}; + return Run(WithPreamble); + } + + // Auto-PCH allowed for this FEAction but not requested by the user: + Action A{FEAction}; + return Run(A); } Expected loadBitcodeLibrary(StringRef LibPath, @@ -460,11 +739,10 @@ Expected jit_compiler::compileDeviceCode( DiagnosticOptions DiagOpts; ClangDiagnosticWrapper Wrapper(BuildLog, &DiagOpts); - bool AutoPCH = UserArgList.hasArg(OPT_auto_pch); - if (SYCLToolchain::instance().run(UserArgList, Format, SourceFile.Path, ELOA, getInMemoryFS(SourceFile, IncludeFiles), - Wrapper.consumer(), AutoPCH)) { + Wrapper.consumer(), + true /* EnableAutoPCHOpts */)) { return ELOA.takeModule(); } else { return createStringError(BuildLog); @@ -944,6 +1222,11 @@ jit_compiler::parseUserArgs(View UserArgs) { UnsupportedArg->getAsString(AL).c_str()); } + if (AL.hasArg(OPT_auto_pch) && AL.hasArg(OPT_persistent_auto_pch_EQ)) { + return createStringError( + "--auto-pch and --persistent-auto-pch= cannot be used together"); + } + return std::move(AL); } diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc index 1d57f445c2b3f..553ae6dd219be 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc @@ -1131,11 +1131,10 @@ forward-declarable. The first time this option is passed, the compiler finds the initial set of preprocessor directives (e.g., `#define`/`#include`) and comments in the -compiled source string (the preamble) and pre-compiles it. Essentialy, it -behaves like a precompiled header containing that preamble. On subsequent -compilations, if the compiled source string has the same preamble and the same -compilation options are used, the precompiled preamble is used, which speeds up -compilation. +compiled source string (the preamble) and precompiles it. Essentialy, it behaves +like a precompiled header containing that preamble. On subsequent compilations, +if the compiled source string has the same preamble and the same compilation +options are used, the precompiled preamble is used, which speeds up compilation. If the compiled source string has a different preamble or compilation options differ, a new precompiled preamble is generated, and that preamble can also be @@ -1181,6 +1180,32 @@ generated precompiled preamble can be used: and `-Werror=pch-date-time`, which cause the compiler to diagnose a warning or error in this scenario. +==== `--persistent-auto-pch=` + +Similar to `--auto-pch` but the precompiled preamble is stored on the filesystem +at the location ``. That allows an application to precompile the preamble +once and reuse it across multiple invocation of the application which isn't +possible with `--auto-pch` that stores the precompiled preamble in-memory. This +option is incompatible with `--auto-pch`. + +Some notes about the current behavior: + +* Threads only communicate via the persistent cache on the filesystem with no + other inter-thread communication to avoid extra synchronization overhead. In + an unlikely event that a preamble is missing from the cache and multiple + threads need it, each thread will build the preamble. + +* In the **very** unlikely event of a hash collision the precompiled preamble + from the persistent cache is ignored and the source is compiled without it. + +* Unlike `--auto-pch`, the `--persistent-auto-pch` option indexes the + precompiled preamble by a hash of the preamble content/compilation options, as + opposed to using those directly as a key. As a result, there is a highly + unlikely possibility that two different preambles will produce the same hash + value. Even if this occurs, the compilation won't fail. However, a compilation + using the conflicting hash would proceed without pre-compiled preamble support + as if this option wasn't enabled. + === Known issues and limitations when the language is `sycl` ==== Changing the compiler action or output diff --git a/sycl/test-e2e/KernelCompiler/conflicting_auto_pch_opts.cpp b/sycl/test-e2e/KernelCompiler/conflicting_auto_pch_opts.cpp new file mode 100644 index 0000000000000..ab2e1500e9103 --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/conflicting_auto_pch_opts.cpp @@ -0,0 +1,47 @@ +// RUN: %{build} -o %t.out +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out | FileCheck %s + +// UNSUPPORTED: target-native_cpu +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/20142 + +#include +#include + +using namespace std::string_view_literals; +namespace syclexp = sycl::ext::oneapi::experimental; + +int main(int argc, char **argv) { + + sycl::queue q; + auto props = syclexp::properties{syclexp::build_options{ + std::vector{"--auto-pch", "--persistent-auto-pch=/tmp"}}}; + + try { + std::string src = R"""( +#include +#include + +namespace syclext = sycl::ext::oneapi; +namespace syclexp = sycl::ext::oneapi::experimental; + +extern "C" +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>)) +void iota(float start, float *ptr) { + size_t id = syclext::this_work_item::get_nd_item<1>().get_global_linear_id(); + ptr[id] = start + static_cast(id); +} +)"""; + + // Error when generating PCH: + // CHECK-LABEL: Parsing of user arguments failed + // CHECK-NEXT: Detailed information: + // CHECK-NEXT: --auto-pch and --persistent-auto-pch= cannot be used together + + auto kb_src = syclexp::create_kernel_bundle_from_source( + q.get_context(), syclexp::source_language::sycl, src); + auto kb_exe = syclexp::build(kb_src, props); + return 1; + } catch (sycl::exception &e) { + std::cout << e.what() << std::endl; + } +} diff --git a/sycl/test-e2e/KernelCompiler/multi_threaded_rtc.cpp b/sycl/test-e2e/KernelCompiler/multi_threaded_rtc.cpp index 1546498cad838..3ead976025b09 100644 --- a/sycl/test-e2e/KernelCompiler/multi_threaded_rtc.cpp +++ b/sycl/test-e2e/KernelCompiler/multi_threaded_rtc.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -o %t.out +// RUN: %{build} '-DPCH_DIR="%/t.dir"' -o %t.out // RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out // UNSUPPORTED: target-native_cpu @@ -9,10 +9,13 @@ #include #include +#include #include namespace syclexp = sycl::ext::oneapi::experimental; int main() { + std::error_code ec; + std::filesystem::remove_all(PCH_DIR, ec); // noexcept overload sycl::queue q; constexpr int N = 16; std::string src_str = R"""( @@ -48,4 +51,12 @@ void foo(int *p) { t = std::thread{Run, auto_pch}; for (auto &t : threads) t.join(); + + auto persistent_auto_pch = syclexp::properties{syclexp::build_options{ + std::vector{"--persistent-auto-pch=" PCH_DIR}}}; + + for (auto &t : threads) + t = std::thread{Run, persistent_auto_pch}; + for (auto &t : threads) + t.join(); } diff --git a/sycl/test-e2e/KernelCompiler/persistent_auto_pch_cache_collision.cpp b/sycl/test-e2e/KernelCompiler/persistent_auto_pch_cache_collision.cpp new file mode 100644 index 0000000000000..ef0d9740e767b --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/persistent_auto_pch_cache_collision.cpp @@ -0,0 +1,84 @@ +// We want to use %{run-unfiltered-devices} for this test, it's easier if it's +// limited to SPIR-V target. +// REQUIRES: target-spir + +// %t.out arguments: +// * path to the persistent pch cache +// * VAL variable set in the a.hpp from which the precompiled preamble is +// compiled +// * `-DOPT_TO_AFFECT_HASH=` to affect the produced hash of the preamble+opts + +// RUN: %{build} -o %t.out +// RUN: %{run-aux} rm -rf %t.cache1 %t.cache2 + +// Two normal persistent-auto-pch gen/use runs +// RUN: %{run-unfiltered-devices} %t.out %t.cache1 42 1 | FileCheck %s -DVAL=42 +// RUN: %{run-unfiltered-devices} %t.out %t.cache1 42 1 | FileCheck %s -DVAL=42 +// RUN: %{run-unfiltered-devices} %t.out %t.cache2 43 2 | FileCheck %s -DVAL=43 +// RUN: %{run-unfiltered-devices} %t.out %t.cache2 43 2 | FileCheck %s -DVAL=43 + +// Content of the a.hpp changes, but auto-pch doesn't track that, so the +// precompiled preamble from the cache is reused: +// RUN: %{run-unfiltered-devices} %t.out %t.cache1 44 1 | FileCheck %s -DVAL=42 + +// Simulate collision - actual compilation opts stored on disk have +// "-DOPT_TO_AFFECT_HASH=2" while the hash encoded in the filename is still +// "-DOPT_TO_AFFECT_HASH=1". The "/*" below is the primary reason we're using +// %{run-unfiltered-devices} as we need to reference the cached file of which we +// don't know the name. +// RUN: %{run-aux} cp %t.cache2/* %t.cache1/* +// PCH on disk is ignored due to cache collision: +// RUN: %{run-unfiltered-devices} %t.out %t.cache1 44 1 | FileCheck %s -DVAL=44 + +// CHECK: Result: [[VAL]] + +#include +#include +#include + +namespace syclexp = sycl::ext::oneapi::experimental; + +std::string getInclude(int val) { + return + R"""( +#include +#include +inline constexpr int VAL = )""" + + std::to_string(val) + ";\n"; +} + +const std::string src = R"""( +#include "a.hpp" + +namespace syclext = sycl::ext::oneapi; +namespace syclexp = sycl::ext::oneapi::experimental; +extern "C" +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel)) +void foo(int *p) { + *p = VAL; +} +)"""; + +int main(int argc, char *argv[]) { + sycl::queue q; + assert(argc == 4); + std::string pch_dir = argv[1]; + int value = std::atoi(argv[2]); + int hash_def = std::atoi(argv[3]); + auto kb_src = syclexp::create_kernel_bundle_from_source( + q.get_context(), syclexp::source_language::sycl, src, + syclexp::properties{syclexp::include_files{"a.hpp", getInclude(value)}}); + auto kb_exe = syclexp::build( + kb_src, + syclexp::properties{syclexp::build_options{std::vector{ + "--persistent-auto-pch=" + pch_dir, + "-DOPT_TO_AFFECT_HASH=" + std::to_string(hash_def)}}}); + sycl::kernel krn = kb_exe.ext_oneapi_get_kernel("foo"); + auto *p = sycl::malloc_shared(1, q); + q.submit([&](sycl::handler &cgh) { + cgh.set_args(p); + cgh.single_task(krn); + }).wait(); + std::cout << "Result: " << *p << std::endl; + sycl::free(p, q); +} diff --git a/sycl/test-e2e/KernelCompiler/persistent_auto_pch_read_error.cpp b/sycl/test-e2e/KernelCompiler/persistent_auto_pch_read_error.cpp new file mode 100644 index 0000000000000..6b6c66de71e85 --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/persistent_auto_pch_read_error.cpp @@ -0,0 +1,50 @@ +// We want to use %{run-unfiltered-devices} for this test, it's easier if it's +// limited to SPIR-V target. +// REQUIRES: target-spir + +// PCH_DIR needs to be the same between build/run, so use %{run-aux} +// extensively. + +// RUN: %{run-aux} %{build} '-DPCH_DIR="%/t.dir"' -o %t.out +// RUN: %{run-aux} rm -rf %t.dir + +// Generate: +// RUN: %{run-unfiltered-devices} %t.out + +// Use: +// RUN: %{run-unfiltered-devices} %t.out + +// File too small: +// RUN: %{run-aux} echo "1" > %t.dir/* +// RUN: %{run-unfiltered-devices} %t.out + +// Cache file has garbage: +// RUN: %{run-aux} cp %s %t.dir/* +// RUN: %{run-unfiltered-devices} %t.out + +#include +#include + +namespace syclexp = sycl::ext::oneapi::experimental; + +const std::string src = R"""( +#include +#include + +namespace syclext = sycl::ext::oneapi; +namespace syclexp = sycl::ext::oneapi::experimental; +extern "C" +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel)) +void foo(int *p) { + *p = 42; +} +)"""; + +int main() { + sycl::queue q; + auto kb_src = syclexp::create_kernel_bundle_from_source( + q.get_context(), syclexp::source_language::sycl, src); + auto kb_exe = syclexp::build( + kb_src, syclexp::properties{syclexp::build_options{ + std::vector{"--persistent-auto-pch=" PCH_DIR}}}); +} diff --git a/sycl/test-e2e/KernelCompiler/persistent_auto_pch_stress_deletion.cpp b/sycl/test-e2e/KernelCompiler/persistent_auto_pch_stress_deletion.cpp new file mode 100644 index 0000000000000..44d4d920d1692 --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/persistent_auto_pch_stress_deletion.cpp @@ -0,0 +1,67 @@ +// RUN: %{build} '-DPCH_DIR="%/t.dir"' -O3 -o %t.out +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out + +// UNSUPPORTED: target-native_cpu +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/20142 + +#include +#include + +#include +#include +#include +#include +#include +#include + +using namespace std::string_view_literals; +namespace syclexp = sycl::ext::oneapi::experimental; +int main() { + sycl::queue q; + constexpr int N = 16; + + auto Run = [&](int i) { + std::string preamble = R"""( +#include +#include +)"""; + + std::string body = R"""( +namespace syclext = sycl::ext::oneapi; +namespace syclexp = sycl::ext::oneapi::experimental; + +extern "C" +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>)) +void iota(float start, float *ptr) { + size_t id = syclext::this_work_item::get_nd_item<1>().get_global_linear_id(); + ptr[id] = start + static_cast(id) + VAL; +} +)"""; + + std::string src_str = preamble + + "inline constexpr int VAL = " + std::to_string(i) + + ";\n" + body; + auto kb_src = syclexp::create_kernel_bundle_from_source( + q.get_context(), syclexp::source_language::sycl, src_str); + auto kb_exe = syclexp::build( + kb_src, + syclexp::properties{syclexp::build_options{ + std::vector{"--persistent-auto-pch=" PCH_DIR}}}); + }; + + std::thread threads[N]; + + for (int i = 0; i < N; ++i) { + // Use noexcept overload to avoid exception if PCH_DIR doesn't exist: + std::error_code ec; + std::filesystem::remove_all(PCH_DIR, ec); + + threads[i] = std::thread{Run, i}; + using namespace std::chrono_literals; + std::this_thread::sleep_for(100ms); + } + + for (auto &t : threads) + if (t.joinable()) + t.join(); +} diff --git a/sycl/test-e2e/PerformanceTests/KernelCompiler/auto-pch.cpp b/sycl/test-e2e/PerformanceTests/KernelCompiler/auto-pch.cpp index c37d8febb5acb..ca70efa9b2db4 100644 --- a/sycl/test-e2e/PerformanceTests/KernelCompiler/auto-pch.cpp +++ b/sycl/test-e2e/PerformanceTests/KernelCompiler/auto-pch.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -O3 -o %t.out +// RUN: %{build} '-DPCH_DIR="%/t.dir"' -O3 -o %t.out // RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out // UNSUPPORTED: target-native_cpu @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -77,14 +78,22 @@ void iota(float start, float *ptr) { std::cout << "| "; Run(syclexp::properties{ syclexp::build_options{std::vector{"--auto-pch"}}}); + std::error_code ec; + std::filesystem::remove_all(PCH_DIR, ec); + + std::cout << "| "; + Run(syclexp::properties{syclexp::build_options{ + std::vector{"--persistent-auto-pch=" PCH_DIR}}}); std::cout << std::endl; } int main(int argc, char **argv) { // So that output could be copy-pasted into GH comments and rendered as a // table: - std::cout << "Extra Headers | Without PCH | With auto-PCH" << std::endl; - std::cout << "-|-|-" << std::endl; + std::cout << "Extra Headers | Without PCH | With Auto-PCH | With Persistent " + "Auto-PCH" + << std::endl; + std::cout << "-|-|-|-" << std::endl; run({}); run({"sycl/half_type.hpp"}); run({"sycl/ext/oneapi/bfloat16.hpp"}); @@ -92,4 +101,5 @@ int main(int argc, char **argv) { run({"sycl/vector.hpp"}); run({"sycl/multi_ptr.hpp"}); run({"sycl/builtins.hpp"}); + run({"sycl/ext/oneapi/matrix/matrix.hpp"}); }