From 70be072a2c413aa9b7ddaf229fdb4de414b8727e Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Wed, 12 Mar 2025 08:03:27 -0700 Subject: [PATCH 01/31] [SYCL] Implement compile and link for source-based kernel bundles Implements a new `compile` variant for source-based `kernel_bundle` and the corresponding linking functionality. Signed-off-by: Larsen, Steffen --- .../llvm/SYCLPostLink/ModuleSplitter.h | 5 + llvm/lib/SYCLPostLink/ModuleSplitter.cpp | 11 + .../lib/rtc/DeviceCompilation.cpp | 11 +- .../sycl_ext_oneapi_kernel_compiler.asciidoc | 82 ++- sycl/include/sycl/kernel_bundle.hpp | 63 ++ sycl/source/detail/device_binary_image.cpp | 455 ++++++++++++- sycl/source/detail/device_binary_image.hpp | 20 +- sycl/source/detail/device_image_impl.hpp | 598 ++++++++++-------- sycl/source/detail/kernel_bundle_impl.hpp | 184 +++++- .../program_manager/program_manager.cpp | 105 +-- .../program_manager/program_manager.hpp | 12 +- sycl/source/kernel_bundle.cpp | 33 + sycl/test-e2e/KernelCompiler/sycl_link.cpp | 102 +++ .../KernelCompiler/sycl_link_common_dep.cpp | 148 +++++ .../sycl_link_common_dep_optional_feature.cpp | 144 +++++ sycl/test/abi/sycl_symbols_linux.dump | 1 + sycl/test/abi/sycl_symbols_windows.dump | 1 + .../no_sycl_hpp_in_e2e_tests.cpp | 2 +- 18 files changed, 1636 insertions(+), 341 deletions(-) create mode 100644 sycl/test-e2e/KernelCompiler/sycl_link.cpp create mode 100644 sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp create mode 100644 sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp diff --git a/llvm/include/llvm/SYCLPostLink/ModuleSplitter.h b/llvm/include/llvm/SYCLPostLink/ModuleSplitter.h index 753dc70cb8df9..a4224ff56c90d 100644 --- a/llvm/include/llvm/SYCLPostLink/ModuleSplitter.h +++ b/llvm/include/llvm/SYCLPostLink/ModuleSplitter.h @@ -300,6 +300,11 @@ std::unique_ptr getDeviceCodeSplitter(ModuleDesc &&MD, IRSplitMode Mode, bool IROutputOnly, bool EmitOnlyKernelsAsEntryPoints); +std::unique_ptr +getDeviceCodeSplitter(ModuleDesc &&MD, IRSplitMode Mode, bool IROutputOnly, + bool EmitOnlyKernelsAsEntryPoints, + bool OverwriteAllowDeviceImageDependencies); + #ifndef NDEBUG void dumpEntryPoints(const EntryPointSet &C, const char *Msg = "", int Tab = 0); void dumpEntryPoints(const Module &M, bool OnlyKernelsAreEntryPoints = false, diff --git a/llvm/lib/SYCLPostLink/ModuleSplitter.cpp b/llvm/lib/SYCLPostLink/ModuleSplitter.cpp index 99e5f9fb29bf4..3bac846dea191 100644 --- a/llvm/lib/SYCLPostLink/ModuleSplitter.cpp +++ b/llvm/lib/SYCLPostLink/ModuleSplitter.cpp @@ -1153,8 +1153,19 @@ std::string FunctionsCategorizer::computeCategoryFor(Function *F) const { std::unique_ptr getDeviceCodeSplitter(ModuleDesc &&MD, IRSplitMode Mode, bool IROutputOnly, bool EmitOnlyKernelsAsEntryPoints) { + return getDeviceCodeSplitter(std::move(MD), Mode, IROutputOnly, + EmitOnlyKernelsAsEntryPoints, + AllowDeviceImageDependencies); +} + +std::unique_ptr +getDeviceCodeSplitter(ModuleDesc &&MD, IRSplitMode Mode, bool IROutputOnly, + bool EmitOnlyKernelsAsEntryPoints, + bool OverwriteAllowDeviceImageDependencies) { FunctionsCategorizer Categorizer; + AllowDeviceImageDependencies = OverwriteAllowDeviceImageDependencies; + EntryPointsGroupScope Scope = selectDeviceCodeGroupScope(MD.getModule(), Mode, IROutputOnly); diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index dd8845b71c0ed..a6ddbb6fbba50 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -648,10 +648,16 @@ jit_compiler::performPostLink(std::unique_ptr Module, const auto SplitMode = getDeviceCodeSplitMode(UserArgList); + const bool AllowDeviceImageDependencies = UserArgList.hasFlag( + options::OPT_fsycl_allow_device_image_dependencies, + options::OPT_fno_sycl_allow_device_image_dependencies, false); + // TODO: EmitOnlyKernelsAsEntryPoints is controlled by // `shouldEmitOnlyKernelsAsEntryPoints` in // `clang/lib/Driver/ToolChains/Clang.cpp`. - const bool EmitOnlyKernelsAsEntryPoints = true; + // If we allow device image dependencies, we should definitely not only emit + // kernels as entry points. + const bool EmitOnlyKernelsAsEntryPoints = !AllowDeviceImageDependencies; // TODO: The optlevel passed to `sycl-post-link` is determined by // `getSYCLPostLinkOptimizationLevel` in @@ -684,7 +690,8 @@ jit_compiler::performPostLink(std::unique_ptr Module, std::unique_ptr Splitter = getDeviceCodeSplitter( ModuleDesc{std::move(Module)}, SplitMode, - /*IROutputOnly=*/false, EmitOnlyKernelsAsEntryPoints); + /*IROutputOnly=*/false, EmitOnlyKernelsAsEntryPoints, + AllowDeviceImageDependencies); assert(Splitter->hasMoreSplits()); if (auto Err = Splitter->verifyNoCrossModuleDeviceGlobalUsage()) { diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc index aa7bed8f123db..abf288adbebed 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc @@ -271,8 +271,8 @@ kernel_bundle build( _Constraints:_ Available only when `PropertyListT` is an instance of `sycl::ext::oneapi::experimental::properties` which contains no properties -other than those listed below in the section "New properties for the `build` -function". +other than those listed below in the section "New properties for the `build` and +`compile` functions". _Effects (1):_ The source code from `sourceBundle` is translated into one or more device images of state `bundle_state::executable`, and a new kernel bundle is @@ -317,6 +317,80 @@ source code used to create the kernel bundle being printed to the terminal. In situations where this is undesirable, developers must ensure that the exception is caught and handled appropriately. _{endnote}_] + +a| +[frame=all,grid=none] +!==== +a! +[source] +---- +namespace sycl::ext::oneapi::experimental { + +template (1) +kernel_bundle compile( + const kernel_bundle& source, + const std::vector& devs, PropertyListT props={}) + +template (2) +kernel_bundle build( + const kernel_bundle& sourceBundle, + PropertyListT props = {}) + +} // namespace sycl::ext::oneapi::experimental +---- +!==== + + +_Constraints:_ Available only when `PropertyListT` is an instance of +`sycl::ext::oneapi::experimental::properties` which contains no properties +other than those listed below in the section "New properties for the `build` and +`compile` functions". + +_Effects (1):_ The source code from `sourceBundle` is translated into one or +more device images of state `bundle_state::object`, and a new kernel bundle is +created to contain these device images. +The new bundle represents all of the kernels in `sourceBundle` that are +compatible with at least one of the devices in `devs`. +Any remaining kernels (those that are not compatible with any of the devices in +`devs`) are not represented in the new kernel bundle. + +The new bundle has the same associated context as `sourceBundle`, and the new +bundle's set of associated devices is `devs` (with duplicate devices removed). + +_Effects (2)_: Equivalent to +`compile(sourceBundle, sourceBundle.get_devices(), props)`. + +_Returns:_ The newly created kernel bundle, which has `object` state. + +_Throws:_ + +* An `exception` with the `errc::invalid` error code if `source` was not created + with `source_language::sycl` or was the result of `sycl::join` taking one or + more `kernel_bundle` objects not created with `source_language::sycl`. + +* An `exception` with the `errc::invalid` error code if any of the devices in + `devs` is not contained by the context associated with `sourceBundle`. + +* An `exception` with the `errc::invalid` error code if any of the devices in + `devs` does not support compilation of kernels in the source language of + `sourceBundle`. + +* An `exception` with the `errc::invalid` error code if `props` contains an + `options` property that specifies an invalid option. + +* An `exception` with the `errc::build` error code if the compilation operation + fails. In this case, the exception `what` string provides a full build log, + including descriptions of any errors, warning messages, and other + diagnostics. + This string is intended for human consumption, and the format may not be + stable across implementations of this extension. + +[_Note:_ An uncaught `errc::build` exception may result in some or all of the +source code used to create the kernel bundle being printed to the terminal. +In situations where this is undesirable, developers must ensure that the +exception is caught and handled appropriately. +_{endnote}_] + |==== === New properties for the `create_kernel_bundle_from_source` function @@ -384,10 +458,10 @@ _Throws (3):_ entry with `name` in this property. |==== -=== New properties for the `build` function +=== New properties for the `build` and `compile` functions This extension adds the following properties, which can be used in conjunction -with the `build` function that is defined above: +with the `build` and `compile` function that is defined above: |==== a| diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index bd7fac34d61b8..0a8c3a03c9282 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -1183,6 +1183,36 @@ build_from_source(kernel_bundle &SourceKB, } return build_from_source(SourceKB, Devices, Options, nullptr, KernelNames); } + +__SYCL_EXPORT kernel_bundle compile_from_source( + kernel_bundle &SourceKB, + const std::vector &Devices, + const std::vector &CompileOptions, + sycl::detail::string *LogPtr, + const std::vector &RegisteredKernelNames); + +inline kernel_bundle +compile_from_source(kernel_bundle &SourceKB, + const std::vector &Devices, + const std::vector &CompileOptions, + std::string *LogPtr, + const std::vector &RegisteredKernelNames) { + std::vector Options; + for (const std::string &opt : CompileOptions) + Options.push_back(sycl::detail::string_view{opt}); + + std::vector KernelNames; + for (const std::string &name : RegisteredKernelNames) + KernelNames.push_back(sycl::detail::string_view{name}); + + sycl::detail::string Log; + auto result = compile_from_source(SourceKB, Devices, Options, + LogPtr ? &Log : nullptr, KernelNames); + if (LogPtr) + *LogPtr = Log.c_str(); + return result; +} + } // namespace detail ///////////////////////// @@ -1220,6 +1250,39 @@ kernel_bundle create_kernel_bundle_from_source( } #endif +///////////////////////// +// syclex::compile(source_kb) => obj_kb +///////////////////////// + +template >> +kernel_bundle +compile(kernel_bundle &SourceKB, + const std::vector &Devices, PropertyListT props = {}) { + std::vector CompileOptionsVec; + std::string *LogPtr = nullptr; + std::vector RegisteredKernelNamesVec; + if constexpr (props.template has_property()) + CompileOptionsVec = props.template get_property().opts; + if constexpr (props.template has_property()) + LogPtr = props.template get_property().log; + if constexpr (props.template has_property()) + RegisteredKernelNamesVec = + props.template get_property().names; + return detail::compile_from_source(SourceKB, Devices, CompileOptionsVec, + LogPtr, RegisteredKernelNamesVec); +} + +template >> +kernel_bundle +compile(kernel_bundle &SourceKB, + PropertyListT props = {}) { + return compile(SourceKB, SourceKB.get_devices(), props); +} + ///////////////////////// // syclex::build(source_kb) => exe_kb ///////////////////////// diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 373241cd026c0..84ff09b333423 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -13,8 +13,11 @@ #include #include +#include #include +#include #include +#include namespace sycl { inline namespace _V1 { @@ -147,21 +150,26 @@ void RTDeviceBinaryImage::dump(std::ostream &Out) const { sycl_device_binary_property RTDeviceBinaryImage::getProperty(const char *PropName) const { - RTDeviceBinaryImage::PropertyRange BoolProp; - BoolProp.init(Bin, __SYCL_PROPERTY_SET_SYCL_MISC_PROP); - if (!BoolProp.isAvailable()) + if (!Misc.isAvailable()) return nullptr; - auto It = std::find_if(BoolProp.begin(), BoolProp.end(), + auto It = std::find_if(Misc.begin(), Misc.end(), [=](sycl_device_binary_property Prop) { return !strcmp(PropName, Prop->Name); }); - if (It == BoolProp.end()) + if (It == Misc.end()) return nullptr; return *It; } void RTDeviceBinaryImage::init(sycl_device_binary Bin) { + ImageId = ImageCounter++; + + // If there was no binary, we let the owner handle initialization and they see + // fit. This is used when merging binaries, e.g. during linking. + if (!Bin) + return; + // Bin != nullptr is guaranteed here. this->Bin = Bin; // If device binary image format wasn't set by its producer, then can't change @@ -198,16 +206,12 @@ void RTDeviceBinaryImage::init(sycl_device_binary Bin) { HostPipes.init(Bin, __SYCL_PROPERTY_SET_SYCL_HOST_PIPES); VirtualFunctions.init(Bin, __SYCL_PROPERTY_SET_SYCL_VIRTUAL_FUNCTIONS); RegisteredKernels.init(Bin, __SYCL_PROPERTY_SET_SYCL_REGISTERED_KERNELS); - - ImageId = ImageCounter++; + Misc.init(Bin, __SYCL_PROPERTY_SET_SYCL_MISC_PROP); } std::atomic RTDeviceBinaryImage::ImageCounter = 1; -DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( - std::unique_ptr &&DataPtr, size_t DataSize) - : RTDeviceBinaryImage() { - Data = std::move(DataPtr); +DynRTDeviceBinaryImage::DynRTDeviceBinaryImage() : RTDeviceBinaryImage() { Bin = new sycl_device_binary_struct(); Bin->Version = SYCL_DEVICE_BINARY_VERSION; Bin->Kind = SYCL_DEVICE_BINARY_OFFLOAD_KIND_SYCL; @@ -215,10 +219,21 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( Bin->LinkOptions = ""; Bin->ManifestStart = nullptr; Bin->ManifestEnd = nullptr; - Bin->BinaryStart = reinterpret_cast(Data.get()); - Bin->BinaryEnd = Bin->BinaryStart + DataSize; + Bin->BinaryStart = nullptr; + Bin->BinaryEnd = nullptr; Bin->EntriesBegin = nullptr; Bin->EntriesEnd = nullptr; + Bin->Format = SYCL_DEVICE_BINARY_TYPE_NONE; + Bin->DeviceTargetSpec = __SYCL_DEVICE_BINARY_TARGET_UNKNOWN; +} + +DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( + std::unique_ptr> &&DataPtr, + size_t DataSize) + : DynRTDeviceBinaryImage() { + Data = std::move(DataPtr); + Bin->BinaryStart = reinterpret_cast(Data.get()); + Bin->BinaryEnd = Bin->BinaryStart + DataSize; Bin->Format = ur::getBinaryImageFormat(Bin->BinaryStart, DataSize); switch (Bin->Format) { case SYCL_DEVICE_BINARY_TYPE_SPIRV: @@ -235,6 +250,420 @@ DynRTDeviceBinaryImage::~DynRTDeviceBinaryImage() { Bin = nullptr; } +// Exclusive property merge logic. It assumes there are no cases where +// properties have different values and throws otherwise. +template +static std::vector +NaiveMergeBinaryProperties(const std::vector &Imgs, + const RangeGetterT &RangeGetter) { + size_t PropertiesCount = 0; + for (const RTDeviceBinaryImage *Img : Imgs) + PropertiesCount += RangeGetter(*Img).size(); + + std::vector Props; + Props.reserve(PropertiesCount); + for (const RTDeviceBinaryImage *Img : Imgs) { + const RTDeviceBinaryImage::PropertyRange &Range = RangeGetter(*Img); + Props.insert(Props.end(), Range.begin(), Range.end()); + } + + return Props; +} + +// Exclusive property merge logic. If IgnoreDuplicates is false it assumes there +// are no cases where properties have different values and throws otherwise. +template +static std::map +ExclusiveMergeBinaryProperties( + const std::vector &Imgs, + const RangeGetterT &RangeGetter, bool IgnoreDuplicates = false) { + std::map MergeMap; + for (const RTDeviceBinaryImage *Img : Imgs) { + const RTDeviceBinaryImage::PropertyRange &Range = RangeGetter(*Img); + for (const sycl_device_binary_property Prop : Range) { + const auto [It, Inserted] = + MergeMap.try_emplace(std::string_view{Prop->Name}, Prop); + if (IgnoreDuplicates || Inserted) + continue; + // If we didn't insert a new entry, check that the old entry had the + // exact same value. + const sycl_device_binary_property OtherProp = It->second; + if (OtherProp->Type != Prop->Type || + OtherProp->ValSize != Prop->ValSize || + (Prop->Type == SYCL_PROPERTY_TYPE_BYTE_ARRAY && + std::memcmp(OtherProp->ValAddr, Prop->ValAddr, Prop->ValSize) != 0)) + throw sycl::exception(make_error_code(errc::invalid), + "Unable to merge incompatible images."); + } + } + return MergeMap; +} + +// Device requirements needs the ability to produce new properties. The +// information for these are kept in this struct. +struct MergedDeviceRequirements { + std::map MergeMap; + std::set Aspects; + std::set JointMatrix; + std::set JointMatrixMad; + + size_t GetPropertiesCount() const { + return MergeMap.size() + !Aspects.empty() + !JointMatrix.empty() + + !JointMatrixMad.empty(); + } + + size_t GetAspectsContentSize() const { + return Aspects.size() * sizeof(uint32_t); + } + + static size_t GetStringSetContentSize(const std::set &Set) { + size_t Result = 0; + Result += Set.size() - 1; // Semi-colon delimiters. + for (const std::string_view &Str : Set) // Strings. + Result += Str.size(); + return Result; + } + + size_t GetPropertiesContentByteSize() const { + size_t Result = 0; + for (const auto &PropIt : MergeMap) + Result += strlen(PropIt.second->Name) + 1 + PropIt.second->ValSize; + + if (!Aspects.empty()) + Result += strlen("aspects") + 1 + GetAspectsContentSize(); + + if (!JointMatrix.empty()) + Result += + strlen("joint_matrix") + 1 + GetStringSetContentSize(JointMatrix); + + if (!JointMatrixMad.empty()) + Result += strlen("joint_matrix_mad") + 1 + + GetStringSetContentSize(JointMatrixMad); + + return Result; + } + + void WriteAspectProperty(sycl_device_binary_property &NextFreeProperty, + char *&NextFreeContent) const { + if (Aspects.empty()) + return; + // Get the next free property entry and move the needle. + sycl_device_binary_property NewProperty = NextFreeProperty++; + NewProperty->Type = SYCL_PROPERTY_TYPE_BYTE_ARRAY; + NewProperty->ValSize = GetAspectsContentSize(); + // Copy the name. + const size_t NameLen = std::strlen("aspects"); + std::memcpy(NextFreeContent, "aspects", NameLen + 1); + NewProperty->Name = NextFreeContent; + NextFreeContent += NameLen + 1; + // Copy the values. + uint32_t *AspectContentIt = reinterpret_cast(NextFreeContent); + for (uint32_t Aspect : Aspects) + *(AspectContentIt++) = Aspect; + NewProperty->ValAddr = NextFreeContent; + NextFreeContent += NewProperty->ValSize; + } + + static void WriteStringSetProperty( + const std::set &Set, const char *SetName, + sycl_device_binary_property &NextFreeProperty, char *&NextFreeContent) { + if (Set.empty()) + return; + // Get the next free property entry and move the needle. + sycl_device_binary_property NewProperty = NextFreeProperty++; + NewProperty->Type = SYCL_PROPERTY_TYPE_BYTE_ARRAY; + NewProperty->ValSize = GetStringSetContentSize(Set); + // Copy the name. + const size_t NameLen = std::strlen(SetName); + std::memcpy(NextFreeContent, SetName, NameLen + 1); + NewProperty->Name = NextFreeContent; + NextFreeContent += NameLen + 1; + // Copy the values. + NewProperty->ValAddr = NextFreeContent; + for (auto StrIt = Set.begin(); StrIt != Set.end(); ++StrIt) { + if (StrIt != Set.begin()) + *(NextFreeContent++) = ';'; + std::memcpy(NextFreeContent, StrIt->data(), StrIt->size()); + NextFreeContent += StrIt->size(); + } + } +}; + +// Merging device requirements is a little more involved, as it may impose +// new requirements. +static MergedDeviceRequirements +MergeDeviceRequirements(const std::vector &Imgs) { + MergedDeviceRequirements MergedReqs; + for (const RTDeviceBinaryImage *Img : Imgs) { + const RTDeviceBinaryImage::PropertyRange &Range = + Img->getDeviceRequirements(); + for (const sycl_device_binary_property Prop : Range) { + std::string_view NameView = std::string_view{Prop->Name}; + + // Aspects we collect in a set early and add them afterwards. + if (NameView == std::string_view{"aspects"}) { + // Skip size bytes. + auto AspectIt = reinterpret_cast( + reinterpret_cast(Prop->ValAddr) + 8); + for (size_t I = 0; I < Prop->ValSize / sizeof(uint32_t); ++I) + MergedReqs.Aspects.emplace(AspectIt[I]); + continue; + } + + // joint_matrix and joint_matrix_mad have the same format, so we parse + // them the same way. + if (NameView == std::string_view{"joint_matrix"} || + NameView == std::string_view{"joint_matrix_mad"}) { + std::set &Set = + NameView == std::string_view{"joint_matrix"} + ? MergedReqs.JointMatrix + : MergedReqs.JointMatrixMad; + + // Skip size bytes. + std::string_view Contents{reinterpret_cast(Prop->ValAddr) + 8, + Prop->ValSize}; + size_t Pos = 0; + do { + const size_t NextPos = Contents.find(';', Pos); + if (NextPos != Pos) + Set.emplace(Contents.substr(Pos, NextPos - Pos)); + Pos = NextPos + 1; + } while (Pos != 0); + continue; + } + + const auto [It, Inserted] = + MergedReqs.MergeMap.try_emplace(NameView, Prop); + if (Inserted) + continue; + // Special handling has already happened, so we assume the rest are + // exclusive property values. + const sycl_device_binary_property OtherProp = It->second; + if (OtherProp->Type != Prop->Type || + OtherProp->ValSize != Prop->ValSize || + (Prop->Type == SYCL_PROPERTY_TYPE_BYTE_ARRAY && + std::memcmp(OtherProp->ValAddr, Prop->ValAddr, Prop->ValSize) != 0)) + throw sycl::exception(make_error_code(errc::invalid), + "Unable to merge incompatible images."); + } + } + return MergedReqs; +} + +// Copies a property into new memory. +static void CopyProperty(sycl_device_binary_property &NextFreeProperty, + char *&NextFreeContent, + const sycl_device_binary_property OldProperty) { + // Get the next free property entry and move the needle. + sycl_device_binary_property NewProperty = NextFreeProperty++; + NewProperty->Type = OldProperty->Type; + NewProperty->ValSize = OldProperty->ValSize; + // Copy the name. + const size_t NameLen = std::strlen(OldProperty->Name); + std::memcpy(NextFreeContent, OldProperty->Name, NameLen + 1); + NewProperty->Name = NextFreeContent; + NextFreeContent += NameLen + 1; + // Copy the values. If the type is uint32 it will have been stored in the size + // instead of the value address. + if (OldProperty->Type == SYCL_PROPERTY_TYPE_BYTE_ARRAY) { + std::memcpy(NextFreeContent, OldProperty->ValAddr, OldProperty->ValSize); + NewProperty->ValAddr = NextFreeContent; + NextFreeContent += OldProperty->ValSize; + } else { + NewProperty->ValAddr = nullptr; + } +} + +DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( + const std::vector &Imgs) + : DynRTDeviceBinaryImage() { + init(nullptr); + + // Naive merges. + auto MergedSpecConstants = + NaiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getSpecConstants(); + }); + auto MergedSpecConstantsDefaultValues = + NaiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getSpecConstantsDefaultValues(); + }); + auto MergedKernelParamOptInfo = + NaiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getKernelParamOptInfo(); + }); + auto MergedAssertUsed = NaiveMergeBinaryProperties( + Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getAssertUsed(); }); + auto MergedDeviceGlobals = + NaiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getDeviceGlobals(); + }); + auto MergedHostPipes = NaiveMergeBinaryProperties( + Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getHostPipes(); }); + auto MergedVirtualFunctions = + NaiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getVirtualFunctions(); + }); + auto MergedImplicitLocalArg = + NaiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getImplicitLocalArg(); + }); + auto MergedExportedSymbols = + NaiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getExportedSymbols(); + }); + auto MergedRegisteredKernels = + NaiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getRegisteredKernels(); + }); + + std::array *, 10> MergedVecs{ + &MergedSpecConstants, &MergedSpecConstantsDefaultValues, + &MergedKernelParamOptInfo, &MergedAssertUsed, + &MergedDeviceGlobals, &MergedHostPipes, + &MergedVirtualFunctions, &MergedImplicitLocalArg, + &MergedExportedSymbols, &MergedRegisteredKernels}; + + // Exclusive merges. + auto MergedDeviceLibReqMask = + ExclusiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getDeviceLibReqMask(); + }); + auto MergedProgramMetadata = + ExclusiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getProgramMetadata(); + }); + auto MergedImportedSymbols = ExclusiveMergeBinaryProperties( + Imgs, + [](const RTDeviceBinaryImage &Img) { return Img.getImportedSymbols(); }, + /*IgnoreDuplicates=*/true); + auto MergedMisc = + ExclusiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getMiscProperties(); + }); + + std::array< + const std::map *, 4> + MergedMaps{&MergedDeviceLibReqMask, &MergedProgramMetadata, + &MergedImportedSymbols, &MergedMisc}; + + // When merging exported and imported, the exported symbols may cancel out + // some of the imported symbols. + for (const sycl_device_binary_property Prop : MergedExportedSymbols) + MergedImportedSymbols.erase(std::string_view{Prop->Name}); + + // For device requirements we need to do special handling to merge the + // property values as well. + MergedDeviceRequirements MergedDevReqs = MergeDeviceRequirements(Imgs); + + // Now that we have merged all properties, we need to calculate how much + // memory we need to store the new property sets. + constexpr size_t PropertyByteSize = + sizeof(_sycl_device_binary_property_struct); + constexpr size_t PropertyAlignment = + alignof(_sycl_device_binary_property_struct); + constexpr size_t PaddedPropertyByteSize = + (1 + ((PropertyByteSize - 1) / PropertyAlignment)) * PropertyAlignment; + + // Count the total number of property entries. + size_t PropertyCount = 0; + for (const auto &Vec : MergedVecs) + PropertyCount += Vec->size(); + for (const auto &Map : MergedMaps) + PropertyCount += Map->size(); + PropertyCount += MergedDevReqs.GetPropertiesCount(); + + // Count the bytes needed for the values and names of the properties. + auto GetPropertyContentSize = [](const sycl_device_binary_property Prop) { + return Prop->Type == SYCL_PROPERTY_TYPE_BYTE_ARRAY ? Prop->ValSize : 0; + }; + size_t PropertyContentByteSize = 0; + for (const auto &Vec : MergedVecs) + for (const auto &Prop : *Vec) + PropertyContentByteSize += + strlen(Prop->Name) + 1 + GetPropertyContentSize(Prop); + for (const auto &Map : MergedMaps) + for (const auto &PropIt : *Map) + PropertyContentByteSize += strlen(PropIt.second->Name) + 1 + + GetPropertyContentSize(PropIt.second); + PropertyContentByteSize += MergedDevReqs.GetPropertiesContentByteSize(); + + const size_t PropertySectionSize = PropertyCount * PaddedPropertyByteSize; + + // Allocate the memory aligned to the property entry alignment. + // Note: MSVC does not implement std::aligned_alloc. + Data = std::unique_ptr>( +#ifdef _MSC_VER + static_cast(_aligned_malloc(sizeof(char) * PropertySectionSize + + PropertyContentByteSize, + PropertyAlignment)), + _aligned_free +#else + static_cast(std::aligned_alloc( + PropertyAlignment, + sizeof(char) * PropertySectionSize + PropertyContentByteSize)), + std::free +#endif + ); + + auto NextFreeProperty = + reinterpret_cast(Data.get()); + char *NextFreeContent = Data.get() + PropertySectionSize; + + auto CopyPropertiesVec = + [&](const auto &Properties, + RTDeviceBinaryImage::PropertyRange &TargetRange) { + if (Properties.empty()) + return; + TargetRange.Begin = NextFreeProperty; + for (const sycl_device_binary_property Prop : Properties) + CopyProperty(NextFreeProperty, NextFreeContent, Prop); + TargetRange.End = NextFreeProperty; + }; + auto CopyPropertiesMap = + [&](const auto &Properties, + RTDeviceBinaryImage::PropertyRange &TargetRange) { + if (Properties.empty()) + return; + TargetRange.Begin = NextFreeProperty; + for (const auto &PropIt : Properties) + CopyProperty(NextFreeProperty, NextFreeContent, PropIt.second); + TargetRange.End = NextFreeProperty; + }; + + CopyPropertiesVec(MergedSpecConstants, SpecConstIDMap); + CopyPropertiesVec(MergedSpecConstantsDefaultValues, + SpecConstDefaultValuesMap); + CopyPropertiesVec(MergedKernelParamOptInfo, KernelParamOptInfo); + CopyPropertiesVec(MergedAssertUsed, AssertUsed); + CopyPropertiesVec(MergedDeviceGlobals, DeviceGlobals); + CopyPropertiesVec(MergedHostPipes, HostPipes); + CopyPropertiesVec(MergedVirtualFunctions, VirtualFunctions); + CopyPropertiesVec(MergedImplicitLocalArg, ImplicitLocalArg); + CopyPropertiesVec(MergedExportedSymbols, ExportedSymbols); + CopyPropertiesVec(MergedRegisteredKernels, RegisteredKernels); + + CopyPropertiesMap(MergedDeviceLibReqMask, DeviceLibReqMask); + CopyPropertiesMap(MergedProgramMetadata, ProgramMetadata); + CopyPropertiesMap(MergedImportedSymbols, ImportedSymbols); + CopyPropertiesMap(MergedMisc, Misc); + + // Special handling for new device requirements. + { + DeviceRequirements.Begin = NextFreeProperty; + for (const auto &PropIt : MergedDevReqs.MergeMap) + CopyProperty(NextFreeProperty, NextFreeContent, PropIt.second); + MergedDevReqs.WriteAspectProperty(NextFreeProperty, NextFreeContent); + MergedDeviceRequirements::WriteStringSetProperty( + MergedDevReqs.JointMatrix, "joint_matrix", NextFreeProperty, + NextFreeContent); + MergedDeviceRequirements::WriteStringSetProperty( + MergedDevReqs.JointMatrixMad, "joint_matrix_mad", NextFreeProperty, + NextFreeContent); + DeviceRequirements.End = NextFreeProperty; + } +} + #ifndef SYCL_RT_ZSTD_NOT_AVAIABLE CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage( sycl_device_binary CompressedBin) diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 370c0af509d70..0069f35fd11df 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -121,6 +121,7 @@ class RTDeviceBinaryImage { size_t size() const { return std::distance(begin(), end()); } bool empty() const { return begin() == end(); } friend class RTDeviceBinaryImage; + friend class DynRTDeviceBinaryImage; bool isAvailable() const { return !(Begin == nullptr); } private: @@ -238,6 +239,7 @@ class RTDeviceBinaryImage { const PropertyRange &getRegisteredKernels() const { return RegisteredKernels; } + const PropertyRange &getMiscProperties() const { return Misc; } std::uintptr_t getImageID() const { assert(Bin && "Image ID is not available without a binary image."); @@ -245,6 +247,7 @@ class RTDeviceBinaryImage { } protected: + void init(); void init(sycl_device_binary Bin); sycl_device_binary get() const { return Bin; } @@ -266,6 +269,7 @@ class RTDeviceBinaryImage { RTDeviceBinaryImage::PropertyRange VirtualFunctions; RTDeviceBinaryImage::PropertyRange ImplicitLocalArg; RTDeviceBinaryImage::PropertyRange RegisteredKernels; + RTDeviceBinaryImage::PropertyRange Misc; std::vector ProgramMetadataUR; @@ -275,19 +279,29 @@ class RTDeviceBinaryImage { }; // Dynamically allocated device binary image, which de-allocates its binary -// data in destructor. +// data and associated metadata in destructor. class DynRTDeviceBinaryImage : public RTDeviceBinaryImage { public: - DynRTDeviceBinaryImage(std::unique_ptr &&DataPtr, size_t DataSize); + DynRTDeviceBinaryImage( + std::unique_ptr> &&DataPtr, + size_t DataSize); ~DynRTDeviceBinaryImage() override; + // Merge ctor + DynRTDeviceBinaryImage(const std::vector &Imgs); + void print() const override { RTDeviceBinaryImage::print(); std::cerr << " DYNAMICALLY CREATED\n"; } + static DynRTDeviceBinaryImage + merge(const std::vector &Imgs); + protected: - std::unique_ptr Data; + DynRTDeviceBinaryImage(); + + std::unique_ptr> Data; }; #ifndef SYCL_RT_ZSTD_NOT_AVAIABLE diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 4b4d454475eb7..4920f60b79b84 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -162,7 +162,7 @@ struct KernelCompilerBinaryInfo { std::shared_ptr &&DeviceGlobalRegistry) : MLanguage{Lang}, MKernelNames{std::move(KernelNames)}, MMangledKernelNames{std::move(MangledKernelNames)}, - MPrefix{std::move(Prefix)}, + MPrefixes{std::move(Prefix)}, MDeviceGlobalRegistries{std::move(DeviceGlobalRegistry)} {} static std::optional @@ -185,12 +185,6 @@ struct KernelCompilerBinaryInfo { "Linking binaries with different source " "languages is not currently supported."); - if (!RTCInfo->MPrefix.empty() && !Result->MPrefix.empty() && - RTCInfo->MPrefix != Result->MPrefix) - throw sycl::exception(make_error_code(errc::invalid), - "Linking binaries with different kernel prefixes " - "is not currently supported."); - for (const std::string &KernelName : RTCInfo->MKernelNames) Result->MKernelNames.insert(KernelName); @@ -216,6 +210,9 @@ struct KernelCompilerBinaryInfo { Result->MDeviceGlobalRegistries.end(), RTCInfo->MDeviceGlobalRegistries.begin(), RTCInfo->MDeviceGlobalRegistries.end()); + + for (const std::string &Prefix : RTCInfo->MPrefixes) + Result->MPrefixes.insert(Prefix); } return Result; } @@ -223,7 +220,7 @@ struct KernelCompilerBinaryInfo { syclex::source_language MLanguage; std::set MKernelNames; std::unordered_map MMangledKernelNames; - std::string MPrefix; + std::set MPrefixes; include_pairs_t MIncludePairs; std::vector> MDeviceGlobalRegistries; @@ -260,20 +257,21 @@ class device_image_impl { updateSpecConstSymMap(); } - device_image_impl(const RTDeviceBinaryImage *BinImage, const context &Context, - const std::vector &Devices, bundle_state State, - std::shared_ptr> KernelIDs, - ur_program_handle_t Program, - const SpecConstMapT &SpecConstMap, - const std::vector &SpecConstsBlob, - uint8_t Origins, - std::optional &&RTCInfo) + device_image_impl( + const RTDeviceBinaryImage *BinImage, const context &Context, + std::vector &&Devices, bundle_state State, + std::shared_ptr> KernelIDs, + ur_program_handle_t Program, const SpecConstMapT &SpecConstMap, + const std::vector &SpecConstsBlob, uint8_t Origins, + std::optional &&RTCInfo, + std::unique_ptr &&MergedImageStorage = nullptr) : MBinImage(BinImage), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(Program), MKernelIDs(std::move(KernelIDs)), MSpecConstsBlob(SpecConstsBlob), MSpecConstsDefValBlob(getSpecConstsDefValBlob()), MSpecConstSymMap(SpecConstMap), MOrigins(Origins), - MRTCBinInfo(std::move(RTCInfo)) {} + MRTCBinInfo(std::move(RTCInfo)), + MMergedImageStorage(std::move(MergedImageStorage)) {} device_image_impl(const RTDeviceBinaryImage *BinImage, const context &Context, const std::vector &Devices, bundle_state State, @@ -624,18 +622,21 @@ class device_image_impl { std::string AdjustedName = adjustKernelName(Name); if (MRTCBinInfo->MLanguage == syclex::source_language::sycl) { auto &PM = ProgramManager::getInstance(); - auto KID = PM.tryGetSYCLKernelID(MRTCBinInfo->MPrefix + AdjustedName); - - if (!KID || !has_kernel(*KID)) - return nullptr; - - auto UrProgram = get_ur_program_ref(); - auto [UrKernel, CacheMutex, ArgMask] = - PM.getOrCreateKernel(Context, AdjustedName, - /*PropList=*/{}, UrProgram); - return std::make_shared(UrKernel, getSyclObjImpl(Context), - Self, OwnerBundle, ArgMask, - UrProgram, CacheMutex); + for (const std::string &Prefix : MRTCBinInfo->MPrefixes) { + auto KID = PM.tryGetSYCLKernelID(Prefix + AdjustedName); + + if (!KID || !has_kernel(*KID)) + continue; + + auto UrProgram = get_ur_program_ref(); + auto [UrKernel, CacheMutex, ArgMask] = + PM.getOrCreateKernel(Context, AdjustedName, + /*PropList=*/{}, UrProgram); + return std::make_shared(UrKernel, getSyclObjImpl(Context), + Self, OwnerBundle, ArgMask, + UrProgram, CacheMutex); + } + return nullptr; } ur_program_handle_t UrProgram = get_ur_program_ref(); @@ -687,7 +688,7 @@ class device_image_impl { } std::vector> buildFromSource( - const std::vector Devices, + const std::vector &Devices, const std::vector &BuildOptions, std::string *LogPtr, const std::vector &RegisteredKernelNames, std::vector> &OutDeviceBins) @@ -716,170 +717,9 @@ class device_image_impl { } } - if (MRTCBinInfo->MLanguage == syclex::source_language::sycl) { - assert(std::holds_alternative(MBinImage)); - - // Build device images via the program manager. - const std::string &SourceStr = std::get(MBinImage); - std::ostringstream SourceExt; - if (!RegisteredKernelNames.empty()) { - SourceExt << SourceStr << '\n'; - - auto EmitEntry = - [&SourceExt](const std::string &Name) -> std::ostringstream & { - SourceExt << " {\"" << Name << "\", " << Name << "}"; - return SourceExt; - }; - - SourceExt << "[[__sycl_detail__::__registered_kernels__(\n"; - for (auto It = RegisteredKernelNames.begin(), - SecondToLast = RegisteredKernelNames.end() - 1; - It != SecondToLast; ++It) { - EmitEntry(*It) << ",\n"; - } - EmitEntry(RegisteredKernelNames.back()) << "\n"; - SourceExt << ")]];\n"; - } - - auto [Binaries, Prefix] = syclex::detail::SYCL_JIT_Compile( - RegisteredKernelNames.empty() ? SourceStr : SourceExt.str(), - MRTCBinInfo->MIncludePairs, BuildOptions, LogPtr); - - auto &PM = detail::ProgramManager::getInstance(); - - // Add all binaries and keep the images for processing. - std::vector>>> - NewImages; - NewImages.reserve(Binaries->NumDeviceBinaries); - for (int I = 0; I < Binaries->NumDeviceBinaries; I++) { - sycl_device_binary Binary = &(Binaries->DeviceBinaries[I]); - RTDeviceBinaryImage *NewImage = nullptr; - auto KernelIDs = std::make_shared>(); - PM.addImage(Binary, &NewImage, KernelIDs.get()); - if (NewImage) - NewImages.push_back( - std::make_pair(std::move(NewImage), std::move(KernelIDs))); - } - - // Now bring all images into the proper state. Note that we do this in a - // separate pass over NewImages to make sure dependency images have been - // registered beforehand. - std::vector> Result; - Result.reserve(NewImages.size()); - for (auto &[NewImage, KernelIDs] : NewImages) { - std::set KernelNames; - std::unordered_map MangledKernelNames; - std::unordered_set DeviceGlobalIDSet; - std::vector DeviceGlobalIDVec; - std::vector DeviceGlobalNames; - std::vector> DeviceGlobalAllocations; - - for (const auto &KernelID : *KernelIDs) { - std::string_view KernelName{KernelID.get_name()}; - if (KernelName.find(Prefix) == 0) { - KernelName.remove_prefix(Prefix.length()); - KernelNames.emplace(KernelName); - static constexpr std::string_view SYCLKernelMarker{ - "__sycl_kernel_"}; - if (KernelName.find(SYCLKernelMarker) == 0) { - // extern "C" declaration, implicitly register kernel without the - // marker. - std::string_view KernelNameWithoutMarker{KernelName}; - KernelNameWithoutMarker.remove_prefix(SYCLKernelMarker.length()); - MangledKernelNames.emplace(KernelNameWithoutMarker, KernelName); - } - } - - for (const sycl_device_binary_property &RKProp : - NewImage->getRegisteredKernels()) { - // Mangled names. - auto BA = DeviceBinaryProperty(RKProp).asByteArray(); - auto MangledNameLen = BA.consume() / 8 /*bits in a byte*/; - std::string_view MangledName{ - reinterpret_cast(BA.begin()), MangledNameLen}; - MangledKernelNames.emplace(RKProp->Name, MangledName); - } - - // Device globals. - for (const auto &DeviceGlobalProp : NewImage->getDeviceGlobals()) { - std::string_view DeviceGlobalName{DeviceGlobalProp->Name}; - assert(DeviceGlobalName.find(Prefix) == 0); - bool Inserted = false; - std::tie(std::ignore, Inserted) = - DeviceGlobalIDSet.emplace(DeviceGlobalName); - if (Inserted) { - DeviceGlobalIDVec.emplace_back(DeviceGlobalName); - DeviceGlobalName.remove_prefix(Prefix.length()); - DeviceGlobalNames.emplace_back(DeviceGlobalName); - } - } - } - - // Device globals are usually statically allocated and registered in the - // integration footer, which we don't have in the RTC context. Instead, - // we dynamically allocate storage tied to the executable kernel bundle. - for (DeviceGlobalMapEntry *DeviceGlobalEntry : - PM.getDeviceGlobalEntries(DeviceGlobalIDVec)) { - - size_t AllocSize = - DeviceGlobalEntry->MDeviceGlobalTSize; // init value - if (!DeviceGlobalEntry->MIsDeviceImageScopeDecorated) { - // Consider storage for device USM pointer. - AllocSize += sizeof(void *); - } - auto Alloc = std::make_unique(AllocSize); - std::string_view DeviceGlobalName{DeviceGlobalEntry->MUniqueId}; - PM.addOrInitDeviceGlobalEntry(Alloc.get(), DeviceGlobalName.data()); - DeviceGlobalAllocations.push_back(std::move(Alloc)); - - // Drop the RTC prefix from the entry's symbol name. Note that the PM - // still manages this device global under its prefixed name. - assert(DeviceGlobalName.find(Prefix) == 0); - DeviceGlobalName.remove_prefix(Prefix.length()); - DeviceGlobalEntry->MUniqueId = DeviceGlobalName; - } - - auto DGRegs = std::make_shared( - ContextImpl, std::string{Prefix}, std::move(DeviceGlobalNames), - std::move(DeviceGlobalAllocations)); - - // Mark the image as input so the program manager will bring it into - // the right state. - auto DevImgImpl = std::make_shared( - NewImage, MContext, Devices, bundle_state::input, - std::move(KernelIDs), MRTCBinInfo->MLanguage, - std::move(KernelNames), std::move(MangledKernelNames), - std::string{Prefix}, std::move(DGRegs)); - - // Resolve dependencies. - // TODO: Consider making a collectDeviceImageDeps variant that takes a - // set reference and inserts into that instead. - std::set ImgDeps; - for (const device &Device : Devices) { - std::set DevImgDeps = - PM.collectDeviceImageDeps(*NewImage, Device); - ImgDeps.insert(DevImgDeps.begin(), DevImgDeps.end()); - } - - // Pack main image and dependencies together. - std::vector NewImageAndDeps; - NewImageAndDeps.reserve(1 + ImgDeps.size()); - NewImageAndDeps.push_back( - createSyclObjFromImpl(std::move(DevImgImpl))); - for (RTDeviceBinaryImage *ImgDep : ImgDeps) - NewImageAndDeps.push_back(PM.createDependencyImage( - MContext, Devices, ImgDep, bundle_state::input)); - - DevImgPlainWithDeps ImgWithDeps(std::move(NewImageAndDeps)); - PM.bringSYCLDeviceImageToState(ImgWithDeps, bundle_state::executable); - Result.push_back(getSyclObjImpl(ImgWithDeps.getMain())); - } - - OutDeviceBins.emplace_back( - std::make_shared(std::move(Binaries))); - return Result; - } + if (MRTCBinInfo->MLanguage == syclex::source_language::sycl) + return createSYCLImages(Devices, bundle_state::executable, BuildOptions, + LogPtr, RegisteredKernelNames, OutDeviceBins); std::vector DeviceVec; DeviceVec.reserve(Devices.size()); @@ -897,51 +737,8 @@ class device_image_impl { const AdapterPtr &Adapter = ContextImpl->getAdapter(); - if (!FetchedFromCache) { - const auto spirv = [&]() -> std::vector { - switch (MRTCBinInfo->MLanguage) { - case syclex::source_language::opencl: { - // if successful, the log is empty. if failed, throws an error with - // the compilation log. - std::vector IPVersionVec(Devices.size()); - std::transform(DeviceVec.begin(), DeviceVec.end(), - IPVersionVec.begin(), [&](ur_device_handle_t d) { - uint32_t ipVersion = 0; - Adapter->call( - d, UR_DEVICE_INFO_IP_VERSION, sizeof(uint32_t), - &ipVersion, nullptr); - return ipVersion; - }); - return syclex::detail::OpenCLC_to_SPIRV(*SourceStrPtr, IPVersionVec, - BuildOptions, LogPtr); - } - case syclex::source_language::spirv: { - const auto &SourceBytes = std::get>(MBinImage); - std::vector Result(SourceBytes.size()); - std::transform(SourceBytes.cbegin(), SourceBytes.cend(), - Result.begin(), - [](std::byte B) { return static_cast(B); }); - return Result; - } - default: - break; - } - throw sycl::exception( - make_error_code(errc::invalid), - "SYCL C++, OpenCL C and SPIR-V are the only supported " - "languages at this time"); - }(); - - Adapter->call( - ContextImpl->getHandleRef(), spirv.data(), spirv.size(), nullptr, - &UrProgram); - // program created by urProgramCreateWithIL is implicitly retained. - if (UrProgram == nullptr) - throw sycl::exception( - sycl::make_error_code(errc::invalid), - "urProgramCreateWithIL resulted in a null program handle."); - - } // if(!FetchedFromCache) + if (!FetchedFromCache) + UrProgram = createProgramFromSource(Devices, BuildOptions, LogPtr); std::string XsFlags = extractXsFlags(BuildOptions); auto Res = Adapter->call_nocheck( @@ -958,18 +755,8 @@ class device_image_impl { UrProgram, UR_PROGRAM_INFO_NUM_KERNELS, sizeof(size_t), &NumKernels, nullptr); - // Get the kernel names. - size_t KernelNamesSize; - Adapter->call( - UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &KernelNamesSize); - - // semi-colon delimited list of kernel names. - std::string KernelNamesStr(KernelNamesSize, ' '); - Adapter->call( - UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, KernelNamesStr.size(), - &KernelNamesStr[0], nullptr); std::vector KernelNames = - detail::split_string(KernelNamesStr, ';'); + getKernelNamesFromURProgram(Adapter, UrProgram); std::set KernelNameSet{KernelNames.begin(), KernelNames.end()}; // If caching enabled and kernel not fetched from cache, cache. @@ -985,6 +772,45 @@ class device_image_impl { MRTCBinInfo->MLanguage, std::move(KernelNameSet))}; } + std::vector> compileFromSource( + const std::vector &Devices, + const std::vector &CompileOptions, std::string *LogPtr, + const std::vector &RegisteredKernelNames, + std::vector> &OutDeviceBins) + const { + assert(!std::holds_alternative(MBinImage)); + assert(MRTCBinInfo); + assert(MOrigins & ImageOriginKernelCompiler); + + if (MRTCBinInfo->MLanguage != syclex::source_language::sycl) + throw sycl::exception( + make_error_code(errc::invalid), + "compile is only available for kernel_bundle " + "when the source language was sycl."); + + std::shared_ptr ContextImpl = + getSyclObjImpl(MContext); + + for (const auto &SyclDev : Devices) { + DeviceImplPtr DevImpl = getSyclObjImpl(SyclDev); + if (!ContextImpl->hasDevice(DevImpl)) { + throw sycl::exception(make_error_code(errc::invalid), + "device not part of kernel_bundle context"); + } + if (!DevImpl->extOneapiCanCompile(MRTCBinInfo->MLanguage)) { + // This error cannot not be exercised in the current implementation, as + // compatibility with a source language depends on the backend's + // capabilities and all devices in one context share the same backend in + // the current implementation, so this would lead to an error already + // during construction of the source bundle. + throw sycl::exception(make_error_code(errc::invalid), + "device does not support source language"); + } + } + return createSYCLImages(Devices, bundle_state::object, CompileOptions, + LogPtr, RegisteredKernelNames, OutDeviceBins); + } + private: bool hasRTDeviceBinaryImage() const noexcept { return std::holds_alternative(MBinImage) && @@ -1145,6 +971,275 @@ class device_image_impl { } } + std::vector> + createSYCLImages(const std::vector &Devices, bundle_state State, + const std::vector &Options, std::string *LogPtr, + const std::vector &RegisteredKernelNames, + std::vector> + &OutDeviceBins) const { + assert(MRTCBinInfo); + assert(MRTCBinInfo->MLanguage == syclex::source_language::sycl); + assert(std::holds_alternative(MBinImage)); + + // Build device images via the program manager. + const std::string &SourceStr = std::get(MBinImage); + std::ostringstream SourceExt; + if (!RegisteredKernelNames.empty()) { + SourceExt << SourceStr << '\n'; + + auto EmitEntry = + [&SourceExt](const std::string &Name) -> std::ostringstream & { + SourceExt << " {\"" << Name << "\", " << Name << "}"; + return SourceExt; + }; + + SourceExt << "[[__sycl_detail__::__registered_kernels__(\n"; + for (auto It = RegisteredKernelNames.begin(), + SecondToLast = RegisteredKernelNames.end() - 1; + It != SecondToLast; ++It) { + EmitEntry(*It) << ",\n"; + } + EmitEntry(RegisteredKernelNames.back()) << "\n"; + SourceExt << ")]];\n"; + } + + auto [Binaries, Prefix] = syclex::detail::SYCL_JIT_Compile( + RegisteredKernelNames.empty() ? SourceStr : SourceExt.str(), + MRTCBinInfo->MIncludePairs, Options, LogPtr); + + auto &PM = detail::ProgramManager::getInstance(); + + // Add all binaries and keep the images for processing. + std::vector>>> + NewImages; + NewImages.reserve(Binaries->NumDeviceBinaries); + for (int I = 0; I < Binaries->NumDeviceBinaries; I++) { + sycl_device_binary Binary = &(Binaries->DeviceBinaries[I]); + RTDeviceBinaryImage *NewImage = nullptr; + auto KernelIDs = std::make_shared>(); + PM.addImage(Binary, &NewImage, KernelIDs.get()); + if (NewImage) + NewImages.push_back( + std::make_pair(std::move(NewImage), std::move(KernelIDs))); + } + + // Now bring all images into the proper state. Note that we do this in a + // separate pass over NewImages to make sure dependency images have been + // registered beforehand. + std::vector> Result; + Result.reserve(NewImages.size()); + for (auto &[NewImage, KernelIDs] : NewImages) { + std::set KernelNames; + std::unordered_map MangledKernelNames; + std::unordered_set DeviceGlobalIDSet; + std::vector DeviceGlobalIDVec; + std::vector DeviceGlobalNames; + std::vector> DeviceGlobalAllocations; + + for (const auto &KernelID : *KernelIDs) { + std::string_view KernelName{KernelID.get_name()}; + if (KernelName.find(Prefix) == 0) { + KernelName.remove_prefix(Prefix.length()); + KernelNames.emplace(KernelName); + static constexpr std::string_view SYCLKernelMarker{"__sycl_kernel_"}; + if (KernelName.find(SYCLKernelMarker) == 0) { + // extern "C" declaration, implicitly register kernel without the + // marker. + std::string_view KernelNameWithoutMarker{KernelName}; + KernelNameWithoutMarker.remove_prefix(SYCLKernelMarker.length()); + MangledKernelNames.emplace(KernelNameWithoutMarker, KernelName); + } + } + + for (const sycl_device_binary_property &RKProp : + NewImage->getRegisteredKernels()) { + // Mangled names. + auto BA = DeviceBinaryProperty(RKProp).asByteArray(); + auto MangledNameLen = BA.consume() / 8 /*bits in a byte*/; + std::string_view MangledName{ + reinterpret_cast(BA.begin()), MangledNameLen}; + MangledKernelNames.emplace(RKProp->Name, MangledName); + } + + // Device globals. + for (const auto &DeviceGlobalProp : NewImage->getDeviceGlobals()) { + std::string_view DeviceGlobalName{DeviceGlobalProp->Name}; + assert(DeviceGlobalName.find(Prefix) == 0); + bool Inserted = false; + std::tie(std::ignore, Inserted) = + DeviceGlobalIDSet.emplace(DeviceGlobalName); + if (Inserted) { + DeviceGlobalIDVec.emplace_back(DeviceGlobalName); + DeviceGlobalName.remove_prefix(Prefix.length()); + DeviceGlobalNames.emplace_back(DeviceGlobalName); + } + } + } + + // Device globals are usually statically allocated and registered in the + // integration footer, which we don't have in the RTC context. Instead, + // we dynamically allocate storage tied to the executable kernel bundle. + for (DeviceGlobalMapEntry *DeviceGlobalEntry : + PM.getDeviceGlobalEntries(DeviceGlobalIDVec)) { + + size_t AllocSize = DeviceGlobalEntry->MDeviceGlobalTSize; // init value + if (!DeviceGlobalEntry->MIsDeviceImageScopeDecorated) { + // Consider storage for device USM pointer. + AllocSize += sizeof(void *); + } + auto Alloc = std::make_unique(AllocSize); + std::string_view DeviceGlobalName{DeviceGlobalEntry->MUniqueId}; + PM.addOrInitDeviceGlobalEntry(Alloc.get(), DeviceGlobalName.data()); + DeviceGlobalAllocations.push_back(std::move(Alloc)); + + // Drop the RTC prefix from the entry's symbol name. Note that the PM + // still manages this device global under its prefixed name. + assert(DeviceGlobalName.find(Prefix) == 0); + DeviceGlobalName.remove_prefix(Prefix.length()); + DeviceGlobalEntry->MUniqueId = DeviceGlobalName; + } + + auto DGRegs = std::make_shared( + getSyclObjImpl(MContext), std::string{Prefix}, + std::move(DeviceGlobalNames), std::move(DeviceGlobalAllocations)); + + // Filter the devices that support the image requirements. + std::vector SupportingDevs = Devices; + auto NewSupportingDevsEnd = std::remove_if( + SupportingDevs.begin(), SupportingDevs.end(), + [NewImage](const sycl::device &SDev) { + return !doesDevSupportDeviceRequirements(SDev, *NewImage); + }); + SupportingDevs.erase(NewSupportingDevsEnd, SupportingDevs.end()); + + // Mark the image as input so the program manager will bring it into + // the right state. + auto DevImgImpl = std::make_shared( + NewImage, MContext, std::move(SupportingDevs), bundle_state::input, + std::move(KernelIDs), MRTCBinInfo->MLanguage, std::move(KernelNames), + std::move(MangledKernelNames), std::string{Prefix}, + std::move(DGRegs)); + + // Resolve dependencies. + // If we are compiling to object, we do not want to error for unresolved + // imports. + // TODO: Consider making a collectDeviceImageDeps variant that takes a + // set reference and inserts into that instead. + std::set ImgDeps; + for (const device &Device : DevImgImpl->get_devices()) { + std::set DevImgDeps = PM.collectDeviceImageDeps( + *NewImage, Device, + /*ErrorOnUnresolvableImport=*/State == bundle_state::executable); + ImgDeps.insert(DevImgDeps.begin(), DevImgDeps.end()); + } + + // Pack main image and dependencies together. + std::vector NewImageAndDeps; + NewImageAndDeps.reserve( + 1 + ((State == bundle_state::executable) * ImgDeps.size())); + NewImageAndDeps.push_back( + createSyclObjFromImpl(std::move(DevImgImpl))); + const std::vector &SupportingDevsRef = + getSyclObjImpl(NewImageAndDeps[0])->get_devices(); + if (State == bundle_state::executable) { + // If target is executable we bundle the image and dependencies together + // and bring it into state. + for (RTDeviceBinaryImage *ImgDep : ImgDeps) + NewImageAndDeps.push_back(PM.createDependencyImage( + MContext, SupportingDevsRef, ImgDep, bundle_state::input)); + } else if (State == bundle_state::object) { + // If the target is object, we bring the dependencies into object state + // individually and put them in the bundle. + for (RTDeviceBinaryImage *ImgDep : ImgDeps) { + DevImgPlainWithDeps ImgDepWithDeps{PM.createDependencyImage( + MContext, SupportingDevsRef, ImgDep, bundle_state::input)}; + PM.bringSYCLDeviceImageToState(ImgDepWithDeps, State); + Result.push_back(getSyclObjImpl(ImgDepWithDeps.getMain())); + } + } + + DevImgPlainWithDeps ImgWithDeps(std::move(NewImageAndDeps)); + PM.bringSYCLDeviceImageToState(ImgWithDeps, State); + Result.push_back(getSyclObjImpl(ImgWithDeps.getMain())); + } + + OutDeviceBins.emplace_back( + std::make_shared(std::move(Binaries))); + return Result; + } + + ur_program_handle_t + createProgramFromSource(const std::vector Devices, + const std::vector &Options, + std::string *LogPtr) const { + const std::shared_ptr &ContextImpl = + getSyclObjImpl(MContext); + const AdapterPtr &Adapter = ContextImpl->getAdapter(); + const auto spirv = [&]() -> std::vector { + switch (MRTCBinInfo->MLanguage) { + case syclex::source_language::opencl: { + // if successful, the log is empty. if failed, throws an error with + // the compilation log. + const auto &SourceStr = std::get(MBinImage); + std::vector IPVersionVec(Devices.size()); + std::transform(Devices.begin(), Devices.end(), IPVersionVec.begin(), + [&](const sycl::device &SyclDev) { + uint32_t ipVersion = 0; + Adapter->call( + getSyclObjImpl(SyclDev)->getHandleRef(), + UR_DEVICE_INFO_IP_VERSION, sizeof(uint32_t), + &ipVersion, nullptr); + return ipVersion; + }); + return syclex::detail::OpenCLC_to_SPIRV(SourceStr, IPVersionVec, + Options, LogPtr); + } + case syclex::source_language::spirv: { + const auto &SourceBytes = std::get>(MBinImage); + std::vector Result(SourceBytes.size()); + std::transform(SourceBytes.cbegin(), SourceBytes.cend(), Result.begin(), + [](std::byte B) { return static_cast(B); }); + return Result; + } + default: + break; + } + throw sycl::exception( + make_error_code(errc::invalid), + "SYCL C++, OpenCL C and SPIR-V are the only supported " + "languages at this time"); + }(); + + ur_program_handle_t UrProgram = nullptr; + Adapter->call(ContextImpl->getHandleRef(), + spirv.data(), spirv.size(), + nullptr, &UrProgram); + // program created by urProgramCreateWithIL is implicitly retained. + if (UrProgram == nullptr) + throw sycl::exception( + sycl::make_error_code(errc::invalid), + "urProgramCreateWithIL resulted in a null program handle."); + + return UrProgram; + } + + static std::vector + getKernelNamesFromURProgram(const AdapterPtr &Adapter, + ur_program_handle_t UrProgram) { + // Get the kernel names. + size_t KernelNamesSize; + Adapter->call( + UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &KernelNamesSize); + + // semi-colon delimited list of kernel names. + std::string KernelNamesStr(KernelNamesSize, ' '); + Adapter->call( + UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, KernelNamesStr.size(), + &KernelNamesStr[0], nullptr); + return detail::split_string(KernelNamesStr, ';'); + } + const std::variant, const RTDeviceBinaryImage *> MBinImage = static_cast(nullptr); @@ -1181,6 +1276,9 @@ class device_image_impl { // Optional information about the binary produced by the kernel compiler // extension. std::optional MRTCBinInfo = std::nullopt; + + // Used to store a dynamically created merged binary image, e.g. from linking. + std::unique_ptr MMergedImageStorage = nullptr; }; } // namespace detail diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 3cac4e5b2001f..0b0687a9abf4b 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -180,7 +181,6 @@ class kernel_bundle_impl { const std::vector> &ObjectBundles, std::vector Devs, const property_list &PropList) : MDevices(std::move(Devs)), MState(bundle_state::executable) { - if (MDevices.empty()) throw sycl::exception(make_error_code(errc::invalid), "Vector of devices is empty"); @@ -219,35 +219,139 @@ class kernel_bundle_impl { // TODO: Unify with c'tor for sycl::compile and sycl::build by calling // sycl::join on vector of kernel_bundles - // The loop below just links each device image separately, not linking any - // two device images together. This is correct so long as each device image - // has no unresolved symbols. That's the case when device images are created - // from generic SYCL APIs. There's no way in generic SYCL to create a kernel - // which references an undefined symbol. If we decide in the future to allow - // a backend interop API to create a "sycl::kernel_bundle" that references - // undefined symbols, then the logic in this loop will need to be changed. + // Due to a bug in L0, specializations with conflicting IDs will overwrite + // each other when linked together, so to avoid this issue we link + // regular offline-compiled SYCL device images in separation. + // TODO: Remove when spec const overwriting issue has been fixed in L0. + std::vector OfflineDeviceImages; + std::set> OfflineDeviceImageSet; for (const kernel_bundle &ObjectBundle : ObjectBundles) { for (const DevImgPlainWithDeps &DeviceImageWithDeps : getSyclObjImpl(ObjectBundle)->MDeviceImages) { + if (getSyclObjImpl(DeviceImageWithDeps.getMain())->getOriginMask() & + ImageOriginSYCLOffline) { + OfflineDeviceImages.push_back(&DeviceImageWithDeps); + for (const device_image_plain &DevImg : DeviceImageWithDeps) + OfflineDeviceImageSet.insert(getSyclObjImpl(DevImg)); + } + } + } - // Skip images which are not compatible with devices provided - if (std::none_of(MDevices.begin(), MDevices.end(), - [&DeviceImageWithDeps](const device &Dev) { - return getSyclObjImpl(DeviceImageWithDeps.getMain()) - ->compatible_with_device(Dev); - })) - continue; - - std::vector LinkedResults = - detail::ProgramManager::getInstance().link(DeviceImageWithDeps, - MDevices, PropList); - MDeviceImages.insert(MDeviceImages.end(), LinkedResults.begin(), - LinkedResults.end()); - MUniqueDeviceImages.insert(MUniqueDeviceImages.end(), - LinkedResults.begin(), LinkedResults.end()); + // Collect all images. + std::vector DevImages; + for (const kernel_bundle &ObjectBundle : + ObjectBundles) + for (const device_image_plain &DevImg : + getSyclObjImpl(ObjectBundle)->MUniqueDeviceImages) + if (OfflineDeviceImageSet.find(getSyclObjImpl(DevImg)) == + OfflineDeviceImageSet.end()) + DevImages.push_back(DevImg); + + // Create a map between exported symbols and their indices in the device + // images collection. + std::map ExportMap; + for (size_t I = 0; I < DevImages.size(); ++I) { + auto DevImageImpl = getSyclObjImpl(DevImages[I]); + if (DevImageImpl->get_bin_image_ref() == nullptr) + continue; + for (const sycl_device_binary_property &ESProp : + DevImageImpl->get_bin_image_ref()->getExportedSymbols()) { + if (ExportMap.find(ESProp->Name) != ExportMap.end()) + throw sycl::exception(make_error_code(errc::invalid), + "Duplicate exported symbol \"" + + std::string{ESProp->Name} + + "\" found in binaries."); + ExportMap.emplace(ESProp->Name, I); + } + } + + // Create dependency mappings. + std::vector> Dependencies; + Dependencies.resize(DevImages.size()); + for (size_t I = 0; I < DevImages.size(); ++I) { + auto DevImageImpl = getSyclObjImpl(DevImages[I]); + if (DevImageImpl->get_bin_image_ref() == nullptr) + continue; + std::set DeviceImageDepsSet; + for (const sycl_device_binary_property &ISProp : + DevImageImpl->get_bin_image_ref()->getImportedSymbols()) { + auto ExportSymbolIt = ExportMap.find(ISProp->Name); + if (ExportSymbolIt == ExportMap.end()) + throw sycl::exception(make_error_code(errc::invalid), + "No exported symbol \"" + + std::string{ISProp->Name} + + "\" found in linked images."); + DeviceImageDepsSet.emplace(ExportSymbolIt->second); } + Dependencies[I].insert(Dependencies[I].end(), DeviceImageDepsSet.begin(), + DeviceImageDepsSet.end()); } + + // Create a link graph and clone it for each device. + const std::shared_ptr &FirstDevice = + getSyclObjImpl(MDevices[0]); + std::map, LinkGraph> + DevImageLinkGraphs; + const auto &FirstGraph = + DevImageLinkGraphs + .emplace(FirstDevice, + LinkGraph{DevImages, Dependencies}) + .first->second; + for (size_t I = 1; I < MDevices.size(); ++I) + DevImageLinkGraphs.emplace(getSyclObjImpl(MDevices[I]), + FirstGraph.Clone()); + + // Poison the images based on whether the corresponding device supports it. + for (auto &GraphIt : DevImageLinkGraphs) { + device Dev = createSyclObjFromImpl(GraphIt.first); + GraphIt.second.Poison([&Dev](const device_image_plain &DevImg) { + return !getSyclObjImpl(DevImg)->compatible_with_device(Dev); + }); + } + + // Unify graphs after poisoning. + std::map>, + LinkGraph> + UnifiedGraphs = UnifyGraphs(DevImageLinkGraphs); + + // Link based on the resulting graphs. + for (auto &GraphIt : UnifiedGraphs) { + std::vector DeviceGroup; + DeviceGroup.reserve(GraphIt.first.size()); + for (const auto &DeviceImgImpl : GraphIt.first) + DeviceGroup.emplace_back(createSyclObjFromImpl(DeviceImgImpl)); + + std::vector LinkedResults = + detail::ProgramManager::getInstance().link( + GraphIt.second.GetNodeValues(), DeviceGroup, PropList); + MDeviceImages.insert(MDeviceImages.end(), LinkedResults.begin(), + LinkedResults.end()); + MUniqueDeviceImages.insert(MUniqueDeviceImages.end(), + LinkedResults.begin(), LinkedResults.end()); + // TODO: Kernels may be in multiple device images, so mapping should be + // added. + } + + // ... And link the offline images in separation. (Workaround.) + for (const DevImgPlainWithDeps *DeviceImageWithDeps : OfflineDeviceImages) { + // Skip images which are not compatible with devices provided + if (std::none_of(MDevices.begin(), MDevices.end(), + [DeviceImageWithDeps](const device &Dev) { + return getSyclObjImpl(DeviceImageWithDeps->getMain()) + ->compatible_with_device(Dev); + })) + continue; + + std::vector LinkedResults = + detail::ProgramManager::getInstance().link( + DeviceImageWithDeps->getAll(), MDevices, PropList); + MDeviceImages.insert(MDeviceImages.end(), LinkedResults.begin(), + LinkedResults.end()); + MUniqueDeviceImages.insert(MUniqueDeviceImages.end(), + LinkedResults.begin(), LinkedResults.end()); + } + removeDuplicateImages(); for (const kernel_bundle &Bundle : ObjectBundles) { @@ -374,11 +478,11 @@ class kernel_bundle_impl { kernel_bundle_impl( const context &Context, const std::vector &Devs, std::vector &&DevImgs, - std::vector> &&DevBinaries) + std::vector> &&DevBinaries, + bundle_state State) : MContext(Context), MDevices(Devs), MSharedDeviceBinaries(std::move(DevBinaries)), - MUniqueDeviceImages(std::move(DevImgs)), - MState(bundle_state::executable) { + MUniqueDeviceImages(std::move(DevImgs)), MState(State) { common_ctor_checks(); removeDuplicateImages(); @@ -407,7 +511,33 @@ class kernel_bundle_impl { NewDevImgs.emplace_back(std::move(DevImgImpl)); } return std::make_shared( - MContext, Devices, std::move(NewDevImgs), std::move(NewBinReso)); + MContext, Devices, std::move(NewDevImgs), std::move(NewBinReso), + bundle_state::executable); + } + + std::shared_ptr + compile_from_source(const std::vector Devices, + const std::vector &CompileOptions, + std::string *LogPtr, + const std::vector &RegisteredKernelNames) { + assert(MState == bundle_state::ext_oneapi_source && + "bundle_state::ext_oneapi_source required"); + assert(allSourceBasedImages() && "All images must be source-based."); + + std::vector NewDevImgs; + std::vector> NewBinReso; + for (device_image_plain &DevImg : MUniqueDeviceImages) { + std::vector> NewDevImgImpls = + getSyclObjImpl(DevImg)->compileFromSource( + Devices, CompileOptions, LogPtr, RegisteredKernelNames, + NewBinReso); + NewDevImgs.reserve(NewDevImgImpls.size()); + for (std::shared_ptr &DevImgImpl : NewDevImgImpls) + NewDevImgs.emplace_back(std::move(DevImgImpl)); + } + return std::make_shared( + MContext, Devices, std::move(NewDevImgs), std::move(NewBinReso), + bundle_state::object); } public: diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index a3331019a97b2..ec278146b0ff4 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -669,7 +669,8 @@ static bool checkLinkingSupport(const device &Dev, std::set ProgramManager::collectDeviceImageDeps(const RTDeviceBinaryImage &Img, - const device &Dev) { + const device &Dev, + bool ErrorOnUnresolvableImport) { // TODO collecting dependencies for virtual functions and imported symbols // should be combined since one can lead to new unresolved dependencies for // the other. @@ -677,14 +678,16 @@ ProgramManager::collectDeviceImageDeps(const RTDeviceBinaryImage &Img, collectDependentDeviceImagesForVirtualFunctions(Img, Dev); std::set ImageDeps = - collectDeviceImageDepsForImportedSymbols(Img, Dev); + collectDeviceImageDepsForImportedSymbols(Img, Dev, + ErrorOnUnresolvableImport); DeviceImagesToLink.insert(ImageDeps.begin(), ImageDeps.end()); return DeviceImagesToLink; } std::set ProgramManager::collectDeviceImageDepsForImportedSymbols( - const RTDeviceBinaryImage &MainImg, const device &Dev) { + const RTDeviceBinaryImage &MainImg, const device &Dev, + bool ErrorOnUnresolvableImport) { std::set DeviceImagesToLink; std::set HandledSymbols; std::queue WorkList; @@ -722,7 +725,7 @@ ProgramManager::collectDeviceImageDepsForImportedSymbols( } break; } - if (!Found) + if (ErrorOnUnresolvableImport && !Found) throw sycl::exception(make_error_code(errc::build), "No device image found for external symbol " + Symbol); @@ -1431,7 +1434,8 @@ ProgramManager::ProgramManager() UseSpvEnv + ": " + SpvFile); File.seekg(0, std::ios::end); size_t Size = File.tellg(); - std::unique_ptr Data(new char[Size]); + std::unique_ptr> Data(new char[Size], + std::free); File.seekg(0); File.read(Data.get(), Size); File.close(); @@ -1959,7 +1963,8 @@ void ProgramManager::addImage(sycl_device_binary RawImg, return; size_t ImgSize = static_cast(RawImg->BinaryEnd - RawImg->BinaryStart); - std::unique_ptr Data(new char[ImgSize]); + std::unique_ptr> Data( + new char[ImgSize], std::free); std::memcpy(Data.get(), RawImg->BinaryStart, ImgSize); auto DynBfloat16DeviceLibImg = std::make_unique(std::move(Data), ImgSize); @@ -2768,7 +2773,7 @@ void ProgramManager::bringSYCLDeviceImageToState( break; case bundle_state::object: { std::vector LinkedDevImages = - link(DeviceImage, MainImgImpl->get_devices(), + link(DeviceImage.getAll(), MainImgImpl->get_devices(), /*PropList=*/{}); // Since only one device image is passed here one output device image is // expected @@ -2893,8 +2898,9 @@ ProgramManager::compile(const DevImgPlainWithDeps &ImgWithDeps, std::optional RTCInfo = InputImpl->getRTCInfo(); DeviceImageImplPtr ObjectImpl = std::make_shared( - InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs, - bundle_state::object, InputImpl->get_kernel_ids_ptr(), Prog, + InputImpl->get_bin_image_ref(), InputImpl->get_context(), + std::vector{Devs}, bundle_state::object, + InputImpl->get_kernel_ids_ptr(), Prog, InputImpl->get_spec_const_data_ref(), InputImpl->get_spec_const_blob_ref(), InputImpl->getOriginMask(), std::move(RTCInfo)); @@ -2922,10 +2928,14 @@ ProgramManager::compile(const DevImgPlainWithDeps &ImgWithDeps, return CompiledImages; } -static void mergeImageData(const std::vector &Imgs, - std::vector &KernelIDs, - std::vector &NewSpecConstBlob, - device_image_impl::SpecConstMapT &NewSpecConstMap) { +// Returns a merged device binary image, new set of kernel IDs and new +// specialization constant data. +static const RTDeviceBinaryImage * +mergeImageData(const std::vector &Imgs, + std::vector &KernelIDs, + std::vector &NewSpecConstBlob, + device_image_impl::SpecConstMapT &NewSpecConstMap, + std::unique_ptr &MergedImageStorage) { for (const device_image_plain &Img : Imgs) { const std::shared_ptr &DeviceImageImpl = getSyclObjImpl(Img); @@ -2964,10 +2974,28 @@ static void mergeImageData(const std::vector &Imgs, } // device_image_impl expects kernel ids to be sorted for fast search std::sort(KernelIDs.begin(), KernelIDs.end(), LessByHash{}); + + // If there is only a single image, use it as the result. + if (Imgs.size() == 1) + return getSyclObjImpl(Imgs[0])->get_bin_image_ref(); + + // Otherwise we create a dynamic image with the merged information. + std::vector BinImgs; + BinImgs.reserve(Imgs.size()); + for (const device_image_plain &Img : Imgs) { + auto ImgBinRef = getSyclObjImpl(Img)->get_bin_image_ref(); + // For some cases, like SYCL kernel compiler binaries, we don't have + // binaries. For these we assume no properties associated, so they can be + // safely ignored. + if (ImgBinRef) + BinImgs.push_back(ImgBinRef); + } + MergedImageStorage = std::make_unique(BinImgs); + return MergedImageStorage.get(); } std::vector -ProgramManager::link(const DevImgPlainWithDeps &ImgWithDeps, +ProgramManager::link(const std::vector &Imgs, const std::vector &Devs, const property_list &PropList) { { @@ -2976,7 +3004,6 @@ ProgramManager::link(const DevImgPlainWithDeps &ImgWithDeps, PropList, NoAllowedPropertiesCheck, NoAllowedPropertiesCheck); } - const std::vector &Imgs = ImgWithDeps.getAll(); std::vector URPrograms; URPrograms.reserve(Imgs.size()); for (const device_image_plain &Img : Imgs) @@ -2986,18 +3013,17 @@ ProgramManager::link(const DevImgPlainWithDeps &ImgWithDeps, URDevices.reserve(Devs.size()); for (const device &Dev : Devs) URDevices.push_back(getSyclObjImpl(Dev)->getHandleRef()); - + // FIXME: Linker options are picked from the first object, but is that safe? std::string LinkOptionsStr; applyLinkOptionsFromEnvironment(LinkOptionsStr); - const device_image_plain &MainImg = ImgWithDeps.getMain(); - const std::shared_ptr &InputImpl = getSyclObjImpl(MainImg); - if (LinkOptionsStr.empty()) { + const std::shared_ptr &FirstImgImpl = + getSyclObjImpl(Imgs[0]); + if (LinkOptionsStr.empty() && FirstImgImpl->get_bin_image_ref()) appendLinkOptionsFromImage(LinkOptionsStr, - *(InputImpl->get_bin_image_ref())); - } + *(FirstImgImpl->get_bin_image_ref())); // Should always come last! appendLinkEnvironmentVariablesThatAppend(LinkOptionsStr); - const context &Context = InputImpl->get_context(); + const context &Context = FirstImgImpl->get_context(); const ContextImplPtr &ContextImpl = getSyclObjImpl(Context); const AdapterPtr &Adapter = ContextImpl->getAdapter(); @@ -3034,7 +3060,9 @@ ProgramManager::link(const DevImgPlainWithDeps &ImgWithDeps, std::shared_ptr> KernelIDs{new std::vector}; std::vector NewSpecConstBlob; device_image_impl::SpecConstMapT NewSpecConstMap; - mergeImageData(Imgs, *KernelIDs, NewSpecConstBlob, NewSpecConstMap); + std::unique_ptr MergedImageStorage; + const RTDeviceBinaryImage *NewBinImg = mergeImageData( + Imgs, *KernelIDs, NewSpecConstBlob, NewSpecConstMap, MergedImageStorage); { std::lock_guard Lock(MNativeProgramsMutex); @@ -3043,10 +3071,11 @@ ProgramManager::link(const DevImgPlainWithDeps &ImgWithDeps, // underlying program disposed of). Protecting from incorrect values by // removal of map entries with same handle (obviously invalid entries). std::ignore = NativePrograms.erase(LinkedProg); - for (const device_image_plain &Img : ImgWithDeps) { - NativePrograms.insert( - {LinkedProg, - {ContextImpl, getSyclObjImpl(Img)->get_bin_image_ref()}}); + for (const device_image_plain &Img : Imgs) { + const std::shared_ptr &ImgImpl = getSyclObjImpl(Img); + if (ImgImpl->get_bin_image_ref()) + NativePrograms.insert( + {LinkedProg, {ContextImpl, ImgImpl->get_bin_image_ref()}}); } } @@ -3056,20 +3085,21 @@ ProgramManager::link(const DevImgPlainWithDeps &ImgWithDeps, // input ones and then merge them afterwards. std::vector *> RTCInfoPtrs; - RTCInfoPtrs.reserve(ImgWithDeps.size()); - for (const device_image_plain &DevImg : ImgWithDeps) { + RTCInfoPtrs.reserve(Imgs.size()); + for (const device_image_plain &DevImg : Imgs) { const DeviceImageImplPtr &DevImgImpl = getSyclObjImpl(DevImg); CombinedOrigins |= DevImgImpl->getOriginMask(); RTCInfoPtrs.emplace_back(&(DevImgImpl->getRTCInfo())); } auto MergedRTCInfo = detail::KernelCompilerBinaryInfo::Merge(RTCInfoPtrs); - auto BinImg = InputImpl->get_bin_image_ref(); DeviceImageImplPtr ExecutableImpl = std::make_shared( - BinImg, Context, Devs, bundle_state::executable, std::move(KernelIDs), - LinkedProg, std::move(NewSpecConstMap), std::move(NewSpecConstBlob), - CombinedOrigins, std::move(MergedRTCInfo)); + NewBinImg, Context, std::vector{Devs}, + bundle_state::executable, std::move(KernelIDs), LinkedProg, + std::move(NewSpecConstMap), std::move(NewSpecConstBlob), + CombinedOrigins, std::move(MergedRTCInfo), + std::move(MergedImageStorage)); // TODO: Make multiple sets of device images organized by devices they are // compiled for. @@ -3104,6 +3134,8 @@ ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, std::vector SpecConstBlob; device_image_impl::SpecConstMapT SpecConstMap; + std::unique_ptr MergedImageStorage; + const RTDeviceBinaryImage *ResultBinImg = MainInputImpl->get_bin_image_ref(); if (DevImgWithDeps.hasDeps()) { KernelIDs = std::make_shared>(); // Sort the images to make the order of spec constant values used for @@ -3114,7 +3146,8 @@ ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, return getSyclObjImpl(A)->get_bin_image_ref()->getImageID() < getSyclObjImpl(B)->get_bin_image_ref()->getImageID(); }); - mergeImageData(SortedImgs, *KernelIDs, SpecConstBlob, SpecConstMap); + ResultBinImg = mergeImageData(SortedImgs, *KernelIDs, SpecConstBlob, + SpecConstMap, MergedImageStorage); } else { KernelIDs = MainInputImpl->get_kernel_ids_ptr(); SpecConstBlob = MainInputImpl->get_spec_const_blob_ref(); @@ -3137,10 +3170,10 @@ ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, std::move(BinImgs), Context, Devs, &DevImgWithDeps, SpecConstBlob); DeviceImageImplPtr ExecImpl = std::make_shared( - MainInputImpl->get_bin_image_ref(), Context, Devs, + ResultBinImg, Context, std::vector{Devs}, bundle_state::executable, std::move(KernelIDs), ResProgram, std::move(SpecConstMap), std::move(SpecConstBlob), CombinedOrigins, - std::move(MergedRTCInfo)); + std::move(MergedRTCInfo), std::move(MergedImageStorage)); return createSyclObjFromImpl(std::move(ExecImpl)); } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index f683a1c675935..1a3b60bfbc5dd 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -342,9 +342,9 @@ class ProgramManager { // Produces set of device images by convering input device images to object // the executable state - std::vector link(const DevImgPlainWithDeps &ImgWithDeps, - const std::vector &Devs, - const property_list &PropList); + std::vector + link(const std::vector &Imgs, + const std::vector &Devs, const property_list &PropList); // Produces new device image by converting input device image to the // executable state @@ -370,10 +370,12 @@ class ProgramManager { getRawDeviceImages(const std::vector &KernelIDs); std::set - collectDeviceImageDeps(const RTDeviceBinaryImage &Img, const device &Dev); + collectDeviceImageDeps(const RTDeviceBinaryImage &Img, const device &Dev, + bool ErrorOnUnresolvableImport = true); std::set collectDeviceImageDepsForImportedSymbols(const RTDeviceBinaryImage &Img, - const device &Dev); + const device &Dev, + bool ErrorOnUnresolvableImport); private: ProgramManager(ProgramManager const &) = delete; diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index ee3dbc78319e0..dc0ff4030472f 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -379,6 +379,7 @@ bool is_compatible(const std::vector &KernelIDs, const device &Dev) { namespace ext::oneapi::experimental { using source_kb = kernel_bundle; +using obj_kb = kernel_bundle; using exe_kb = kernel_bundle; using kernel_bundle_impl = sycl::detail::kernel_bundle_impl; @@ -458,6 +459,38 @@ source_kb make_kernel_bundle_from_source(const context &SyclContext, return sycl::detail::createSyclObjFromImpl(std::move(KBImpl)); } +///////////////////////// +// syclex::detail::compile_from_source(source_kb) => obj_kb +///////////////////////// + +obj_kb compile_from_source( + source_kb &SourceKB, const std::vector &Devices, + const std::vector &BuildOptions, + sycl::detail::string *LogView, + const std::vector &RegisteredKernelNames) { + std::vector Options; + for (const sycl::detail::string_view option : BuildOptions) + Options.push_back(option.data()); + + std::vector KernelNames; + for (const sycl::detail::string_view name : RegisteredKernelNames) + KernelNames.push_back(name.data()); + + std::string Log; + std::string *LogPtr = nullptr; + if (LogView) + LogPtr = &Log; + std::vector UniqueDevices = + sycl::detail::removeDuplicateDevices(Devices); + std::shared_ptr sourceImpl = getSyclObjImpl(SourceKB); + std::shared_ptr KBImpl = sourceImpl->compile_from_source( + UniqueDevices, Options, LogPtr, KernelNames); + auto result = sycl::detail::createSyclObjFromImpl(KBImpl); + if (LogView) + *LogView = Log; + return result; +} + ///////////////////////// // syclex::detail::build_from_source(source_kb) => exe_kb ///////////////////////// diff --git a/sycl/test-e2e/KernelCompiler/sycl_link.cpp b/sycl/test-e2e/KernelCompiler/sycl_link.cpp new file mode 100644 index 0000000000000..c574a7a5ca813 --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/sycl_link.cpp @@ -0,0 +1,102 @@ +//==----------- sycl_link.cpp --- kernel_compiler extension tests ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) +// REQUIRES: aspect-usm_shared_allocations + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// -- Test for a simple linking case with source files compiled from SYCL source +// -- at runtime. + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// RUN: %{l0_leak_check} %{run} %t.out + +#include +#include +#include + +namespace syclex = sycl::ext::oneapi::experimental; +using source_kb = sycl::kernel_bundle; +using obj_kb = sycl::kernel_bundle; +using exe_kb = sycl::kernel_bundle; + +// TODO: remove SYCL_EXTERNAL from the kernel once it is no longer needed. +auto constexpr SYCLImportSource = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size); + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel(int *Ptr, int Size) { + TestFunc(Ptr, Size); +} + +)==="; + +auto constexpr SYCLExportSource = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size) { + for (size_t I = 0; I < Size; ++I) + Ptr[I] = I; +} + +)==="; + +int main() { + sycl::queue Q; + + if (!Q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl)) { + std::cout << "Device does not support one of the source languages: " + << Q.get_device().get_info() + << std::endl; + return 0; + } + + source_kb ImportSourceKB = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLImportSource); + source_kb ExportSourceKB = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLExportSource); + + syclex::properties BuildOpts{ + syclex::build_options{"-fsycl-allow-device-image-dependencies"}}; + obj_kb ImportObjKB = syclex::compile(ImportSourceKB, BuildOpts); + obj_kb ExportObjKB = syclex::compile(ExportSourceKB, BuildOpts); + + exe_kb ExecKB = sycl::link({ImportObjKB, ExportObjKB}); + + sycl::kernel Kernel = ExecKB.ext_oneapi_get_kernel("TestKernel"); + + constexpr int Range = 10; + int *USMPtr = sycl::malloc_shared(Range, Q); + + memset(USMPtr, 0, Range * sizeof(int)); + Q.submit([&](sycl::handler &Handler) { + Handler.set_args(USMPtr, Range); + Handler.single_task(Kernel); + }); + Q.wait(); + + int Failed = 0; + for (size_t I = 0; I < Range; ++I) { + if (USMPtr[I] != I) { + std::cout << "Unexpected value at index " << I << ": " << USMPtr[I] + << " != " << I << std::endl; + ++Failed; + } + } + + sycl::free(USMPtr, Q); + + return Failed; +} \ No newline at end of file diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp new file mode 100644 index 0000000000000..b444bd67db3e2 --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp @@ -0,0 +1,148 @@ +//==----------- sycl_link.cpp --- kernel_compiler extension tests ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) +// REQUIRES: aspect-usm_shared_allocations + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// -- Test for linking where two kernels use the same imported symbols. + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// RUN: %{l0_leak_check} %{run} %t.out + +#include +#include +#include + +namespace syclex = sycl::ext::oneapi::experimental; +using source_kb = sycl::kernel_bundle; +using obj_kb = sycl::kernel_bundle; +using exe_kb = sycl::kernel_bundle; + +// TODO: remove SYCL_EXTERNAL from the kernel once it is no longer needed. +auto constexpr SYCLImportSource1 = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size); + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel1(int *Ptr, int Size) { + TestFunc(Ptr, Size); +} + +)==="; + +// TODO: remove SYCL_EXTERNAL from the kernel once it is no longer needed. +auto constexpr SYCLImportSource2 = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size); + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel2(int *Ptr, int Size) { + TestFunc(Ptr, Size); + for (size_t I = 0; I < Size; ++I) + ++Ptr[I]; +} + +)==="; + +auto constexpr SYCLExportSource = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size) { + for (size_t I = 0; I < Size; ++I) + Ptr[I] = I; +} + +)==="; + +int main() { + sycl::queue Q; + + if (!Q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl)) { + std::cout << "Device does not support one of the source languages: " + << Q.get_device().get_info() + << std::endl; + return 0; + } + + source_kb ImportSourceKB1 = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLImportSource1); + source_kb ImportSourceKB2 = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLImportSource2); + source_kb ExportSourceKB = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLExportSource); + + syclex::properties BuildOpts{ + syclex::build_options{"-fsycl-allow-device-image-dependencies"}}; + obj_kb ImportObjKB1 = syclex::compile(ImportSourceKB1, BuildOpts); + obj_kb ImportObjKB2 = syclex::compile(ImportSourceKB2, BuildOpts); + obj_kb ExportObjKB = syclex::compile(ExportSourceKB, BuildOpts); + + exe_kb ExecKB = sycl::link({ImportObjKB1, ImportObjKB2, ExportObjKB}); + + constexpr int Range = 10; + int *USMPtr = sycl::malloc_shared(Range, Q); + + int Failed = 0; + { + sycl::kernel Kernel = ExecKB.ext_oneapi_get_kernel("TestKernel1"); + + memset(USMPtr, 0, Range * sizeof(int)); + Q.submit([&](sycl::handler &Handler) { + Handler.set_args(USMPtr, Range); + Handler.single_task(Kernel); + }); + Q.wait(); + + for (size_t I = 0; I < Range; ++I) { + if (USMPtr[I] != I) { + std::cout << "TestKernel1: Unexpected value at index " << I << ": " + << USMPtr[I] << " != " << I << std::endl; + ++Failed; + } + } + } + + if (Q.get_device().has(sycl::aspect::fp64)) { + sycl::kernel Kernel = ExecKB.ext_oneapi_get_kernel("TestKernel2"); + + memset(USMPtr, 0, Range * sizeof(int)); + Q.submit([&](sycl::handler &Handler) { + Handler.set_args(USMPtr, Range); + Handler.single_task(Kernel); + }); + Q.wait(); + + for (size_t I = 0; I < Range; ++I) { + const int Expected = I + 1; + if (USMPtr[I] != Expected) { + std::cout << "TestKernel2: Unexpected value at index " << I << ": " + << USMPtr[I] << " != " << Expected << std::endl; + ++Failed; + } + } + } else if (ExecKB.ext_oneapi_has_kernel("TestKernel2")) { + std::cout << "Device does not support fp64, but the kernel bundle still " + "has the kernel using it." + << std::endl; + ++Failed; + } + + sycl::free(USMPtr, Q); + + return Failed; +} \ No newline at end of file diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp new file mode 100644 index 0000000000000..2bae4a5062c5b --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp @@ -0,0 +1,144 @@ +//==----------- sycl_link.cpp --- kernel_compiler extension tests ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) +// REQUIRES: aspect-usm_shared_allocations + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// -- Test for linking where two kernels use the same imported symbols, but one +// -- may not be supported on the device. + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// RUN: %{l0_leak_check} %{run} %t.out + +#include +#include +#include + +namespace syclex = sycl::ext::oneapi::experimental; +using source_kb = sycl::kernel_bundle; +using obj_kb = sycl::kernel_bundle; +using exe_kb = sycl::kernel_bundle; + +// TODO: remove SYCL_EXTERNAL from the kernel once it is no longer needed. +auto constexpr SYCLImportSource1 = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size); + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel1(int *Ptr, int Size) { + TestFunc(Ptr, Size); +} + +)==="; + +// TODO: remove SYCL_EXTERNAL from the kernel once it is no longer needed. +auto constexpr SYCLImportSource2 = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size); + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel2(int *Ptr, int Size) { + TestFunc(Ptr, Size); + for (size_t I = 0; I < Size; ++I) + Ptr[I] = static_cast(static_cast(Ptr[I]) / 2.0); +} + +)==="; + +auto constexpr SYCLExportSource = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size) { + for (size_t I = 0; I < Size; ++I) + Ptr[I] = I; +} + +)==="; + +int main() { + sycl::queue Q; + + if (!Q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl)) { + std::cout << "Device does not support one of the source languages: " + << Q.get_device().get_info() + << std::endl; + return 0; + } + + source_kb ImportSourceKB1 = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLImportSource1); + source_kb ImportSourceKB2 = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLImportSource2); + source_kb ExportSourceKB = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLExportSource); + + syclex::properties BuildOpts{ + syclex::build_options{"-fsycl-allow-device-image-dependencies"}}; + obj_kb ImportObjKB1 = syclex::compile(ImportSourceKB1, BuildOpts); + obj_kb ImportObjKB2 = syclex::compile(ImportSourceKB2, BuildOpts); + obj_kb ExportObjKB = syclex::compile(ExportSourceKB, BuildOpts); + + exe_kb ExecKB = sycl::link({ImportObjKB1, ImportObjKB2, ExportObjKB}); + + constexpr int Range = 10; + int *USMPtr = sycl::malloc_shared(Range, Q); + + int Failed = 0; + { + sycl::kernel Kernel = ExecKB.ext_oneapi_get_kernel("TestKernel1"); + + memset(USMPtr, 0, Range * sizeof(int)); + Q.submit([&](sycl::handler &Handler) { + Handler.set_args(USMPtr, Range); + Handler.single_task(Kernel); + }); + Q.wait(); + + for (size_t I = 0; I < Range; ++I) { + if (USMPtr[I] != I) { + std::cout << "TestKernel1: Unexpected value at index " << I << ": " + << USMPtr[I] << " != " << I << std::endl; + ++Failed; + } + } + } + + { + sycl::kernel Kernel = ExecKB.ext_oneapi_get_kernel("TestKernel2"); + + memset(USMPtr, 0, Range * sizeof(int)); + Q.submit([&](sycl::handler &Handler) { + Handler.set_args(USMPtr, Range); + Handler.single_task(Kernel); + }); + Q.wait(); + + for (size_t I = 0; I < Range; ++I) { + const int Expected = static_cast(static_cast(I) / 2.0); + if (USMPtr[I] != Expected) { + std::cout << "TestKernel2: Unexpected value at index " << I << ": " + << USMPtr[I] << " != " << Expected << std::endl; + ++Failed; + } + } + } + + sycl::free(USMPtr, Q); + + return Failed; +} \ No newline at end of file diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index bbbe31d702d51..0b4f6d23971b7 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3074,6 +3074,7 @@ _ZN4sycl3_V13ext6oneapi12experimental6detail14image_mem_implC2ERKNS3_16image_des _ZN4sycl3_V13ext6oneapi12experimental6detail14image_mem_implD1Ev _ZN4sycl3_V13ext6oneapi12experimental6detail14image_mem_implD2Ev _ZN4sycl3_V13ext6oneapi12experimental6detail17build_from_sourceERNS0_13kernel_bundleILNS0_12bundle_stateE3EEERKSt6vectorINS0_6deviceESaISA_EERKS9_INS0_6detail11string_viewESaISG_EEPNSF_6stringESK_ +_ZN4sycl3_V13ext6oneapi12experimental6detail19compile_from_sourceERNS0_13kernel_bundleILNS0_12bundle_stateE3EEERKSt6vectorINS0_6deviceESaISA_EERKS9_INS0_6detail11string_viewESaISG_EEPNSF_6stringESK_ _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base11updateValueEPKNS3_14raw_kernel_argEm _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base11updateValueEPKvm _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base14updateAccessorEPKNS0_6detail16AccessorBaseHostE diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index ff747fcc07346..ae67f5afe1563 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -3797,6 +3797,7 @@ ?checkNodePropertiesAndThrow@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@KAXAEBVproperty_list@67@@Z ?clearArgs@handler@_V1@sycl@@AEAAXXZ ?code@exception@_V1@sycl@@QEBAAEBVerror_code@std@@XZ +?compile_from_source@detail@experimental@oneapi@ext@_V1@sycl@@YA?AV?$kernel_bundle@$00@56@AEAV?$kernel_bundle@$02@56@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV?$vector@Vstring_view@detail@_V1@sycl@@V?$allocator@Vstring_view@detail@_V1@sycl@@@std@@@std@@PEAVstring@156@2@Z ?compile_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBV?$kernel_bundle@$0A@@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBVproperty_list@23@@Z ?complete_fusion@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEAA?AVevent@56@AEBVproperty_list@56@@Z ?computeFallbackKernelBounds@handler@_V1@sycl@@AEAA?AV?$id@$01@23@_K0@Z diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index 1f8ce4897a5f2..2e73ca996c3b9 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 14 +// CHECK-NUM-MATCHES: 17 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see From 419929abfe757ab5921b363aa823a1a45042cca2 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Sun, 6 Apr 2025 23:39:33 -0700 Subject: [PATCH 02/31] Add trailing newline to tests Signed-off-by: Larsen, Steffen --- sycl/test-e2e/KernelCompiler/sycl_link.cpp | 2 +- sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp | 2 +- .../KernelCompiler/sycl_link_common_dep_optional_feature.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/test-e2e/KernelCompiler/sycl_link.cpp b/sycl/test-e2e/KernelCompiler/sycl_link.cpp index c574a7a5ca813..9ff9878e387ee 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_link.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_link.cpp @@ -99,4 +99,4 @@ int main() { sycl::free(USMPtr, Q); return Failed; -} \ No newline at end of file +} diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp index b444bd67db3e2..321c288704b0e 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp @@ -145,4 +145,4 @@ int main() { sycl::free(USMPtr, Q); return Failed; -} \ No newline at end of file +} diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp index 2bae4a5062c5b..77d5f2ee151eb 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp @@ -141,4 +141,4 @@ int main() { sycl::free(USMPtr, Q); return Failed; -} \ No newline at end of file +} From 147cb3d76a9a4876832cf4df15b43b0cd52755e7 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 7 Apr 2025 02:05:14 -0700 Subject: [PATCH 03/31] Address binding capture warning Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_image_impl.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 4920f60b79b84..d7a5b8590132b 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -1030,6 +1030,8 @@ class device_image_impl { std::vector> Result; Result.reserve(NewImages.size()); for (auto &[NewImage, KernelIDs] : NewImages) { + const RTDeviceBinaryImage &NewImageRef = *NewImage; + std::set KernelNames; std::unordered_map MangledKernelNames; std::unordered_set DeviceGlobalIDSet; @@ -1108,8 +1110,8 @@ class device_image_impl { std::vector SupportingDevs = Devices; auto NewSupportingDevsEnd = std::remove_if( SupportingDevs.begin(), SupportingDevs.end(), - [NewImage](const sycl::device &SDev) { - return !doesDevSupportDeviceRequirements(SDev, *NewImage); + [&NewImageRef](const sycl::device &SDev) { + return !doesDevSupportDeviceRequirements(SDev, NewImageRef); }); SupportingDevs.erase(NewSupportingDevsEnd, SupportingDevs.end()); From 7fc520266b39b90af11ab1fe214d8bf87e04e796 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 7 Apr 2025 02:57:53 -0700 Subject: [PATCH 04/31] Skip creating image when no devices support it Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_image_impl.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index d7a5b8590132b..b5ab936d03799 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -1113,6 +1113,10 @@ class device_image_impl { [&NewImageRef](const sycl::device &SDev) { return !doesDevSupportDeviceRequirements(SDev, NewImageRef); }); + + // If there are no devices that support the image, we skip it. + if (NewSupportingDevsEnd == SupportingDevs.begin()) + continue; SupportingDevs.erase(NewSupportingDevsEnd, SupportingDevs.end()); // Mark the image as input so the program manager will bring it into From 4ef1ce4cfd521018a493124090388f16abb8fb4b Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 7 Apr 2025 03:09:26 -0700 Subject: [PATCH 05/31] Move unsupported skip to earlier Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_image_impl.hpp | 26 ++++++++++++------------ 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index b5ab936d03799..a5afda5fc152b 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -1032,6 +1032,19 @@ class device_image_impl { for (auto &[NewImage, KernelIDs] : NewImages) { const RTDeviceBinaryImage &NewImageRef = *NewImage; + // Filter the devices that support the image requirements. + std::vector SupportingDevs = Devices; + auto NewSupportingDevsEnd = std::remove_if( + SupportingDevs.begin(), SupportingDevs.end(), + [&NewImageRef](const sycl::device &SDev) { + return !doesDevSupportDeviceRequirements(SDev, NewImageRef); + }); + + // If there are no devices that support the image, we skip it. + if (NewSupportingDevsEnd == SupportingDevs.begin()) + continue; + SupportingDevs.erase(NewSupportingDevsEnd, SupportingDevs.end()); + std::set KernelNames; std::unordered_map MangledKernelNames; std::unordered_set DeviceGlobalIDSet; @@ -1106,19 +1119,6 @@ class device_image_impl { getSyclObjImpl(MContext), std::string{Prefix}, std::move(DeviceGlobalNames), std::move(DeviceGlobalAllocations)); - // Filter the devices that support the image requirements. - std::vector SupportingDevs = Devices; - auto NewSupportingDevsEnd = std::remove_if( - SupportingDevs.begin(), SupportingDevs.end(), - [&NewImageRef](const sycl::device &SDev) { - return !doesDevSupportDeviceRequirements(SDev, NewImageRef); - }); - - // If there are no devices that support the image, we skip it. - if (NewSupportingDevsEnd == SupportingDevs.begin()) - continue; - SupportingDevs.erase(NewSupportingDevsEnd, SupportingDevs.end()); - // Mark the image as input so the program manager will bring it into // the right state. auto DevImgImpl = std::make_shared( From 14b44c49bc1747bca3f7e84ff2a3bef5f4ca0497 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 7 Apr 2025 04:58:09 -0700 Subject: [PATCH 06/31] Fix test mixup Signed-off-by: Larsen, Steffen --- sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp | 7 +------ .../sycl_link_common_dep_optional_feature.cpp | 7 ++++++- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp index 321c288704b0e..c3322f023bc08 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp @@ -117,7 +117,7 @@ int main() { } } - if (Q.get_device().has(sycl::aspect::fp64)) { + { sycl::kernel Kernel = ExecKB.ext_oneapi_get_kernel("TestKernel2"); memset(USMPtr, 0, Range * sizeof(int)); @@ -135,11 +135,6 @@ int main() { ++Failed; } } - } else if (ExecKB.ext_oneapi_has_kernel("TestKernel2")) { - std::cout << "Device does not support fp64, but the kernel bundle still " - "has the kernel using it." - << std::endl; - ++Failed; } sycl::free(USMPtr, Q); diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp index 77d5f2ee151eb..8c2ac147f16a2 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp @@ -118,7 +118,7 @@ int main() { } } - { + if (Q.get_device().has(sycl::aspect::fp64)) { sycl::kernel Kernel = ExecKB.ext_oneapi_get_kernel("TestKernel2"); memset(USMPtr, 0, Range * sizeof(int)); @@ -136,6 +136,11 @@ int main() { ++Failed; } } + } else if (ExecKB.ext_oneapi_has_kernel("TestKernel2")) { + std::cout << "Device does not support fp64, but the kernel bundle still " + "has the kernel using it." + << std::endl; + ++Failed; } sycl::free(USMPtr, Q); From 3592e0d44cd424584d6574420367c58503039ebf Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 7 Apr 2025 23:43:36 -0700 Subject: [PATCH 07/31] More tests and a correction Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_binary_image.cpp | 2 +- sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 84ff09b333423..562b01b77c8e2 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -165,7 +165,7 @@ RTDeviceBinaryImage::getProperty(const char *PropName) const { void RTDeviceBinaryImage::init(sycl_device_binary Bin) { ImageId = ImageCounter++; - // If there was no binary, we let the owner handle initialization and they see + // If there was no binary, we let the owner handle initialization as they see // fit. This is used when merging binaries, e.g. during linking. if (!Bin) return; diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index 2e73ca996c3b9..f05ed485b90c4 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 17 +// CHECK-NUM-MATCHES: 19 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see From 18a03109b81164a868e1f41701e626dc79d3f01f Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 7 Apr 2025 23:43:59 -0700 Subject: [PATCH 08/31] Actually include the tests Signed-off-by: Larsen, Steffen --- .../sycl_export_registration.cpp | 112 +++++++++++++++++ .../sycl_link_export_conflict.cpp | 116 ++++++++++++++++++ 2 files changed, 228 insertions(+) create mode 100644 sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp create mode 100644 sycl/test-e2e/KernelCompiler/sycl_link_export_conflict.cpp diff --git a/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp b/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp new file mode 100644 index 0000000000000..9d95c94140866 --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp @@ -0,0 +1,112 @@ +//==----------- sycl_link.cpp --- kernel_compiler extension tests ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// -- Test for a case where a kernel bundle is built that exports a symbol and +// -- other kernel bundles that uses it are compiled/linked without it. These +// -- cases should fail due to unresolved symbols, rather than picking up the +// -- symbol from the registered exported symbols. + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// RUN: %{l0_leak_check} %{run} %t.out + +#include +#include +#include + +namespace syclex = sycl::ext::oneapi::experimental; +using source_kb = sycl::kernel_bundle; +using obj_kb = sycl::kernel_bundle; +using exe_kb = sycl::kernel_bundle; + +// TODO: remove SYCL_EXTERNAL from the kernel once it is no longer needed. +auto constexpr SYCLImportSource = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size); + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel(int *Ptr, int Size) { + TestFunc(Ptr, Size); +} + +)==="; + +auto constexpr SYCLExportSource = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size) { + for (size_t I = 0; I < Size; ++I) + Ptr[I] = 0; +} + +)==="; + +int main() { + sycl::queue Q; + int Failed = 0; + + if (!Q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl)) { + std::cout << "Device does not support one of the source languages: " + << Q.get_device().get_info() + << std::endl; + return 0; + } + syclex::properties BuildOpts{ + syclex::build_options{"-fsycl-allow-device-image-dependencies"}}; + + source_kb ImportSourceKB = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLImportSource); + source_kb ExportSourceKB = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLExportSource); + + // Build the SYCL source that exports symbols. + syclex::build(ExportSourceKB, BuildOpts); + + // Build the SYCL source that imports symbols separately. This should fail to + // resolve exported symbols. + bool BuildFailed = false; + try { + syclex::build(ImportSourceKB, BuildOpts); + } catch (...) { + BuildFailed = true; + } + if (!BuildFailed) { + std::cout << "Building the SYCL source code with unresolved imported " + "symbols did NOT fail." + << std::endl; + ++Failed; + } + + // Compiling the import kernel bundle should work, despite unresolved symbols. + obj_kb ImportObjKB = syclex::compile(ImportSourceKB, BuildOpts); + + // Link the SYCL source that imports symbols separately. This should fail to + // resolve exported symbols. + bool LinkingFailed = false; + try { + sycl::link({ImportObjKB}); + } catch (...) { + LinkingFailed = true; + } + if (!LinkingFailed) { + std::cout << "Linking the SYCL source code with unresolved imported " + "symbols did NOT fail." + << std::endl; + ++Failed; + } + + return Failed; +} diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_export_conflict.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_export_conflict.cpp new file mode 100644 index 0000000000000..f9d6381e08b38 --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/sycl_link_export_conflict.cpp @@ -0,0 +1,116 @@ +//==-- sycl_link_export_conflict.cpp --- kernel_compiler extension tests ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) +// REQUIRES: aspect-usm_shared_allocations + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// -- Test for a case where a kernel bundle with an exported symbol is compiled +// -- before another kernel bundle using a different variant of the symbol. + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// RUN: %{l0_leak_check} %{run} %t.out + +#include +#include +#include + +namespace syclex = sycl::ext::oneapi::experimental; +using source_kb = sycl::kernel_bundle; +using obj_kb = sycl::kernel_bundle; +using exe_kb = sycl::kernel_bundle; + +// TODO: remove SYCL_EXTERNAL from the kernel once it is no longer needed. +auto constexpr SYCLImportSource = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size); + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel(int *Ptr, int Size) { + TestFunc(Ptr, Size); +} + +)==="; + +auto constexpr SYCLExportSource1 = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size) { + for (size_t I = 0; I < Size; ++I) + Ptr[I] = 0; +} + +)==="; + +auto constexpr SYCLExportSource2 = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size) { + for (size_t I = 0; I < Size; ++I) + Ptr[I] = I; +} + +)==="; + +int main() { + sycl::queue Q; + + if (!Q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl)) { + std::cout << "Device does not support one of the source languages: " + << Q.get_device().get_info() + << std::endl; + return 0; + } + syclex::properties BuildOpts{ + syclex::build_options{"-fsycl-allow-device-image-dependencies"}}; + + source_kb ConflictExportSourceKB = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLExportSource1); + exe_kb ConflictingExecKB = syclex::build(ConflictExportSourceKB, BuildOpts); + + source_kb ImportSourceKB = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLImportSource); + source_kb ExportSourceKB = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLExportSource2); + + obj_kb ImportObjKB = syclex::compile(ImportSourceKB, BuildOpts); + obj_kb ExportObjKB = syclex::compile(ExportSourceKB, BuildOpts); + + exe_kb ExecKB = sycl::link({ImportObjKB, ExportObjKB}); + + sycl::kernel Kernel = ExecKB.ext_oneapi_get_kernel("TestKernel"); + + constexpr int Range = 10; + int *USMPtr = sycl::malloc_shared(Range, Q); + + memset(USMPtr, 0, Range * sizeof(int)); + Q.submit([&](sycl::handler &Handler) { + Handler.set_args(USMPtr, Range); + Handler.single_task(Kernel); + }); + Q.wait(); + + int Failed = 0; + for (size_t I = 0; I < Range; ++I) { + if (USMPtr[I] != I) { + std::cout << "Unexpected value at index " << I << ": " << USMPtr[I] + << " != " << I << std::endl; + ++Failed; + } + } + + sycl::free(USMPtr, Q); + + return Failed; +} From 5472cc9974215f96dda6bb330f4ce3d0f8176c78 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 8 Apr 2025 02:02:54 -0700 Subject: [PATCH 09/31] Fix typos Signed-off-by: Larsen, Steffen --- .../experimental/sycl_ext_oneapi_kernel_compiler.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc index abf288adbebed..83a4fadef2f35 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc @@ -332,7 +332,7 @@ kernel_bundle compile( const std::vector& devs, PropertyListT props={}) template (2) -kernel_bundle build( +kernel_bundle compile( const kernel_bundle& sourceBundle, PropertyListT props = {}) @@ -376,7 +376,7 @@ _Throws:_ `sourceBundle`. * An `exception` with the `errc::invalid` error code if `props` contains an - `options` property that specifies an invalid option. + `build_options` property that specifies an invalid option. * An `exception` with the `errc::build` error code if the compilation operation fails. In this case, the exception `what` string provides a full build log, From 3d5ef9684c5e9ac58dbd8dfa8d9406899cbabc99 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 8 Apr 2025 02:03:38 -0700 Subject: [PATCH 10/31] Another typo Signed-off-by: Larsen, Steffen --- .../experimental/sycl_ext_oneapi_kernel_compiler.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc index 83a4fadef2f35..d0cd2779a9858 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc @@ -332,7 +332,7 @@ kernel_bundle compile( const std::vector& devs, PropertyListT props={}) template (2) -kernel_bundle compile( +kernel_bundle compile( const kernel_bundle& sourceBundle, PropertyListT props = {}) From a7d51cebd17e614cf4c975345f705d685cb87569 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 8 Apr 2025 02:10:26 -0700 Subject: [PATCH 11/31] Rename arg to match Signed-off-by: Larsen, Steffen --- .../experimental/sycl_ext_oneapi_kernel_compiler.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc index d0cd2779a9858..24223be486591 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc @@ -328,7 +328,7 @@ namespace sycl::ext::oneapi::experimental { template (1) kernel_bundle compile( - const kernel_bundle& source, + const kernel_bundle& sourceBundle, const std::vector& devs, PropertyListT props={}) template (2) From 6d7a8944d21aa3d53499e2e95f53a26966574b58 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 8 Apr 2025 03:38:54 -0700 Subject: [PATCH 12/31] Avoid exporting RTC image symbols Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_image_impl.hpp | 3 ++- sycl/source/detail/program_manager/program_manager.cpp | 8 ++++++-- sycl/source/detail/program_manager/program_manager.hpp | 1 + 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index a5afda5fc152b..6d91fa30121a0 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -1018,7 +1018,8 @@ class device_image_impl { sycl_device_binary Binary = &(Binaries->DeviceBinaries[I]); RTDeviceBinaryImage *NewImage = nullptr; auto KernelIDs = std::make_shared>(); - PM.addImage(Binary, &NewImage, KernelIDs.get()); + PM.addImage(Binary, /*RegisterImgExports=*/false, &NewImage, + KernelIDs.get()); if (NewImage) NewImages.push_back( std::make_pair(std::move(NewImage), std::move(KernelIDs))); diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index ec278146b0ff4..994013d0782b3 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1898,6 +1898,7 @@ static bool shouldSkipEmptyImage(sycl_device_binary RawImg) { } void ProgramManager::addImage(sycl_device_binary RawImg, + bool RegisterImgExports, RTDeviceBinaryImage **OutImage, std::vector *OutKernelIDs) { const bool DumpImages = std::getenv("SYCL_DUMP_IMAGES") && !m_UseSpvFile; @@ -1982,8 +1983,11 @@ void ProgramManager::addImage(sycl_device_binary RawImg, } // Register all exported symbols - for (const sycl_device_binary_property &ESProp : Img->getExportedSymbols()) { - m_ExportedSymbolImages.insert({ESProp->Name, Img.get()}); + if (RegisterImgExports) { + for (const sycl_device_binary_property &ESProp : + Img->getExportedSymbols()) { + m_ExportedSymbolImages.insert({ESProp->Name, Img.get()}); + } } // Record mapping between virtual function sets and device images diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 1a3b60bfbc5dd..74e73fd384ec8 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -217,6 +217,7 @@ class ProgramManager { const ContextImplPtr &Context); void addImage(sycl_device_binary RawImg, + bool RegisterImgExports = true, RTDeviceBinaryImage **OutImage = nullptr, std::vector *OutKernelIDs = nullptr); void addImages(sycl_device_binaries DeviceImages); From 6bb60501166fe9d37325ff9f1b0cf00597e5db9b Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 8 Apr 2025 04:04:07 -0700 Subject: [PATCH 13/31] Fix formatting Signed-off-by: Larsen, Steffen --- sycl/source/detail/program_manager/program_manager.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 74e73fd384ec8..6541018c3d226 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -216,8 +216,7 @@ class ProgramManager { ur_program_handle_t getUrProgramFromUrKernel(ur_kernel_handle_t Kernel, const ContextImplPtr &Context); - void addImage(sycl_device_binary RawImg, - bool RegisterImgExports = true, + void addImage(sycl_device_binary RawImg, bool RegisterImgExports = true, RTDeviceBinaryImage **OutImage = nullptr, std::vector *OutKernelIDs = nullptr); void addImages(sycl_device_binaries DeviceImages); From d947479682f3a582db75e0ced6fe75744c16e9de Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Wed, 9 Apr 2025 00:46:49 -0700 Subject: [PATCH 14/31] Check RTC kernel conflict in link and filter unique images Signed-off-by: Larsen, Steffen --- sycl/source/detail/kernel_bundle_impl.hpp | 50 +++++++++-- .../sycl_link_kernel_conflict.cpp | 87 +++++++++++++++++++ .../no_sycl_hpp_in_e2e_tests.cpp | 2 +- 3 files changed, 130 insertions(+), 9 deletions(-) create mode 100644 sycl/test-e2e/KernelCompiler/sycl_link_kernel_conflict.cpp diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 0b0687a9abf4b..31d0b7ceec873 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -238,15 +238,49 @@ class kernel_bundle_impl { } } - // Collect all images. + // Collect all unique images. std::vector DevImages; - for (const kernel_bundle &ObjectBundle : - ObjectBundles) - for (const device_image_plain &DevImg : - getSyclObjImpl(ObjectBundle)->MUniqueDeviceImages) - if (OfflineDeviceImageSet.find(getSyclObjImpl(DevImg)) == - OfflineDeviceImageSet.end()) - DevImages.push_back(DevImg); + { + std::set> DevImagesSet; + for (const kernel_bundle &ObjectBundle : + ObjectBundles) + for (const device_image_plain &DevImg : + getSyclObjImpl(ObjectBundle)->MUniqueDeviceImages) + if (OfflineDeviceImageSet.find(getSyclObjImpl(DevImg)) == + OfflineDeviceImageSet.end()) + DevImagesSet.insert(getSyclObjImpl(DevImg)); + DevImages.reserve(DevImagesSet.size()); + for (auto It = DevImagesSet.begin(); It != DevImagesSet.end();) + DevImages.push_back(createSyclObjFromImpl( + std::move(DevImagesSet.extract(It++).value()))); + } + + // Check for conflicting kernels in RTC kernel bundles. + { + std::set> SeenKernelNames; + std::set> Conflicts; + for (const device_image_plain &DevImage : DevImages) { + const std::optional &RTCInfo = + getSyclObjImpl(DevImage)->getRTCInfo(); + if (!RTCInfo.has_value()) + continue; + std::vector Intersect; + std::set_intersection(SeenKernelNames.begin(), SeenKernelNames.end(), + RTCInfo->MKernelNames.begin(), + RTCInfo->MKernelNames.end(), + std::inserter(Conflicts, Conflicts.begin())); + SeenKernelNames.insert(RTCInfo->MKernelNames.begin(), + RTCInfo->MKernelNames.end()); + } + + if (!Conflicts.empty()) { + std::stringstream MsgS; + MsgS << "Conflicting kernel definitions: "; + for (const std::string_view &Conflict : Conflicts) + MsgS << " " << Conflict; + throw sycl::exception(make_error_code(errc::invalid), MsgS.str()); + } + } // Create a map between exported symbols and their indices in the device // images collection. diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_kernel_conflict.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_kernel_conflict.cpp new file mode 100644 index 0000000000000..0e889670ca333 --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/sycl_link_kernel_conflict.cpp @@ -0,0 +1,87 @@ +//==----------- sycl_link.cpp --- kernel_compiler extension tests ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) +// REQUIRES: aspect-usm_shared_allocations + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// -- Test for the linking of two kernels with conflicting definitions of +// -- kernels with the same name. + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// RUN: %{l0_leak_check} %{run} %t.out + +#include +#include +#include + +namespace syclex = sycl::ext::oneapi::experimental; +using source_kb = sycl::kernel_bundle; +using obj_kb = sycl::kernel_bundle; +using exe_kb = sycl::kernel_bundle; + +// TODO: remove SYCL_EXTERNAL from the kernel once it is no longer needed. +auto constexpr SYCLSource1 = R"===( +#include + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel(int *Ptr) { + *Ptr = 42; +} + +)==="; + +// TODO: remove SYCL_EXTERNAL from the kernel once it is no longer needed. +auto constexpr SYCLSource2 = R"===( +#include + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel(int *Ptr) { + *Ptr = 24; +} + +)==="; + +int main() { + sycl::queue Q; + + if (!Q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl)) { + std::cout << "Device does not support one of the source languages: " + << Q.get_device().get_info() + << std::endl; + return 0; + } + + source_kb SourceKB1 = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLSource1); + source_kb SourceKB2 = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLSource2); + + obj_kb ObjKB1 = syclex::compile(SourceKB1); + obj_kb ObjKB2 = syclex::compile(SourceKB2); + + try { + sycl::link({ObjKB1, ObjKB2}); + } catch (sycl::exception &E) { + std::cout << "Exception caught: " << E.what() << std::endl; + return 0; + } + + std::cout << "No exception caught while linking two binaries with " + "conflicting kernels." + << std::endl; + + return 1; +} diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index f05ed485b90c4..c913f0d6bcaa6 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 19 +// CHECK-NUM-MATCHES: 20 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see From b03367fcff25c5455a70e1c888972733b03692ef Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 22 Apr 2025 03:50:36 -0700 Subject: [PATCH 15/31] Remove leftover ctor Signed-off-by: Larsen, Steffen --- llvm/include/llvm/SYCLPostLink/ModuleSplitter.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/llvm/include/llvm/SYCLPostLink/ModuleSplitter.h b/llvm/include/llvm/SYCLPostLink/ModuleSplitter.h index a5bd6b6e5cf76..eb67abe971f54 100644 --- a/llvm/include/llvm/SYCLPostLink/ModuleSplitter.h +++ b/llvm/include/llvm/SYCLPostLink/ModuleSplitter.h @@ -303,11 +303,6 @@ getDeviceCodeSplitter(ModuleDesc &&MD, IRSplitMode Mode, bool IROutputOnly, bool EmitOnlyKernelsAsEntryPoints, bool AllowDeviceImageDependencies); -std::unique_ptr -getDeviceCodeSplitter(ModuleDesc &&MD, IRSplitMode Mode, bool IROutputOnly, - bool EmitOnlyKernelsAsEntryPoints, - bool OverwriteAllowDeviceImageDependencies); - #ifndef NDEBUG void dumpEntryPoints(const EntryPointSet &C, const char *Msg = "", int Tab = 0); void dumpEntryPoints(const Module &M, bool OnlyKernelsAreEntryPoints = false, From 2861344b06204e4877bf4aff188d47a71ca6178e Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Wed, 23 Apr 2025 07:02:05 -0700 Subject: [PATCH 16/31] Clarify build_options Signed-off-by: Larsen, Steffen --- .../experimental/sycl_ext_oneapi_kernel_compiler.asciidoc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc index 24223be486591..2a11b90a5782b 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc @@ -302,7 +302,7 @@ _Throws:_ * An `exception` with the `errc::invalid` error code if the source language `lang` does not support one of the properties in `PropertyListT` or if `props` contains a `build_options` property that contains an option that is - not supported by `lang`. + not supported when building `lang`. * An `exception` with the `errc::build` error code if the compilation or linking operations fail. @@ -375,8 +375,10 @@ _Throws:_ `devs` does not support compilation of kernels in the source language of `sourceBundle`. -* An `exception` with the `errc::invalid` error code if `props` contains an - `build_options` property that specifies an invalid option. +* An `exception` with the `errc::invalid` error code if the source language + `lang` does not support one of the properties in `PropertyListT` or if + `props` contains a `build_options` property that contains an option that is + not supported when compiling `lang`. * An `exception` with the `errc::build` error code if the compilation operation fails. In this case, the exception `what` string provides a full build log, From 449437fcbb4ccb0ff0b3d90eae25e46b019f06a4 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Wed, 23 Apr 2025 07:50:25 -0700 Subject: [PATCH 17/31] Split ext_oneapi_can_compile in two Signed-off-by: Larsen, Steffen --- .../sycl_ext_oneapi_kernel_compiler.asciidoc | 24 +++++++++++++++++-- sycl/include/sycl/device.hpp | 17 +++++++++++-- sycl/source/detail/device_image_impl.hpp | 2 +- sycl/source/detail/device_impl.cpp | 14 ++++++++++- sycl/source/detail/device_impl.hpp | 1 + sycl/source/device.cpp | 5 ++++ .../kernel_shortcut_with_kb.cpp | 2 +- .../kernel_submit_with_event_and_kb.cpp | 2 +- .../kernel_submit_with_kb.cpp | 2 +- sycl/test-e2e/KernelCompiler/opencl.cpp | 4 ++-- .../KernelCompiler/opencl_cache_eviction.cpp | 2 +- .../KernelCompiler/opencl_queries.cpp | 2 +- .../opencl_queries_negative.cpp | 2 +- sycl/test-e2e/KernelCompiler/sycl.cpp | 3 +-- sycl/test-e2e/KernelCompiler/sycl_cache.cpp | 3 +-- .../test-e2e/KernelCompiler/sycl_cache_pm.cpp | 3 +-- .../KernelCompiler/sycl_context_error.cpp | 2 +- .../KernelCompiler/sycl_device_flags.cpp | 3 +-- .../KernelCompiler/sycl_device_globals.cpp | 4 ++-- .../sycl_export_registration.cpp | 2 +- .../KernelCompiler/sycl_include_paths.cpp | 3 +-- sycl/test-e2e/KernelCompiler/sycl_join.cpp | 2 +- .../KernelCompiler/sycl_lifetimes.cpp | 3 +-- .../KernelCompiler/sycl_time_trace.cpp | 3 +-- sycl/test/abi/sycl_symbols_linux.dump | 1 + sycl/test/abi/sycl_symbols_windows.dump | 1 + 26 files changed, 79 insertions(+), 33 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc index 2a11b90a5782b..9082706a99894 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc @@ -157,14 +157,34 @@ a! ---- class device { +bool ext_oneapi_can_build(ext::oneapi::experimental::source_language lang); + +}; +---- +!==== + +_Returns:_ The value `true` only if the device supports the +`ext::oneapi::experimental::build` function on kernel bundles written in the +source language `lang`. + +a| +[frame=all,grid=none] +!==== +a! +[source,c++] +---- +class device { + bool ext_oneapi_can_compile(ext::oneapi::experimental::source_language lang); }; ---- !==== -_Returns:_ The value `true` only if the device supports kernel bundles written -in the source language `lang`. +_Returns:_ The value `true` only if the device supports the +`ext::oneapi::experimental::compile` function on kernel bundles written in the +source language `lang`. + |==== === New free functions to create and build kernel bundles diff --git a/sycl/include/sycl/device.hpp b/sycl/include/sycl/device.hpp index 8a9a331863703..40dae8561b92c 100644 --- a/sycl/include/sycl/device.hpp +++ b/sycl/include/sycl/device.hpp @@ -299,14 +299,27 @@ class __SYCL_EXPORT device : public detail::OwnerLessBase { /// kernel_compiler extension + /// Indicates if the device can build a kernel for the given language. + /// + /// \param Language is one of the values from the + /// kernel_bundle::source_language enumeration described in the + /// sycl_ext_oneapi_kernel_compiler specification + /// + /// \return The value true only if the device supports the + /// ext::oneapi::experimental::build function on kernel bundles written in + /// the source language \p Language. + bool + ext_oneapi_can_build(ext::oneapi::experimental::source_language Language); + /// Indicates if the device can compile a kernel for the given language. /// /// \param Language is one of the values from the /// kernel_bundle::source_language enumeration described in the /// sycl_ext_oneapi_kernel_compiler specification /// - /// \return true only if the device supports kernel bundles written in the - /// source language `lang`. + /// \return The value true only if the device supports the + /// ext::oneapi::experimental::compile function on kernel bundles written in + /// the source language \p Language. bool ext_oneapi_can_compile(ext::oneapi::experimental::source_language Language); diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 409c9477e9b6d..a2524f5ef9704 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -711,7 +711,7 @@ class device_image_impl { throw sycl::exception(make_error_code(errc::invalid), "device not part of kernel_bundle context"); } - if (!DevImpl->extOneapiCanCompile(MRTCBinInfo->MLanguage)) { + if (!DevImpl->extOneapiCanBuild(MRTCBinInfo->MLanguage)) { // This error cannot not be exercised in the current implementation, as // compatibility with a source language depends on the backend's // capabilities and all devices in one context share the same backend in diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 617d54ec8b39f..19b91736e0bcf 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -918,7 +918,7 @@ bool device_impl::isGetDeviceAndHostTimerSupported() { return Result != UR_RESULT_ERROR_INVALID_OPERATION; } -bool device_impl::extOneapiCanCompile( +bool device_impl::extOneapiCanBuild( ext::oneapi::experimental::source_language Language) { try { return sycl::ext::oneapi::experimental::detail:: @@ -928,6 +928,18 @@ bool device_impl::extOneapiCanCompile( } } +bool device_impl::extOneapiCanCompile( + ext::oneapi::experimental::source_language Language) { + try { + // Currently only SYCL language is supported for compiling. + return Language == ext::oneapi::experimental::source_language::sycl && + sycl::ext::oneapi::experimental::detail:: + is_source_kernel_bundle_supported(getBackend(), Language); + } catch (sycl::exception &) { + return false; + } +} + // Returns the strongest guarantee that can be provided by the host device for // threads created at threadScope from a coordination scope given by // coordinationScope diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 57c3f02fbd2b7..b515eb43fbbe2 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -237,6 +237,7 @@ class device_impl { return false; } + bool extOneapiCanBuild(ext::oneapi::experimental::source_language Language); bool extOneapiCanCompile(ext::oneapi::experimental::source_language Language); // Returns all guarantees that are either equal to guarantee or weaker than diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 1e27e4f898bc1..4cf27f9f7359d 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -263,6 +263,11 @@ bool device::ext_oneapi_architecture_is( } // kernel_compiler extension methods +bool device::ext_oneapi_can_build( + ext::oneapi::experimental::source_language Language) { + return impl->extOneapiCanBuild(Language); +} + bool device::ext_oneapi_can_compile( ext::oneapi::experimental::source_language Language) { return impl->extOneapiCanCompile(Language); diff --git a/sycl/test-e2e/EnqueueFunctions/kernel_shortcut_with_kb.cpp b/sycl/test-e2e/EnqueueFunctions/kernel_shortcut_with_kb.cpp index 91713e2a3293a..2e92784488b86 100644 --- a/sycl/test-e2e/EnqueueFunctions/kernel_shortcut_with_kb.cpp +++ b/sycl/test-e2e/EnqueueFunctions/kernel_shortcut_with_kb.cpp @@ -21,7 +21,7 @@ constexpr size_t N = 1024; int main() { sycl::queue Q; - if (!Q.get_device().ext_oneapi_can_compile( + if (!Q.get_device().ext_oneapi_can_build( oneapiext::source_language::opencl)) { std::cout << "Backend does not support OpenCL C source kernel bundle extension: " diff --git a/sycl/test-e2e/EnqueueFunctions/kernel_submit_with_event_and_kb.cpp b/sycl/test-e2e/EnqueueFunctions/kernel_submit_with_event_and_kb.cpp index d0249ef64bcf4..dc2559369b9eb 100644 --- a/sycl/test-e2e/EnqueueFunctions/kernel_submit_with_event_and_kb.cpp +++ b/sycl/test-e2e/EnqueueFunctions/kernel_submit_with_event_and_kb.cpp @@ -21,7 +21,7 @@ int main() { sycl::queue Q; int Memory[N] = {0}; - if (!Q.get_device().ext_oneapi_can_compile( + if (!Q.get_device().ext_oneapi_can_build( oneapiext::source_language::opencl)) { std::cout << "Backend does not support OpenCL C source kernel bundle extension: " diff --git a/sycl/test-e2e/EnqueueFunctions/kernel_submit_with_kb.cpp b/sycl/test-e2e/EnqueueFunctions/kernel_submit_with_kb.cpp index c491a22c69779..eed68407a4790 100644 --- a/sycl/test-e2e/EnqueueFunctions/kernel_submit_with_kb.cpp +++ b/sycl/test-e2e/EnqueueFunctions/kernel_submit_with_kb.cpp @@ -21,7 +21,7 @@ int main() { sycl::queue Q; int Memory[N] = {0}; - if (!Q.get_device().ext_oneapi_can_compile( + if (!Q.get_device().ext_oneapi_can_build( oneapiext::source_language::opencl)) { std::cout << "Backend does not support OpenCL C source kernel bundle extension: " diff --git a/sycl/test-e2e/KernelCompiler/opencl.cpp b/sycl/test-e2e/KernelCompiler/opencl.cpp index a532ff468bc5f..e8d8cc44368f9 100644 --- a/sycl/test-e2e/KernelCompiler/opencl.cpp +++ b/sycl/test-e2e/KernelCompiler/opencl.cpp @@ -103,7 +103,7 @@ void test_build_and_run() { sycl::queue q{ctx, d}; bool ok = - q.get_device().ext_oneapi_can_compile(syclex::source_language::opencl); + q.get_device().ext_oneapi_can_build(syclex::source_language::opencl); if (!ok) { std::cout << "Apparently this device does not support OpenCL C source " "kernel bundle extension: " @@ -164,7 +164,7 @@ void test_error() { sycl::queue q{ctx, d}; bool ok = - q.get_device().ext_oneapi_can_compile(syclex::source_language::opencl); + q.get_device().ext_oneapi_can_build(syclex::source_language::opencl); if (!ok) { return; } diff --git a/sycl/test-e2e/KernelCompiler/opencl_cache_eviction.cpp b/sycl/test-e2e/KernelCompiler/opencl_cache_eviction.cpp index 90ab79b732b32..9db50e18eaf1e 100644 --- a/sycl/test-e2e/KernelCompiler/opencl_cache_eviction.cpp +++ b/sycl/test-e2e/KernelCompiler/opencl_cache_eviction.cpp @@ -50,7 +50,7 @@ void test_build_and_run() { sycl::queue q{ctx, d}; bool ok = - q.get_device().ext_oneapi_can_compile(syclex::source_language::opencl); + q.get_device().ext_oneapi_can_build(syclex::source_language::opencl); if (!ok) { std::cout << "Apparently this device does not support OpenCL C source " "kernel bundle extension: " diff --git a/sycl/test-e2e/KernelCompiler/opencl_queries.cpp b/sycl/test-e2e/KernelCompiler/opencl_queries.cpp index 4ccea9db88a7f..3e28e673afc69 100644 --- a/sycl/test-e2e/KernelCompiler/opencl_queries.cpp +++ b/sycl/test-e2e/KernelCompiler/opencl_queries.cpp @@ -21,7 +21,7 @@ int main() { sycl::queue q; sycl::device d = q.get_device(); - assert(d.ext_oneapi_can_compile(syclex::source_language::opencl) && + assert(d.ext_oneapi_can_build(syclex::source_language::opencl) && "can_compile(opencl) unexpectedly false"); assert(d.ext_oneapi_supports_cl_c_version(syclex::opencl_c_1_0) && diff --git a/sycl/test-e2e/KernelCompiler/opencl_queries_negative.cpp b/sycl/test-e2e/KernelCompiler/opencl_queries_negative.cpp index caad64e8c363a..033213c82f68f 100644 --- a/sycl/test-e2e/KernelCompiler/opencl_queries_negative.cpp +++ b/sycl/test-e2e/KernelCompiler/opencl_queries_negative.cpp @@ -19,7 +19,7 @@ int main() { sycl::queue q; sycl::device d = q.get_device(); - assert(!d.ext_oneapi_can_compile(syclex::source_language::opencl) && + assert(!d.ext_oneapi_can_build(syclex::source_language::opencl) && "can_compile(opencl) unexpectedly true"); assert(!d.ext_oneapi_supports_cl_c_version(syclex::opencl_c_1_0) && diff --git a/sycl/test-e2e/KernelCompiler/sycl.cpp b/sycl/test-e2e/KernelCompiler/sycl.cpp index 2fb8c79a4ac4d..1b45686e41e5d 100644 --- a/sycl/test-e2e/KernelCompiler/sycl.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl.cpp @@ -527,8 +527,7 @@ int main() { sycl::queue q; sycl::context ctx = q.get_context(); - bool ok = - q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl); + bool ok = q.get_device().ext_oneapi_can_build(syclex::source_language::sycl); if (!ok) { return -1; } diff --git a/sycl/test-e2e/KernelCompiler/sycl_cache.cpp b/sycl/test-e2e/KernelCompiler/sycl_cache.cpp index 2bec52ac46c5a..1ec2f744b632d 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_cache.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_cache.cpp @@ -58,8 +58,7 @@ int test_persistent_cache() { sycl::context ctx{d}; sycl::queue q{ctx, d}; - bool ok = - q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl); + bool ok = q.get_device().ext_oneapi_can_build(syclex::source_language::sycl); if (!ok) { std::cout << "Apparently this device does not support `sycl` source kernel " "bundle extension: " diff --git a/sycl/test-e2e/KernelCompiler/sycl_cache_pm.cpp b/sycl/test-e2e/KernelCompiler/sycl_cache_pm.cpp index d275a81b51ccc..c449c25647414 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_cache_pm.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_cache_pm.cpp @@ -88,8 +88,7 @@ void test_build_and_run() { sycl::queue q; sycl::context ctx = q.get_context(); - bool ok = - q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl); + bool ok = q.get_device().ext_oneapi_can_build(syclex::source_language::sycl); if (!ok) { std::cout << "Apparently this device does not support SYCL source " "kernel bundle extension: " diff --git a/sycl/test-e2e/KernelCompiler/sycl_context_error.cpp b/sycl/test-e2e/KernelCompiler/sycl_context_error.cpp index e9dafa672fac7..c7a0be335c0fa 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_context_error.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_context_error.cpp @@ -41,7 +41,7 @@ int main() { return false; } for (auto dev : devices) { - if (!dev.ext_oneapi_can_compile(syclexp::source_language::sycl)) { + if (!dev.ext_oneapi_can_build(syclexp::source_language::sycl)) { return false; } } diff --git a/sycl/test-e2e/KernelCompiler/sycl_device_flags.cpp b/sycl/test-e2e/KernelCompiler/sycl_device_flags.cpp index f457f1b7912ca..93aa4eb9712c6 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_device_flags.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_device_flags.cpp @@ -119,8 +119,7 @@ int main(int argc, char *argv[]) { sycl::queue q; sycl::context ctx = q.get_context(); - bool ok = - q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl); + bool ok = q.get_device().ext_oneapi_can_build(syclex::source_language::sycl); if (!ok) { std::cout << "compiling from SYCL source not supported" << std::endl; return 0; // if kernel compilation is not supported, do nothing. diff --git a/sycl/test-e2e/KernelCompiler/sycl_device_globals.cpp b/sycl/test-e2e/KernelCompiler/sycl_device_globals.cpp index 0ebb59f42959d..ceda7252369e5 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_device_globals.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_device_globals.cpp @@ -53,7 +53,7 @@ int test_device_global() { sycl::context ctx = q.get_context(); sycl::device d = q.get_device(); - bool ok = d.ext_oneapi_can_compile(syclex::source_language::sycl); + bool ok = d.ext_oneapi_can_build(syclex::source_language::sycl); if (!ok) { std::cout << "Apparently this device does not support `sycl` source kernel " "bundle extension: " @@ -141,7 +141,7 @@ int test_error() { sycl::context ctx = q.get_context(); sycl::device d = q.get_device(); - bool ok = d.ext_oneapi_can_compile(syclex::source_language::sycl); + bool ok = d.ext_oneapi_can_build(syclex::source_language::sycl); if (!ok) { return 0; } diff --git a/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp b/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp index 9d95c94140866..ee06de11f908e 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp @@ -58,7 +58,7 @@ int main() { sycl::queue Q; int Failed = 0; - if (!Q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl)) { + if (!Q.get_device().ext_oneapi_can_build(syclex::source_language::sycl)) { std::cout << "Device does not support one of the source languages: " << Q.get_device().get_info() << std::endl; diff --git a/sycl/test-e2e/KernelCompiler/sycl_include_paths.cpp b/sycl/test-e2e/KernelCompiler/sycl_include_paths.cpp index ada5c06cd1160..bf0d46b5800be 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_include_paths.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_include_paths.cpp @@ -73,8 +73,7 @@ int test_include_paths(const std::string &baseDir) { sycl::queue q; sycl::context ctx = q.get_context(); - bool ok = - q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl); + bool ok = q.get_device().ext_oneapi_can_build(syclex::source_language::sycl); if (!ok) { std::cout << "Apparently this device does not support `sycl` source kernel " "bundle extension: " diff --git a/sycl/test-e2e/KernelCompiler/sycl_join.cpp b/sycl/test-e2e/KernelCompiler/sycl_join.cpp index 2b4dc8f8e1bec..99ad22396f7af 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_join.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_join.cpp @@ -66,7 +66,7 @@ int main() { sycl::queue Q; sycl::context Ctx = Q.get_context(); - if (!Q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl)) { + if (!Q.get_device().ext_oneapi_can_build(syclex::source_language::sycl)) { std::cout << "Apparently this device does not support `sycl` source " "kernel bundle extension: " << Q.get_device().get_info() diff --git a/sycl/test-e2e/KernelCompiler/sycl_lifetimes.cpp b/sycl/test-e2e/KernelCompiler/sycl_lifetimes.cpp index bf6dd99674a38..4ef1cb2d88e9c 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_lifetimes.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_lifetimes.cpp @@ -38,8 +38,7 @@ int test_lifetimes() { sycl::queue q; sycl::context ctx = q.get_context(); - bool ok = - q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl); + bool ok = q.get_device().ext_oneapi_can_build(syclex::source_language::sycl); if (!ok) { std::cout << "Apparently this device does not support `sycl` source " "kernel bundle extension: " diff --git a/sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp b/sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp index 03d074fafa48f..77a2d89bed123 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp @@ -23,8 +23,7 @@ int test_tracing() { sycl::queue q; sycl::context ctx = q.get_context(); - bool ok = - q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl); + bool ok = q.get_device().ext_oneapi_can_build(syclex::source_language::sycl); if (!ok) { std::cout << "Apparently this device does not support `sycl` source kernel " "bundle extension: " diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index ab51c7d79c530..70efbe952ef20 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3350,6 +3350,7 @@ _ZN4sycl3_V16detail9modf_implENS1_9half_impl4halfEPS3_ _ZN4sycl3_V16detail9modf_implEdPd _ZN4sycl3_V16detail9modf_implEfPf _ZN4sycl3_V16device11get_devicesENS0_4info11device_typeE +_ZN4sycl3_V16device20ext_oneapi_can_buildENS0_3ext6oneapi12experimental15source_languageE _ZN4sycl3_V16device22ext_oneapi_can_compileENS0_3ext6oneapi12experimental15source_languageE _ZN4sycl3_V16device26ext_oneapi_architecture_isENS0_3ext6oneapi12experimental12architectureE _ZN4sycl3_V16device26ext_oneapi_architecture_isENS0_3ext6oneapi12experimental13arch_categoryE diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index f7799264372dc..490b60638d189 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -3849,6 +3849,7 @@ ?ext_oneapi_barrier@handler@_V1@sycl@@QEAAXAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@@Z ?ext_oneapi_barrier@handler@_V1@sycl@@QEAAXXZ ?ext_oneapi_can_access_peer@device@_V1@sycl@@QEAA_NAEBV123@W4peer_access@oneapi@ext@23@@Z +?ext_oneapi_can_build@device@_V1@sycl@@QEAA_NW4source_language@experimental@oneapi@ext@23@@Z ?ext_oneapi_can_compile@device@_V1@sycl@@QEAA_NW4source_language@experimental@oneapi@ext@23@@Z ?ext_oneapi_cl_profile@device@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ ?ext_oneapi_cl_profile_impl@device@_V1@sycl@@AEBA?AVstring@detail@23@XZ From c33808dbf1f7aa3794f9b912a1764de8b63b0cd7 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Wed, 23 Apr 2025 07:58:32 -0700 Subject: [PATCH 18/31] Change exception condition for compile Signed-off-by: Larsen, Steffen --- .../experimental/sycl_ext_oneapi_kernel_compiler.asciidoc | 3 +-- .../sycl_ext_oneapi_kernel_compiler_opencl.asciidoc | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc index 9082706a99894..6910213eaf787 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc @@ -385,8 +385,7 @@ _Returns:_ The newly created kernel bundle, which has `object` state. _Throws:_ * An `exception` with the `errc::invalid` error code if `source` was not created - with `source_language::sycl` or was the result of `sycl::join` taking one or - more `kernel_bundle` objects not created with `source_language::sycl`. + with `source_language::sycl`. * An `exception` with the `errc::invalid` error code if any of the devices in `devs` is not contained by the context associated with `sourceBundle`. diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler_opencl.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler_opencl.asciidoc index 7d79e75634c4d..f5d970d4f3e2d 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler_opencl.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler_opencl.asciidoc @@ -461,7 +461,7 @@ int main() { sycl::queue q; sycl::device d = q.get_device(); - if (d.ext_oneapi_can_compile(syclex::source_language::opencl)) + if (d.ext_oneapi_can_build(syclex::source_language::opencl)) std::cout << "Device supports online compilation of OpenCL C kernels\n"; if (d.ext_oneapi_supports_cl_c_version(syclex::opencl_c_3_0)) From 70c692a22b94e7a530c57bab2e9892baa4ee76f4 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 28 Apr 2025 06:02:43 -0700 Subject: [PATCH 19/31] Remove the compile source language restriction Signed-off-by: Larsen, Steffen --- .../experimental/sycl_ext_oneapi_kernel_compiler.asciidoc | 3 --- 1 file changed, 3 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc index 6910213eaf787..d682d67b0f8ba 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc @@ -384,9 +384,6 @@ _Returns:_ The newly created kernel bundle, which has `object` state. _Throws:_ -* An `exception` with the `errc::invalid` error code if `source` was not created - with `source_language::sycl`. - * An `exception` with the `errc::invalid` error code if any of the devices in `devs` is not contained by the context associated with `sourceBundle`. From 2f4257890595a16696b8258568de428c24907528 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 28 Apr 2025 06:20:08 -0700 Subject: [PATCH 20/31] Move language support requirement to build/compile Signed-off-by: Larsen, Steffen --- .../sycl_ext_oneapi_kernel_compiler.asciidoc | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc index d682d67b0f8ba..ef28bd56a7d15 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc @@ -246,8 +246,6 @@ state. _Throws:_ -* An `exception` with the `errc::invalid` error code if the source language - `lang` is not supported by any device contained by the context `ctxt`. * An `exception` with the `errc::invalid` error code if the source language `lang` does not support one of the properties in `PropertyListT`. * Overload (1) throws an `exception` with the `errc::invalid` error code if the @@ -312,6 +310,10 @@ _Returns:_ The newly created kernel bundle, which has `executable` state. _Throws:_ +* An `exception` with the `errc::invalid` error code if any of the devices in + `devs` return `false` for `ext_oneapi_can_build` with the source language + `lang`. + * An `exception` with the `errc::invalid` error code if any of the devices in `devs` is not contained by the context associated with `sourceBundle`. @@ -384,6 +386,10 @@ _Returns:_ The newly created kernel bundle, which has `object` state. _Throws:_ +* An `exception` with the `errc::invalid` error code if any of the devices in + `devs` return `false` for `ext_oneapi_can_compile` with the source language + `lang`. + * An `exception` with the `errc::invalid` error code if any of the devices in `devs` is not contained by the context associated with `sourceBundle`. From 29894623adc399e624c9d305ebd6b86a11aaacad Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 28 Apr 2025 07:19:42 -0700 Subject: [PATCH 21/31] Address comments Signed-off-by: Larsen, Steffen --- .../sycl_ext_oneapi_kernel_compiler.asciidoc | 24 +++++++------------ 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc index ef28bd56a7d15..0f5da5dbc549f 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc @@ -259,9 +259,11 @@ function. This function succeeds even if some devices in `ctxt` do not support the source language `lang`. -However, the `build` function fails unless _all_ of its devices support `lang`. -Therefore, applications should take care to omit devices that do not support -`lang` when calling `build`. +However, the `build` and `compile` functions will fail if any of its devices +return `false` for `ext_oneapi_can_build(lang)` and +`ext_oneapi_can_compile(lang)` respectively. Therefore, applications should take +care to omit devices that do not support `lang` for the functions they intend on +calling. _{endnote}_] a| @@ -311,16 +313,12 @@ _Returns:_ The newly created kernel bundle, which has `executable` state. _Throws:_ * An `exception` with the `errc::invalid` error code if any of the devices in - `devs` return `false` for `ext_oneapi_can_build` with the source language - `lang`. + `devs` return `false` for `ext_oneapi_can_build` with the source language of + `sourceBundle`. * An `exception` with the `errc::invalid` error code if any of the devices in `devs` is not contained by the context associated with `sourceBundle`. -* An `exception` with the `errc::invalid` error code if any of the devices in - `devs` does not support compilation of kernels in the source language of - `sourceBundle`. - * An `exception` with the `errc::invalid` error code if the source language `lang` does not support one of the properties in `PropertyListT` or if `props` contains a `build_options` property that contains an option that is @@ -387,16 +385,12 @@ _Returns:_ The newly created kernel bundle, which has `object` state. _Throws:_ * An `exception` with the `errc::invalid` error code if any of the devices in - `devs` return `false` for `ext_oneapi_can_compile` with the source language - `lang`. + `devs` return `false` for `ext_oneapi_can_compile` with the source language of + `sourceBundle`. * An `exception` with the `errc::invalid` error code if any of the devices in `devs` is not contained by the context associated with `sourceBundle`. -* An `exception` with the `errc::invalid` error code if any of the devices in - `devs` does not support compilation of kernels in the source language of - `sourceBundle`. - * An `exception` with the `errc::invalid` error code if the source language `lang` does not support one of the properties in `PropertyListT` or if `props` contains a `build_options` property that contains an option that is From cdec73ef4f9ad8569123454ef0562524c0508b3a Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 28 Apr 2025 21:21:59 -0700 Subject: [PATCH 22/31] Address comments Signed-off-by: Larsen, Steffen --- sycl/include/sycl/kernel_bundle.hpp | 2 + sycl/source/detail/device_binary_image.cpp | 64 +++++++++---------- sycl/source/detail/kernel_bundle_impl.hpp | 3 +- .../sycl_export_registration.cpp | 2 +- .../KernelCompiler/sycl_link_common_dep.cpp | 2 +- .../sycl_link_common_dep_optional_feature.cpp | 2 +- .../sycl_link_kernel_conflict.cpp | 2 +- 7 files changed, 40 insertions(+), 37 deletions(-) diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index 0a8c3a03c9282..9a841f272fcfe 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -1167,6 +1167,7 @@ build_from_source(kernel_bundle &SourceKB, std::string *LogPtr, const std::vector &RegisteredKernelNames) { std::vector Options; + Options.reserve(BuildOptions.size()); for (const std::string &opt : BuildOptions) Options.push_back(sycl::detail::string_view{opt}); @@ -1198,6 +1199,7 @@ compile_from_source(kernel_bundle &SourceKB, std::string *LogPtr, const std::vector &RegisteredKernelNames) { std::vector Options; + Options.reserve(CompileOptions.size()); for (const std::string &opt : CompileOptions) Options.push_back(sycl::detail::string_view{opt}); diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 562b01b77c8e2..85d8acbd8feaf 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -17,7 +17,7 @@ #include #include #include -#include +#include namespace sycl { inline namespace _V1 { @@ -250,11 +250,12 @@ DynRTDeviceBinaryImage::~DynRTDeviceBinaryImage() { Bin = nullptr; } -// Exclusive property merge logic. It assumes there are no cases where -// properties have different values and throws otherwise. +// "Naive" property merge logic. It merges the properties into a single property +// vector without checking for duplicates. As such, duplicates may occur in the +// final result. template static std::vector -NaiveMergeBinaryProperties(const std::vector &Imgs, +naiveMergeBinaryProperties(const std::vector &Imgs, const RangeGetterT &RangeGetter) { size_t PropertiesCount = 0; for (const RTDeviceBinaryImage *Img : Imgs) @@ -274,7 +275,7 @@ NaiveMergeBinaryProperties(const std::vector &Imgs, // are no cases where properties have different values and throws otherwise. template static std::map -ExclusiveMergeBinaryProperties( +exclusiveMergeBinaryProperties( const std::vector &Imgs, const RangeGetterT &RangeGetter, bool IgnoreDuplicates = false) { std::map MergeMap; @@ -303,9 +304,9 @@ ExclusiveMergeBinaryProperties( // information for these are kept in this struct. struct MergedDeviceRequirements { std::map MergeMap; - std::set Aspects; - std::set JointMatrix; - std::set JointMatrixMad; + std::unordered_set Aspects; + std::unordered_set JointMatrix; + std::unordered_set JointMatrixMad; size_t GetPropertiesCount() const { return MergeMap.size() + !Aspects.empty() + !JointMatrix.empty() + @@ -316,7 +317,8 @@ struct MergedDeviceRequirements { return Aspects.size() * sizeof(uint32_t); } - static size_t GetStringSetContentSize(const std::set &Set) { + static size_t + GetStringSetContentSize(const std::unordered_set &Set) { size_t Result = 0; Result += Set.size() - 1; // Semi-colon delimiters. for (const std::string_view &Str : Set) // Strings. @@ -365,7 +367,7 @@ struct MergedDeviceRequirements { } static void WriteStringSetProperty( - const std::set &Set, const char *SetName, + const std::unordered_set &Set, const char *SetName, sycl_device_binary_property &NextFreeProperty, char *&NextFreeContent) { if (Set.empty()) return; @@ -398,10 +400,10 @@ MergeDeviceRequirements(const std::vector &Imgs) { const RTDeviceBinaryImage::PropertyRange &Range = Img->getDeviceRequirements(); for (const sycl_device_binary_property Prop : Range) { - std::string_view NameView = std::string_view{Prop->Name}; + std::string_view NameView{Prop->Name}; // Aspects we collect in a set early and add them afterwards. - if (NameView == std::string_view{"aspects"}) { + if (NameView == "aspects") { // Skip size bytes. auto AspectIt = reinterpret_cast( reinterpret_cast(Prop->ValAddr) + 8); @@ -412,12 +414,10 @@ MergeDeviceRequirements(const std::vector &Imgs) { // joint_matrix and joint_matrix_mad have the same format, so we parse // them the same way. - if (NameView == std::string_view{"joint_matrix"} || - NameView == std::string_view{"joint_matrix_mad"}) { - std::set &Set = - NameView == std::string_view{"joint_matrix"} - ? MergedReqs.JointMatrix - : MergedReqs.JointMatrixMad; + if (NameView == "joint_matrix" || NameView == "joint_matrix_mad") { + std::unordered_set &Set = + NameView == "joint_matrix" ? MergedReqs.JointMatrix + : MergedReqs.JointMatrixMad; // Skip size bytes. std::string_view Contents{reinterpret_cast(Prop->ValAddr) + 8, @@ -481,39 +481,39 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( // Naive merges. auto MergedSpecConstants = - NaiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getSpecConstants(); }); auto MergedSpecConstantsDefaultValues = - NaiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getSpecConstantsDefaultValues(); }); auto MergedKernelParamOptInfo = - NaiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getKernelParamOptInfo(); }); - auto MergedAssertUsed = NaiveMergeBinaryProperties( + auto MergedAssertUsed = naiveMergeBinaryProperties( Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getAssertUsed(); }); auto MergedDeviceGlobals = - NaiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getDeviceGlobals(); }); - auto MergedHostPipes = NaiveMergeBinaryProperties( + auto MergedHostPipes = naiveMergeBinaryProperties( Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getHostPipes(); }); auto MergedVirtualFunctions = - NaiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getVirtualFunctions(); }); auto MergedImplicitLocalArg = - NaiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getImplicitLocalArg(); }); auto MergedExportedSymbols = - NaiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getExportedSymbols(); }); auto MergedRegisteredKernels = - NaiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getRegisteredKernels(); }); @@ -526,19 +526,19 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( // Exclusive merges. auto MergedDeviceLibReqMask = - ExclusiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + exclusiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getDeviceLibReqMask(); }); auto MergedProgramMetadata = - ExclusiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + exclusiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getProgramMetadata(); }); - auto MergedImportedSymbols = ExclusiveMergeBinaryProperties( + auto MergedImportedSymbols = exclusiveMergeBinaryProperties( Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getImportedSymbols(); }, /*IgnoreDuplicates=*/true); auto MergedMisc = - ExclusiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + exclusiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getMiscProperties(); }); diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 8b294b20e366c..d9fdbff15e339 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -225,7 +225,8 @@ class kernel_bundle_impl { // regular offline-compiled SYCL device images in separation. // TODO: Remove when spec const overwriting issue has been fixed in L0. std::vector OfflineDeviceImages; - std::set> OfflineDeviceImageSet; + std::unordered_set> + OfflineDeviceImageSet; for (const kernel_bundle &ObjectBundle : ObjectBundles) { for (const DevImgPlainWithDeps &DeviceImageWithDeps : diff --git a/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp b/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp index ee06de11f908e..789bab6223546 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp @@ -1,4 +1,4 @@ -//==----------- sycl_link.cpp --- kernel_compiler extension tests ----------==// +//==--- sycl_export_registration.cpp --- kernel_compiler extension tests ---==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp index c3322f023bc08..49d67205b8cd1 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp @@ -1,4 +1,4 @@ -//==----------- sycl_link.cpp --- kernel_compiler extension tests ----------==// +//==-----sycl_link_common_dep.cpp --- kernel_compiler extension tests ------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp index 8c2ac147f16a2..76fe85ce72fa9 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp @@ -1,4 +1,4 @@ -//==----------- sycl_link.cpp --- kernel_compiler extension tests ----------==// +//==- sycl_link_common_dep_optional_feature.cpp --- kernel_compiler tests --==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_kernel_conflict.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_kernel_conflict.cpp index 0e889670ca333..7fe9b0fd9db79 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_link_kernel_conflict.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_link_kernel_conflict.cpp @@ -1,4 +1,4 @@ -//==----------- sycl_link.cpp --- kernel_compiler extension tests ----------==// +//==-- sycl_link_kernel_conflict.cpp --- kernel_compiler extension tests ---==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. From aa9f50282d7fb8b9dc93a6cd11376660a3b6761b Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 28 Apr 2025 21:24:44 -0700 Subject: [PATCH 23/31] Rename more functions Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_binary_image.cpp | 44 +++++++++++----------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 85d8acbd8feaf..2d8295b400038 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -308,17 +308,17 @@ struct MergedDeviceRequirements { std::unordered_set JointMatrix; std::unordered_set JointMatrixMad; - size_t GetPropertiesCount() const { + size_t getPropertiesCount() const { return MergeMap.size() + !Aspects.empty() + !JointMatrix.empty() + !JointMatrixMad.empty(); } - size_t GetAspectsContentSize() const { + size_t getAspectsContentSize() const { return Aspects.size() * sizeof(uint32_t); } static size_t - GetStringSetContentSize(const std::unordered_set &Set) { + getStringSetContentSize(const std::unordered_set &Set) { size_t Result = 0; Result += Set.size() - 1; // Semi-colon delimiters. for (const std::string_view &Str : Set) // Strings. @@ -326,33 +326,33 @@ struct MergedDeviceRequirements { return Result; } - size_t GetPropertiesContentByteSize() const { + size_t getPropertiesContentByteSize() const { size_t Result = 0; for (const auto &PropIt : MergeMap) Result += strlen(PropIt.second->Name) + 1 + PropIt.second->ValSize; if (!Aspects.empty()) - Result += strlen("aspects") + 1 + GetAspectsContentSize(); + Result += strlen("aspects") + 1 + getAspectsContentSize(); if (!JointMatrix.empty()) Result += - strlen("joint_matrix") + 1 + GetStringSetContentSize(JointMatrix); + strlen("joint_matrix") + 1 + getStringSetContentSize(JointMatrix); if (!JointMatrixMad.empty()) Result += strlen("joint_matrix_mad") + 1 + - GetStringSetContentSize(JointMatrixMad); + getStringSetContentSize(JointMatrixMad); return Result; } - void WriteAspectProperty(sycl_device_binary_property &NextFreeProperty, + void writeAspectProperty(sycl_device_binary_property &NextFreeProperty, char *&NextFreeContent) const { if (Aspects.empty()) return; // Get the next free property entry and move the needle. sycl_device_binary_property NewProperty = NextFreeProperty++; NewProperty->Type = SYCL_PROPERTY_TYPE_BYTE_ARRAY; - NewProperty->ValSize = GetAspectsContentSize(); + NewProperty->ValSize = getAspectsContentSize(); // Copy the name. const size_t NameLen = std::strlen("aspects"); std::memcpy(NextFreeContent, "aspects", NameLen + 1); @@ -366,7 +366,7 @@ struct MergedDeviceRequirements { NextFreeContent += NewProperty->ValSize; } - static void WriteStringSetProperty( + static void writeStringSetProperty( const std::unordered_set &Set, const char *SetName, sycl_device_binary_property &NextFreeProperty, char *&NextFreeContent) { if (Set.empty()) @@ -374,7 +374,7 @@ struct MergedDeviceRequirements { // Get the next free property entry and move the needle. sycl_device_binary_property NewProperty = NextFreeProperty++; NewProperty->Type = SYCL_PROPERTY_TYPE_BYTE_ARRAY; - NewProperty->ValSize = GetStringSetContentSize(Set); + NewProperty->ValSize = getStringSetContentSize(Set); // Copy the name. const size_t NameLen = std::strlen(SetName); std::memcpy(NextFreeContent, SetName, NameLen + 1); @@ -394,7 +394,7 @@ struct MergedDeviceRequirements { // Merging device requirements is a little more involved, as it may impose // new requirements. static MergedDeviceRequirements -MergeDeviceRequirements(const std::vector &Imgs) { +mergeDeviceRequirements(const std::vector &Imgs) { MergedDeviceRequirements MergedReqs; for (const RTDeviceBinaryImage *Img : Imgs) { const RTDeviceBinaryImage::PropertyRange &Range = @@ -451,7 +451,7 @@ MergeDeviceRequirements(const std::vector &Imgs) { } // Copies a property into new memory. -static void CopyProperty(sycl_device_binary_property &NextFreeProperty, +static void copyProperty(sycl_device_binary_property &NextFreeProperty, char *&NextFreeContent, const sycl_device_binary_property OldProperty) { // Get the next free property entry and move the needle. @@ -554,7 +554,7 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( // For device requirements we need to do special handling to merge the // property values as well. - MergedDeviceRequirements MergedDevReqs = MergeDeviceRequirements(Imgs); + MergedDeviceRequirements MergedDevReqs = mergeDeviceRequirements(Imgs); // Now that we have merged all properties, we need to calculate how much // memory we need to store the new property sets. @@ -571,7 +571,7 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( PropertyCount += Vec->size(); for (const auto &Map : MergedMaps) PropertyCount += Map->size(); - PropertyCount += MergedDevReqs.GetPropertiesCount(); + PropertyCount += MergedDevReqs.getPropertiesCount(); // Count the bytes needed for the values and names of the properties. auto GetPropertyContentSize = [](const sycl_device_binary_property Prop) { @@ -586,7 +586,7 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( for (const auto &PropIt : *Map) PropertyContentByteSize += strlen(PropIt.second->Name) + 1 + GetPropertyContentSize(PropIt.second); - PropertyContentByteSize += MergedDevReqs.GetPropertiesContentByteSize(); + PropertyContentByteSize += MergedDevReqs.getPropertiesContentByteSize(); const size_t PropertySectionSize = PropertyCount * PaddedPropertyByteSize; @@ -617,7 +617,7 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( return; TargetRange.Begin = NextFreeProperty; for (const sycl_device_binary_property Prop : Properties) - CopyProperty(NextFreeProperty, NextFreeContent, Prop); + copyProperty(NextFreeProperty, NextFreeContent, Prop); TargetRange.End = NextFreeProperty; }; auto CopyPropertiesMap = @@ -627,7 +627,7 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( return; TargetRange.Begin = NextFreeProperty; for (const auto &PropIt : Properties) - CopyProperty(NextFreeProperty, NextFreeContent, PropIt.second); + copyProperty(NextFreeProperty, NextFreeContent, PropIt.second); TargetRange.End = NextFreeProperty; }; @@ -652,12 +652,12 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( { DeviceRequirements.Begin = NextFreeProperty; for (const auto &PropIt : MergedDevReqs.MergeMap) - CopyProperty(NextFreeProperty, NextFreeContent, PropIt.second); - MergedDevReqs.WriteAspectProperty(NextFreeProperty, NextFreeContent); - MergedDeviceRequirements::WriteStringSetProperty( + copyProperty(NextFreeProperty, NextFreeContent, PropIt.second); + MergedDevReqs.writeAspectProperty(NextFreeProperty, NextFreeContent); + MergedDeviceRequirements::writeStringSetProperty( MergedDevReqs.JointMatrix, "joint_matrix", NextFreeProperty, NextFreeContent); - MergedDeviceRequirements::WriteStringSetProperty( + MergedDeviceRequirements::writeStringSetProperty( MergedDevReqs.JointMatrixMad, "joint_matrix_mad", NextFreeProperty, NextFreeContent); DeviceRequirements.End = NextFreeProperty; From f900a6afce3925224a9cb122e2f46a9c662ad7cd Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 28 Apr 2025 22:23:15 -0700 Subject: [PATCH 24/31] Change to using string_view for options Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_image_impl.hpp | 53 ++++++++++--------- sycl/source/detail/kernel_bundle_impl.hpp | 20 +++---- .../kernel_compiler_opencl.cpp | 23 ++++---- .../kernel_compiler_opencl.hpp | 9 ++-- .../kernel_compiler/kernel_compiler_sycl.cpp | 30 ++++++----- .../kernel_compiler/kernel_compiler_sycl.hpp | 7 ++- sycl/source/kernel_bundle.cpp | 20 +------ 7 files changed, 80 insertions(+), 82 deletions(-) diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index a2524f5ef9704..839b0e90e6523 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -694,8 +694,9 @@ class device_image_impl { std::vector> buildFromSource( const std::vector &Devices, - const std::vector &BuildOptions, std::string *LogPtr, - const std::vector &RegisteredKernelNames, + const std::vector &BuildOptions, + std::string *LogPtr, + const std::vector &RegisteredKernelNames, std::vector> &OutDeviceBins) const { assert(!std::holds_alternative(MBinImage)); @@ -779,8 +780,9 @@ class device_image_impl { std::vector> compileFromSource( const std::vector &Devices, - const std::vector &CompileOptions, std::string *LogPtr, - const std::vector &RegisteredKernelNames, + const std::vector &CompileOptions, + std::string *LogPtr, + const std::vector &RegisteredKernelNames, std::vector> &OutDeviceBins) const { assert(!std::holds_alternative(MBinImage)); @@ -822,7 +824,7 @@ class device_image_impl { get_bin_image_ref() != nullptr; } - static std::string trimXsFlags(std::string &str) { + static std::string_view trimXsFlags(std::string_view &str) { // Trim first and last quote if they exist, but no others. char EncounteredQuote = '\0'; auto Start = std::find_if(str.begin(), str.end(), [&](char c) { @@ -840,31 +842,31 @@ class device_image_impl { return !std::isspace(c); }).base(); if (Start != std::end(str) && End != std::begin(str) && Start < End) { - return std::string(Start, End); + return std::string_view(Start, std::distance(Start, End)); } return ""; } static std::string - extractXsFlags(const std::vector &BuildOptions) { + extractXsFlags(const std::vector &BuildOptions) { std::stringstream SS; - for (std::string Option : BuildOptions) { - auto Where = Option.find("-Xs"); - if (Where != std::string::npos) { + for (sycl::detail::string_view Option : BuildOptions) { + std::string_view OptionSV{Option.data()}; + auto Where = OptionSV.find("-Xs"); + if (Where != std::string_view::npos) { Where += 3; - std::string Flags = Option.substr(Where); + std::string_view Flags = OptionSV.substr(Where); SS << trimXsFlags(Flags) << " "; } } return SS.str(); } - bool - extKernelCompilerFetchFromCache(const std::vector Devices, - const std::vector &BuildOptions, - const std::string &SourceStr, - ur_program_handle_t &UrProgram) const { + bool extKernelCompilerFetchFromCache( + const std::vector Devices, + const std::vector &BuildOptions, + const std::string &SourceStr, ur_program_handle_t &UrProgram) const { const std::shared_ptr &ContextImpl = getSyclObjImpl(MContext); const AdapterPtr &Adapter = ContextImpl->getAdapter(); @@ -976,12 +978,12 @@ class device_image_impl { } } - std::vector> - createSYCLImages(const std::vector &Devices, bundle_state State, - const std::vector &Options, std::string *LogPtr, - const std::vector &RegisteredKernelNames, - std::vector> - &OutDeviceBins) const { + std::vector> createSYCLImages( + const std::vector &Devices, bundle_state State, + const std::vector &Options, std::string *LogPtr, + const std::vector &RegisteredKernelNames, + std::vector> &OutDeviceBins) + const { assert(MRTCBinInfo); assert(MRTCBinInfo->MLanguage == syclex::source_language::sycl); assert(std::holds_alternative(MBinImage)); @@ -993,8 +995,9 @@ class device_image_impl { SourceExt << SourceStr << '\n'; auto EmitEntry = - [&SourceExt](const std::string &Name) -> std::ostringstream & { - SourceExt << " {\"" << Name << "\", " << Name << "}"; + [&SourceExt]( + const sycl::detail::string_view &Name) -> std::ostringstream & { + SourceExt << " {\"" << Name.data() << "\", " << Name.data() << "}"; return SourceExt; }; @@ -1183,7 +1186,7 @@ class device_image_impl { ur_program_handle_t createProgramFromSource(const std::vector Devices, - const std::vector &Options, + const std::vector &Options, std::string *LogPtr) const { const std::shared_ptr &ContextImpl = getSyclObjImpl(MContext); diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index d9fdbff15e339..e6cf0001a9a8a 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -527,11 +527,11 @@ class kernel_bundle_impl { MDeviceImages.emplace_back(DevImg); } - std::shared_ptr - build_from_source(const std::vector Devices, - const std::vector &BuildOptions, - std::string *LogPtr, - const std::vector &RegisteredKernelNames) { + std::shared_ptr build_from_source( + const std::vector Devices, + const std::vector &BuildOptions, + std::string *LogPtr, + const std::vector &RegisteredKernelNames) { assert(MState == bundle_state::ext_oneapi_source && "bundle_state::ext_oneapi_source required"); assert(allSourceBasedImages() && "All images must be source-based."); @@ -551,11 +551,11 @@ class kernel_bundle_impl { bundle_state::executable); } - std::shared_ptr - compile_from_source(const std::vector Devices, - const std::vector &CompileOptions, - std::string *LogPtr, - const std::vector &RegisteredKernelNames) { + std::shared_ptr compile_from_source( + const std::vector Devices, + const std::vector &CompileOptions, + std::string *LogPtr, + const std::vector &RegisteredKernelNames) { assert(MState == bundle_state::ext_oneapi_source && "bundle_state::ext_oneapi_source required"); assert(allSourceBasedImages() && "All images must be source-based."); diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.cpp b/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.cpp index 63907ff913dca..5eca06f7b1ee7 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.cpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.cpp @@ -193,12 +193,13 @@ std::string InvokeOclocQuery(const std::vector &IPVersionVec, return QueryLog; } -spirv_vec_t OpenCLC_to_SPIRV(const std::string &Source, - const std::vector &IPVersionVec, - const std::vector &UserArgs, - std::string *LogPtr) { - std::vector CMUserArgs = UserArgs; - CMUserArgs.push_back("-cmc"); +spirv_vec_t +OpenCLC_to_SPIRV(const std::string &Source, + const std::vector &IPVersionVec, + const std::vector &UserArgs, + std::string *LogPtr) { + std::vector CMUserArgs = UserArgs; + CMUserArgs.push_back(sycl::detail::string_view{"-cmc"}); // handles into ocloc shared lib static void *oclocInvokeHandle = nullptr; @@ -208,11 +209,11 @@ spirv_vec_t OpenCLC_to_SPIRV(const std::string &Source, SetupLibrary(oclocInvokeHandle, oclocFreeOutputHandle, build_errc); // assemble ocloc args - std::string CombinedUserArgs = - std::accumulate(UserArgs.begin(), UserArgs.end(), std::string(""), - [](const std::string &acc, const std::string &s) { - return acc + s + " "; - }); + std::string CombinedUserArgs = ""; + for (const sycl::detail::string_view &UserArg : UserArgs) { + CombinedUserArgs += UserArg.data(); + CombinedUserArgs += " "; + } std::vector Args = {"ocloc", "-q", "-spv_only", "-options", CombinedUserArgs.c_str()}; diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.hpp b/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.hpp index d618c86e07d97..9551b95a23132 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.hpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.hpp @@ -20,10 +20,11 @@ namespace ext::oneapi::experimental { namespace detail { using spirv_vec_t = std::vector; -spirv_vec_t OpenCLC_to_SPIRV(const std::string &Source, - const std::vector &IPVersionVec, - const std::vector &UserArgs, - std::string *LogPtr); +spirv_vec_t +OpenCLC_to_SPIRV(const std::string &Source, + const std::vector &IPVersionVec, + const std::vector &UserArgs, + std::string *LogPtr); bool OpenCLC_Compilation_Available(); diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp index 88f2b46ee9170..93ec94a8ac328 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp @@ -21,12 +21,15 @@ inline namespace _V1 { namespace ext::oneapi::experimental { namespace detail { -std::string userArgsAsString(const std::vector &UserArguments) { - return std::accumulate(UserArguments.begin(), UserArguments.end(), - std::string(""), - [](const std::string &A, const std::string &B) { - return A.empty() ? B : A + " " + B; - }); +std::string +userArgsAsString(const std::vector &UserArguments) { + std::string Result = ""; + for (const sycl::detail::string_view &UserArg : UserArguments) { + if (!Result.empty()) + Result += " "; + Result += UserArg.data(); + } + return Result; } bool SYCL_JIT_Compilation_Available() { @@ -37,16 +40,19 @@ bool SYCL_JIT_Compilation_Available() { #endif } -std::pair -SYCL_JIT_Compile([[maybe_unused]] const std::string &SYCLSource, - [[maybe_unused]] const include_pairs_t &IncludePairs, - [[maybe_unused]] const std::vector &UserArgs, - [[maybe_unused]] std::string *LogPtr) { +std::pair SYCL_JIT_Compile( + [[maybe_unused]] const std::string &SYCLSource, + [[maybe_unused]] const include_pairs_t &IncludePairs, + [[maybe_unused]] const std::vector &UserArgs, + [[maybe_unused]] std::string *LogPtr) { #if SYCL_EXT_JIT_ENABLE static std::atomic_uintptr_t CompilationCounter; std::string CompilationID = "rtc_" + std::to_string(CompilationCounter++); + std::vector UserArgStrings; + for (const sycl::detail::string_view UserArg : UserArgs) + UserArgStrings.push_back(UserArg.data()); return sycl::detail::jit_compiler::get_instance().compileSYCL( - CompilationID, SYCLSource, IncludePairs, UserArgs, LogPtr); + CompilationID, SYCLSource, IncludePairs, UserArgStrings, LogPtr); #else throw sycl::exception(sycl::errc::build, "kernel_compiler via sycl-jit is not available"); diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp index 86215b75baf12..047a10a061e1f 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp @@ -10,6 +10,7 @@ #include #include // __SYCL_EXPORT +#include #include // sycl_device_binaries @@ -23,7 +24,8 @@ namespace detail { using include_pairs_t = std::vector>; -std::string userArgsAsString(const std::vector &UserArguments); +std::string +userArgsAsString(const std::vector &UserArguments); // Compile the given SYCL source string and virtual include files into the image // format understood by the program manager. @@ -32,7 +34,8 @@ std::string userArgsAsString(const std::vector &UserArguments); // bundle-specific prefix used for loading the kernels. std::pair SYCL_JIT_Compile(const std::string &Source, const include_pairs_t &IncludePairs, - const std::vector &UserArgs, std::string *LogPtr); + const std::vector &UserArgs, + std::string *LogPtr); void SYCL_JIT_Destroy(sycl_device_binaries Binaries); diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index dc0ff4030472f..4cb36f489584b 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -468,14 +468,6 @@ obj_kb compile_from_source( const std::vector &BuildOptions, sycl::detail::string *LogView, const std::vector &RegisteredKernelNames) { - std::vector Options; - for (const sycl::detail::string_view option : BuildOptions) - Options.push_back(option.data()); - - std::vector KernelNames; - for (const sycl::detail::string_view name : RegisteredKernelNames) - KernelNames.push_back(name.data()); - std::string Log; std::string *LogPtr = nullptr; if (LogView) @@ -484,7 +476,7 @@ obj_kb compile_from_source( sycl::detail::removeDuplicateDevices(Devices); std::shared_ptr sourceImpl = getSyclObjImpl(SourceKB); std::shared_ptr KBImpl = sourceImpl->compile_from_source( - UniqueDevices, Options, LogPtr, KernelNames); + UniqueDevices, BuildOptions, LogPtr, RegisteredKernelNames); auto result = sycl::detail::createSyclObjFromImpl(KBImpl); if (LogView) *LogView = Log; @@ -500,14 +492,6 @@ exe_kb build_from_source( const std::vector &BuildOptions, sycl::detail::string *LogView, const std::vector &RegisteredKernelNames) { - std::vector Options; - for (const sycl::detail::string_view option : BuildOptions) - Options.push_back(option.data()); - - std::vector KernelNames; - for (const sycl::detail::string_view name : RegisteredKernelNames) - KernelNames.push_back(name.data()); - std::string Log; std::string *LogPtr = nullptr; if (LogView) @@ -517,7 +501,7 @@ exe_kb build_from_source( const std::shared_ptr &sourceImpl = getSyclObjImpl(SourceKB); std::shared_ptr KBImpl = sourceImpl->build_from_source( - UniqueDevices, Options, LogPtr, KernelNames); + UniqueDevices, BuildOptions, LogPtr, RegisteredKernelNames); auto result = sycl::detail::createSyclObjFromImpl(std::move(KBImpl)); if (LogView) *LogView = Log; From 3cc2029c53c36f001d837e50d4cc9bdb551126a2 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 28 Apr 2025 22:23:31 -0700 Subject: [PATCH 25/31] Fix formatting Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_image_impl.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 839b0e90e6523..d5bac99801267 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -980,7 +980,8 @@ class device_image_impl { std::vector> createSYCLImages( const std::vector &Devices, bundle_state State, - const std::vector &Options, std::string *LogPtr, + const std::vector &Options, + std::string *LogPtr, const std::vector &RegisteredKernelNames, std::vector> &OutDeviceBins) const { From b5324f0c90761d922d2d115c8e40c4b37203a5ba Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 28 Apr 2025 23:05:31 -0700 Subject: [PATCH 26/31] Fix build error Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_impl.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 9f68d6795dbc4..3daee613a05bc 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -906,9 +906,11 @@ bool device_impl::extOneapiCanCompile( ext::oneapi::experimental::source_language Language) { try { // Currently only SYCL language is supported for compiling. + std::shared_ptr Self = MPlatform->getOrMakeDeviceImpl(MDevice); return Language == ext::oneapi::experimental::source_language::sycl && sycl::ext::oneapi::experimental::detail:: - is_source_kernel_bundle_supported(getBackend(), Language); + is_source_kernel_bundle_supported( + Language, std::vector{Self}); } catch (sycl::exception &) { return false; } From d6dc0832cb07e5958331dad8f12fbb4f42268300 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 28 Apr 2025 23:11:05 -0700 Subject: [PATCH 27/31] Remove unused code Signed-off-by: Larsen, Steffen --- sycl/source/detail/kernel_compiler/kernel_compiler_opencl.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.cpp b/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.cpp index d668df5894e95..5cc859f47cf3e 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.cpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.cpp @@ -209,9 +209,6 @@ OpenCLC_to_SPIRV(const std::string &Source, const std::vector &IPVersionVec, const std::vector &UserArgs, std::string *LogPtr) { - std::vector CMUserArgs = UserArgs; - CMUserArgs.push_back(sycl::detail::string_view{"-cmc"}); - // handles into ocloc shared lib static void *oclocInvokeHandle = nullptr; static void *oclocFreeOutputHandle = nullptr; From 81943eadf960a4f049dda4a379df5b3e5170756f Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 29 Apr 2025 03:22:51 -0700 Subject: [PATCH 28/31] Fix windows build Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_image_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index d5bac99801267..0c1db96e693a8 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -842,7 +842,7 @@ class device_image_impl { return !std::isspace(c); }).base(); if (Start != std::end(str) && End != std::begin(str) && Start < End) { - return std::string_view(Start, std::distance(Start, End)); + return std::string_view(&*Start, std::distance(Start, End)); } return ""; From 62f06fc51e04ba9266c7a9986a9d3419899281df Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 29 Apr 2025 07:00:42 -0700 Subject: [PATCH 29/31] Switch to unordered_map Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_binary_image.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 2d8295b400038..f3b8042fe7964 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -15,8 +15,8 @@ #include #include #include -#include #include +#include #include namespace sycl { @@ -274,11 +274,12 @@ naiveMergeBinaryProperties(const std::vector &Imgs, // Exclusive property merge logic. If IgnoreDuplicates is false it assumes there // are no cases where properties have different values and throws otherwise. template -static std::map +static std::unordered_map exclusiveMergeBinaryProperties( const std::vector &Imgs, const RangeGetterT &RangeGetter, bool IgnoreDuplicates = false) { - std::map MergeMap; + std::unordered_map + MergeMap; for (const RTDeviceBinaryImage *Img : Imgs) { const RTDeviceBinaryImage::PropertyRange &Range = RangeGetter(*Img); for (const sycl_device_binary_property Prop : Range) { @@ -303,7 +304,8 @@ exclusiveMergeBinaryProperties( // Device requirements needs the ability to produce new properties. The // information for these are kept in this struct. struct MergedDeviceRequirements { - std::map MergeMap; + std::unordered_map + MergeMap; std::unordered_set Aspects; std::unordered_set JointMatrix; std::unordered_set JointMatrixMad; @@ -542,8 +544,9 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( return Img.getMiscProperties(); }); - std::array< - const std::map *, 4> + std::array *, + 4> MergedMaps{&MergedDeviceLibReqMask, &MergedProgramMetadata, &MergedImportedSymbols, &MergedMisc}; From 5a189dc934aba3590cca8e7ec3a1674eb22d6c97 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Thu, 1 May 2025 06:12:10 +0200 Subject: [PATCH 30/31] Update sycl/include/sycl/kernel_bundle.hpp Co-authored-by: Sergey Semenov --- sycl/include/sycl/kernel_bundle.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index 0beff1ebc2205..6c16f969303df 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -1202,6 +1202,7 @@ compile_from_source(kernel_bundle &SourceKB, Options.push_back(sycl::detail::string_view{opt}); std::vector KernelNames; + KernelNames.reserve(RegisteredKernelNames); for (const std::string &name : RegisteredKernelNames) KernelNames.push_back(sycl::detail::string_view{name}); From daeacc3b81d1788534967d659c97fd4ccfbfc198 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Thu, 1 May 2025 06:23:51 +0200 Subject: [PATCH 31/31] Update sycl/include/sycl/kernel_bundle.hpp --- sycl/include/sycl/kernel_bundle.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index 6c16f969303df..2002fdea201ad 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -1202,7 +1202,7 @@ compile_from_source(kernel_bundle &SourceKB, Options.push_back(sycl::detail::string_view{opt}); std::vector KernelNames; - KernelNames.reserve(RegisteredKernelNames); + KernelNames.reserve(RegisteredKernelNames.size()); for (const std::string &name : RegisteredKernelNames) KernelNames.push_back(sycl::detail::string_view{name});