diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc index aa7bed8f123db..0f5da5dbc549f 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc @@ -157,14 +157,34 @@ a! ---- class device { +bool ext_oneapi_can_build(ext::oneapi::experimental::source_language lang); + +}; +---- +!==== + +_Returns:_ The value `true` only if the device supports the +`ext::oneapi::experimental::build` function on kernel bundles written in the +source language `lang`. + +a| +[frame=all,grid=none] +!==== +a! +[source,c++] +---- +class device { + bool ext_oneapi_can_compile(ext::oneapi::experimental::source_language lang); }; ---- !==== -_Returns:_ The value `true` only if the device supports kernel bundles written -in the source language `lang`. +_Returns:_ The value `true` only if the device supports the +`ext::oneapi::experimental::compile` function on kernel bundles written in the +source language `lang`. + |==== === New free functions to create and build kernel bundles @@ -226,8 +246,6 @@ state. _Throws:_ -* An `exception` with the `errc::invalid` error code if the source language - `lang` is not supported by any device contained by the context `ctxt`. * An `exception` with the `errc::invalid` error code if the source language `lang` does not support one of the properties in `PropertyListT`. * Overload (1) throws an `exception` with the `errc::invalid` error code if the @@ -241,9 +259,11 @@ function. This function succeeds even if some devices in `ctxt` do not support the source language `lang`. -However, the `build` function fails unless _all_ of its devices support `lang`. -Therefore, applications should take care to omit devices that do not support -`lang` when calling `build`. +However, the `build` and `compile` functions will fail if any of its devices +return `false` for `ext_oneapi_can_build(lang)` and +`ext_oneapi_can_compile(lang)` respectively. Therefore, applications should take +care to omit devices that do not support `lang` for the functions they intend on +calling. _{endnote}_] a| @@ -271,8 +291,8 @@ kernel_bundle build( _Constraints:_ Available only when `PropertyListT` is an instance of `sycl::ext::oneapi::experimental::properties` which contains no properties -other than those listed below in the section "New properties for the `build` -function". +other than those listed below in the section "New properties for the `build` and +`compile` functions". _Effects (1):_ The source code from `sourceBundle` is translated into one or more device images of state `bundle_state::executable`, and a new kernel bundle is @@ -293,16 +313,16 @@ _Returns:_ The newly created kernel bundle, which has `executable` state. _Throws:_ * An `exception` with the `errc::invalid` error code if any of the devices in - `devs` is not contained by the context associated with `sourceBundle`. + `devs` return `false` for `ext_oneapi_can_build` with the source language of + `sourceBundle`. * An `exception` with the `errc::invalid` error code if any of the devices in - `devs` does not support compilation of kernels in the source language of - `sourceBundle`. + `devs` is not contained by the context associated with `sourceBundle`. * An `exception` with the `errc::invalid` error code if the source language `lang` does not support one of the properties in `PropertyListT` or if `props` contains a `build_options` property that contains an option that is - not supported by `lang`. + not supported when building `lang`. * An `exception` with the `errc::build` error code if the compilation or linking operations fail. @@ -317,6 +337,78 @@ source code used to create the kernel bundle being printed to the terminal. In situations where this is undesirable, developers must ensure that the exception is caught and handled appropriately. _{endnote}_] + +a| +[frame=all,grid=none] +!==== +a! +[source] +---- +namespace sycl::ext::oneapi::experimental { + +template (1) +kernel_bundle compile( + const kernel_bundle& sourceBundle, + const std::vector& devs, PropertyListT props={}) + +template (2) +kernel_bundle compile( + const kernel_bundle& sourceBundle, + PropertyListT props = {}) + +} // namespace sycl::ext::oneapi::experimental +---- +!==== + + +_Constraints:_ Available only when `PropertyListT` is an instance of +`sycl::ext::oneapi::experimental::properties` which contains no properties +other than those listed below in the section "New properties for the `build` and +`compile` functions". + +_Effects (1):_ The source code from `sourceBundle` is translated into one or +more device images of state `bundle_state::object`, and a new kernel bundle is +created to contain these device images. +The new bundle represents all of the kernels in `sourceBundle` that are +compatible with at least one of the devices in `devs`. +Any remaining kernels (those that are not compatible with any of the devices in +`devs`) are not represented in the new kernel bundle. + +The new bundle has the same associated context as `sourceBundle`, and the new +bundle's set of associated devices is `devs` (with duplicate devices removed). + +_Effects (2)_: Equivalent to +`compile(sourceBundle, sourceBundle.get_devices(), props)`. + +_Returns:_ The newly created kernel bundle, which has `object` state. + +_Throws:_ + +* An `exception` with the `errc::invalid` error code if any of the devices in + `devs` return `false` for `ext_oneapi_can_compile` with the source language of + `sourceBundle`. + +* An `exception` with the `errc::invalid` error code if any of the devices in + `devs` is not contained by the context associated with `sourceBundle`. + +* An `exception` with the `errc::invalid` error code if the source language + `lang` does not support one of the properties in `PropertyListT` or if + `props` contains a `build_options` property that contains an option that is + not supported when compiling `lang`. + +* An `exception` with the `errc::build` error code if the compilation operation + fails. In this case, the exception `what` string provides a full build log, + including descriptions of any errors, warning messages, and other + diagnostics. + This string is intended for human consumption, and the format may not be + stable across implementations of this extension. + +[_Note:_ An uncaught `errc::build` exception may result in some or all of the +source code used to create the kernel bundle being printed to the terminal. +In situations where this is undesirable, developers must ensure that the +exception is caught and handled appropriately. +_{endnote}_] + |==== === New properties for the `create_kernel_bundle_from_source` function @@ -384,10 +476,10 @@ _Throws (3):_ entry with `name` in this property. |==== -=== New properties for the `build` function +=== New properties for the `build` and `compile` functions This extension adds the following properties, which can be used in conjunction -with the `build` function that is defined above: +with the `build` and `compile` function that is defined above: |==== a| diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler_opencl.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler_opencl.asciidoc index 7d79e75634c4d..f5d970d4f3e2d 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler_opencl.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler_opencl.asciidoc @@ -461,7 +461,7 @@ int main() { sycl::queue q; sycl::device d = q.get_device(); - if (d.ext_oneapi_can_compile(syclex::source_language::opencl)) + if (d.ext_oneapi_can_build(syclex::source_language::opencl)) std::cout << "Device supports online compilation of OpenCL C kernels\n"; if (d.ext_oneapi_supports_cl_c_version(syclex::opencl_c_3_0)) diff --git a/sycl/include/sycl/device.hpp b/sycl/include/sycl/device.hpp index 8a9a331863703..40dae8561b92c 100644 --- a/sycl/include/sycl/device.hpp +++ b/sycl/include/sycl/device.hpp @@ -299,14 +299,27 @@ class __SYCL_EXPORT device : public detail::OwnerLessBase { /// kernel_compiler extension + /// Indicates if the device can build a kernel for the given language. + /// + /// \param Language is one of the values from the + /// kernel_bundle::source_language enumeration described in the + /// sycl_ext_oneapi_kernel_compiler specification + /// + /// \return The value true only if the device supports the + /// ext::oneapi::experimental::build function on kernel bundles written in + /// the source language \p Language. + bool + ext_oneapi_can_build(ext::oneapi::experimental::source_language Language); + /// Indicates if the device can compile a kernel for the given language. /// /// \param Language is one of the values from the /// kernel_bundle::source_language enumeration described in the /// sycl_ext_oneapi_kernel_compiler specification /// - /// \return true only if the device supports kernel bundles written in the - /// source language `lang`. + /// \return The value true only if the device supports the + /// ext::oneapi::experimental::compile function on kernel bundles written in + /// the source language \p Language. bool ext_oneapi_can_compile(ext::oneapi::experimental::source_language Language); diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index 7fd27f75c0628..2002fdea201ad 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -1165,6 +1165,7 @@ build_from_source(kernel_bundle &SourceKB, std::string *LogPtr, const std::vector &RegisteredKernelNames) { std::vector Options; + Options.reserve(BuildOptions.size()); for (const std::string &opt : BuildOptions) Options.push_back(sycl::detail::string_view{opt}); @@ -1181,6 +1182,38 @@ build_from_source(kernel_bundle &SourceKB, } return build_from_source(SourceKB, Devices, Options, nullptr, KernelNames); } + +__SYCL_EXPORT kernel_bundle compile_from_source( + kernel_bundle &SourceKB, + const std::vector &Devices, + const std::vector &CompileOptions, + sycl::detail::string *LogPtr, + const std::vector &RegisteredKernelNames); + +inline kernel_bundle +compile_from_source(kernel_bundle &SourceKB, + const std::vector &Devices, + const std::vector &CompileOptions, + std::string *LogPtr, + const std::vector &RegisteredKernelNames) { + std::vector Options; + Options.reserve(CompileOptions.size()); + for (const std::string &opt : CompileOptions) + Options.push_back(sycl::detail::string_view{opt}); + + std::vector KernelNames; + KernelNames.reserve(RegisteredKernelNames.size()); + for (const std::string &name : RegisteredKernelNames) + KernelNames.push_back(sycl::detail::string_view{name}); + + sycl::detail::string Log; + auto result = compile_from_source(SourceKB, Devices, Options, + LogPtr ? &Log : nullptr, KernelNames); + if (LogPtr) + *LogPtr = Log.c_str(); + return result; +} + } // namespace detail ///////////////////////// @@ -1218,6 +1251,39 @@ kernel_bundle create_kernel_bundle_from_source( } #endif +///////////////////////// +// syclex::compile(source_kb) => obj_kb +///////////////////////// + +template >> +kernel_bundle +compile(kernel_bundle &SourceKB, + const std::vector &Devices, PropertyListT props = {}) { + std::vector CompileOptionsVec; + std::string *LogPtr = nullptr; + std::vector RegisteredKernelNamesVec; + if constexpr (props.template has_property()) + CompileOptionsVec = props.template get_property().opts; + if constexpr (props.template has_property()) + LogPtr = props.template get_property().log; + if constexpr (props.template has_property()) + RegisteredKernelNamesVec = + props.template get_property().names; + return detail::compile_from_source(SourceKB, Devices, CompileOptionsVec, + LogPtr, RegisteredKernelNamesVec); +} + +template >> +kernel_bundle +compile(kernel_bundle &SourceKB, + PropertyListT props = {}) { + return compile(SourceKB, SourceKB.get_devices(), props); +} + ///////////////////////// // syclex::build(source_kb) => exe_kb ///////////////////////// diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 373241cd026c0..f3b8042fe7964 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -13,8 +13,11 @@ #include #include +#include #include #include +#include +#include namespace sycl { inline namespace _V1 { @@ -147,21 +150,26 @@ void RTDeviceBinaryImage::dump(std::ostream &Out) const { sycl_device_binary_property RTDeviceBinaryImage::getProperty(const char *PropName) const { - RTDeviceBinaryImage::PropertyRange BoolProp; - BoolProp.init(Bin, __SYCL_PROPERTY_SET_SYCL_MISC_PROP); - if (!BoolProp.isAvailable()) + if (!Misc.isAvailable()) return nullptr; - auto It = std::find_if(BoolProp.begin(), BoolProp.end(), + auto It = std::find_if(Misc.begin(), Misc.end(), [=](sycl_device_binary_property Prop) { return !strcmp(PropName, Prop->Name); }); - if (It == BoolProp.end()) + if (It == Misc.end()) return nullptr; return *It; } void RTDeviceBinaryImage::init(sycl_device_binary Bin) { + ImageId = ImageCounter++; + + // If there was no binary, we let the owner handle initialization as they see + // fit. This is used when merging binaries, e.g. during linking. + if (!Bin) + return; + // Bin != nullptr is guaranteed here. this->Bin = Bin; // If device binary image format wasn't set by its producer, then can't change @@ -198,16 +206,12 @@ void RTDeviceBinaryImage::init(sycl_device_binary Bin) { HostPipes.init(Bin, __SYCL_PROPERTY_SET_SYCL_HOST_PIPES); VirtualFunctions.init(Bin, __SYCL_PROPERTY_SET_SYCL_VIRTUAL_FUNCTIONS); RegisteredKernels.init(Bin, __SYCL_PROPERTY_SET_SYCL_REGISTERED_KERNELS); - - ImageId = ImageCounter++; + Misc.init(Bin, __SYCL_PROPERTY_SET_SYCL_MISC_PROP); } std::atomic RTDeviceBinaryImage::ImageCounter = 1; -DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( - std::unique_ptr &&DataPtr, size_t DataSize) - : RTDeviceBinaryImage() { - Data = std::move(DataPtr); +DynRTDeviceBinaryImage::DynRTDeviceBinaryImage() : RTDeviceBinaryImage() { Bin = new sycl_device_binary_struct(); Bin->Version = SYCL_DEVICE_BINARY_VERSION; Bin->Kind = SYCL_DEVICE_BINARY_OFFLOAD_KIND_SYCL; @@ -215,10 +219,21 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( Bin->LinkOptions = ""; Bin->ManifestStart = nullptr; Bin->ManifestEnd = nullptr; - Bin->BinaryStart = reinterpret_cast(Data.get()); - Bin->BinaryEnd = Bin->BinaryStart + DataSize; + Bin->BinaryStart = nullptr; + Bin->BinaryEnd = nullptr; Bin->EntriesBegin = nullptr; Bin->EntriesEnd = nullptr; + Bin->Format = SYCL_DEVICE_BINARY_TYPE_NONE; + Bin->DeviceTargetSpec = __SYCL_DEVICE_BINARY_TARGET_UNKNOWN; +} + +DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( + std::unique_ptr> &&DataPtr, + size_t DataSize) + : DynRTDeviceBinaryImage() { + Data = std::move(DataPtr); + Bin->BinaryStart = reinterpret_cast(Data.get()); + Bin->BinaryEnd = Bin->BinaryStart + DataSize; Bin->Format = ur::getBinaryImageFormat(Bin->BinaryStart, DataSize); switch (Bin->Format) { case SYCL_DEVICE_BINARY_TYPE_SPIRV: @@ -235,6 +250,423 @@ DynRTDeviceBinaryImage::~DynRTDeviceBinaryImage() { Bin = nullptr; } +// "Naive" property merge logic. It merges the properties into a single property +// vector without checking for duplicates. As such, duplicates may occur in the +// final result. +template +static std::vector +naiveMergeBinaryProperties(const std::vector &Imgs, + const RangeGetterT &RangeGetter) { + size_t PropertiesCount = 0; + for (const RTDeviceBinaryImage *Img : Imgs) + PropertiesCount += RangeGetter(*Img).size(); + + std::vector Props; + Props.reserve(PropertiesCount); + for (const RTDeviceBinaryImage *Img : Imgs) { + const RTDeviceBinaryImage::PropertyRange &Range = RangeGetter(*Img); + Props.insert(Props.end(), Range.begin(), Range.end()); + } + + return Props; +} + +// Exclusive property merge logic. If IgnoreDuplicates is false it assumes there +// are no cases where properties have different values and throws otherwise. +template +static std::unordered_map +exclusiveMergeBinaryProperties( + const std::vector &Imgs, + const RangeGetterT &RangeGetter, bool IgnoreDuplicates = false) { + std::unordered_map + MergeMap; + for (const RTDeviceBinaryImage *Img : Imgs) { + const RTDeviceBinaryImage::PropertyRange &Range = RangeGetter(*Img); + for (const sycl_device_binary_property Prop : Range) { + const auto [It, Inserted] = + MergeMap.try_emplace(std::string_view{Prop->Name}, Prop); + if (IgnoreDuplicates || Inserted) + continue; + // If we didn't insert a new entry, check that the old entry had the + // exact same value. + const sycl_device_binary_property OtherProp = It->second; + if (OtherProp->Type != Prop->Type || + OtherProp->ValSize != Prop->ValSize || + (Prop->Type == SYCL_PROPERTY_TYPE_BYTE_ARRAY && + std::memcmp(OtherProp->ValAddr, Prop->ValAddr, Prop->ValSize) != 0)) + throw sycl::exception(make_error_code(errc::invalid), + "Unable to merge incompatible images."); + } + } + return MergeMap; +} + +// Device requirements needs the ability to produce new properties. The +// information for these are kept in this struct. +struct MergedDeviceRequirements { + std::unordered_map + MergeMap; + std::unordered_set Aspects; + std::unordered_set JointMatrix; + std::unordered_set JointMatrixMad; + + size_t getPropertiesCount() const { + return MergeMap.size() + !Aspects.empty() + !JointMatrix.empty() + + !JointMatrixMad.empty(); + } + + size_t getAspectsContentSize() const { + return Aspects.size() * sizeof(uint32_t); + } + + static size_t + getStringSetContentSize(const std::unordered_set &Set) { + size_t Result = 0; + Result += Set.size() - 1; // Semi-colon delimiters. + for (const std::string_view &Str : Set) // Strings. + Result += Str.size(); + return Result; + } + + size_t getPropertiesContentByteSize() const { + size_t Result = 0; + for (const auto &PropIt : MergeMap) + Result += strlen(PropIt.second->Name) + 1 + PropIt.second->ValSize; + + if (!Aspects.empty()) + Result += strlen("aspects") + 1 + getAspectsContentSize(); + + if (!JointMatrix.empty()) + Result += + strlen("joint_matrix") + 1 + getStringSetContentSize(JointMatrix); + + if (!JointMatrixMad.empty()) + Result += strlen("joint_matrix_mad") + 1 + + getStringSetContentSize(JointMatrixMad); + + return Result; + } + + void writeAspectProperty(sycl_device_binary_property &NextFreeProperty, + char *&NextFreeContent) const { + if (Aspects.empty()) + return; + // Get the next free property entry and move the needle. + sycl_device_binary_property NewProperty = NextFreeProperty++; + NewProperty->Type = SYCL_PROPERTY_TYPE_BYTE_ARRAY; + NewProperty->ValSize = getAspectsContentSize(); + // Copy the name. + const size_t NameLen = std::strlen("aspects"); + std::memcpy(NextFreeContent, "aspects", NameLen + 1); + NewProperty->Name = NextFreeContent; + NextFreeContent += NameLen + 1; + // Copy the values. + uint32_t *AspectContentIt = reinterpret_cast(NextFreeContent); + for (uint32_t Aspect : Aspects) + *(AspectContentIt++) = Aspect; + NewProperty->ValAddr = NextFreeContent; + NextFreeContent += NewProperty->ValSize; + } + + static void writeStringSetProperty( + const std::unordered_set &Set, const char *SetName, + sycl_device_binary_property &NextFreeProperty, char *&NextFreeContent) { + if (Set.empty()) + return; + // Get the next free property entry and move the needle. + sycl_device_binary_property NewProperty = NextFreeProperty++; + NewProperty->Type = SYCL_PROPERTY_TYPE_BYTE_ARRAY; + NewProperty->ValSize = getStringSetContentSize(Set); + // Copy the name. + const size_t NameLen = std::strlen(SetName); + std::memcpy(NextFreeContent, SetName, NameLen + 1); + NewProperty->Name = NextFreeContent; + NextFreeContent += NameLen + 1; + // Copy the values. + NewProperty->ValAddr = NextFreeContent; + for (auto StrIt = Set.begin(); StrIt != Set.end(); ++StrIt) { + if (StrIt != Set.begin()) + *(NextFreeContent++) = ';'; + std::memcpy(NextFreeContent, StrIt->data(), StrIt->size()); + NextFreeContent += StrIt->size(); + } + } +}; + +// Merging device requirements is a little more involved, as it may impose +// new requirements. +static MergedDeviceRequirements +mergeDeviceRequirements(const std::vector &Imgs) { + MergedDeviceRequirements MergedReqs; + for (const RTDeviceBinaryImage *Img : Imgs) { + const RTDeviceBinaryImage::PropertyRange &Range = + Img->getDeviceRequirements(); + for (const sycl_device_binary_property Prop : Range) { + std::string_view NameView{Prop->Name}; + + // Aspects we collect in a set early and add them afterwards. + if (NameView == "aspects") { + // Skip size bytes. + auto AspectIt = reinterpret_cast( + reinterpret_cast(Prop->ValAddr) + 8); + for (size_t I = 0; I < Prop->ValSize / sizeof(uint32_t); ++I) + MergedReqs.Aspects.emplace(AspectIt[I]); + continue; + } + + // joint_matrix and joint_matrix_mad have the same format, so we parse + // them the same way. + if (NameView == "joint_matrix" || NameView == "joint_matrix_mad") { + std::unordered_set &Set = + NameView == "joint_matrix" ? MergedReqs.JointMatrix + : MergedReqs.JointMatrixMad; + + // Skip size bytes. + std::string_view Contents{reinterpret_cast(Prop->ValAddr) + 8, + Prop->ValSize}; + size_t Pos = 0; + do { + const size_t NextPos = Contents.find(';', Pos); + if (NextPos != Pos) + Set.emplace(Contents.substr(Pos, NextPos - Pos)); + Pos = NextPos + 1; + } while (Pos != 0); + continue; + } + + const auto [It, Inserted] = + MergedReqs.MergeMap.try_emplace(NameView, Prop); + if (Inserted) + continue; + // Special handling has already happened, so we assume the rest are + // exclusive property values. + const sycl_device_binary_property OtherProp = It->second; + if (OtherProp->Type != Prop->Type || + OtherProp->ValSize != Prop->ValSize || + (Prop->Type == SYCL_PROPERTY_TYPE_BYTE_ARRAY && + std::memcmp(OtherProp->ValAddr, Prop->ValAddr, Prop->ValSize) != 0)) + throw sycl::exception(make_error_code(errc::invalid), + "Unable to merge incompatible images."); + } + } + return MergedReqs; +} + +// Copies a property into new memory. +static void copyProperty(sycl_device_binary_property &NextFreeProperty, + char *&NextFreeContent, + const sycl_device_binary_property OldProperty) { + // Get the next free property entry and move the needle. + sycl_device_binary_property NewProperty = NextFreeProperty++; + NewProperty->Type = OldProperty->Type; + NewProperty->ValSize = OldProperty->ValSize; + // Copy the name. + const size_t NameLen = std::strlen(OldProperty->Name); + std::memcpy(NextFreeContent, OldProperty->Name, NameLen + 1); + NewProperty->Name = NextFreeContent; + NextFreeContent += NameLen + 1; + // Copy the values. If the type is uint32 it will have been stored in the size + // instead of the value address. + if (OldProperty->Type == SYCL_PROPERTY_TYPE_BYTE_ARRAY) { + std::memcpy(NextFreeContent, OldProperty->ValAddr, OldProperty->ValSize); + NewProperty->ValAddr = NextFreeContent; + NextFreeContent += OldProperty->ValSize; + } else { + NewProperty->ValAddr = nullptr; + } +} + +DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( + const std::vector &Imgs) + : DynRTDeviceBinaryImage() { + init(nullptr); + + // Naive merges. + auto MergedSpecConstants = + naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getSpecConstants(); + }); + auto MergedSpecConstantsDefaultValues = + naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getSpecConstantsDefaultValues(); + }); + auto MergedKernelParamOptInfo = + naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getKernelParamOptInfo(); + }); + auto MergedAssertUsed = naiveMergeBinaryProperties( + Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getAssertUsed(); }); + auto MergedDeviceGlobals = + naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getDeviceGlobals(); + }); + auto MergedHostPipes = naiveMergeBinaryProperties( + Imgs, [](const RTDeviceBinaryImage &Img) { return Img.getHostPipes(); }); + auto MergedVirtualFunctions = + naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getVirtualFunctions(); + }); + auto MergedImplicitLocalArg = + naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getImplicitLocalArg(); + }); + auto MergedExportedSymbols = + naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getExportedSymbols(); + }); + auto MergedRegisteredKernels = + naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getRegisteredKernels(); + }); + + std::array *, 10> MergedVecs{ + &MergedSpecConstants, &MergedSpecConstantsDefaultValues, + &MergedKernelParamOptInfo, &MergedAssertUsed, + &MergedDeviceGlobals, &MergedHostPipes, + &MergedVirtualFunctions, &MergedImplicitLocalArg, + &MergedExportedSymbols, &MergedRegisteredKernels}; + + // Exclusive merges. + auto MergedDeviceLibReqMask = + exclusiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getDeviceLibReqMask(); + }); + auto MergedProgramMetadata = + exclusiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getProgramMetadata(); + }); + auto MergedImportedSymbols = exclusiveMergeBinaryProperties( + Imgs, + [](const RTDeviceBinaryImage &Img) { return Img.getImportedSymbols(); }, + /*IgnoreDuplicates=*/true); + auto MergedMisc = + exclusiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { + return Img.getMiscProperties(); + }); + + std::array *, + 4> + MergedMaps{&MergedDeviceLibReqMask, &MergedProgramMetadata, + &MergedImportedSymbols, &MergedMisc}; + + // When merging exported and imported, the exported symbols may cancel out + // some of the imported symbols. + for (const sycl_device_binary_property Prop : MergedExportedSymbols) + MergedImportedSymbols.erase(std::string_view{Prop->Name}); + + // For device requirements we need to do special handling to merge the + // property values as well. + MergedDeviceRequirements MergedDevReqs = mergeDeviceRequirements(Imgs); + + // Now that we have merged all properties, we need to calculate how much + // memory we need to store the new property sets. + constexpr size_t PropertyByteSize = + sizeof(_sycl_device_binary_property_struct); + constexpr size_t PropertyAlignment = + alignof(_sycl_device_binary_property_struct); + constexpr size_t PaddedPropertyByteSize = + (1 + ((PropertyByteSize - 1) / PropertyAlignment)) * PropertyAlignment; + + // Count the total number of property entries. + size_t PropertyCount = 0; + for (const auto &Vec : MergedVecs) + PropertyCount += Vec->size(); + for (const auto &Map : MergedMaps) + PropertyCount += Map->size(); + PropertyCount += MergedDevReqs.getPropertiesCount(); + + // Count the bytes needed for the values and names of the properties. + auto GetPropertyContentSize = [](const sycl_device_binary_property Prop) { + return Prop->Type == SYCL_PROPERTY_TYPE_BYTE_ARRAY ? Prop->ValSize : 0; + }; + size_t PropertyContentByteSize = 0; + for (const auto &Vec : MergedVecs) + for (const auto &Prop : *Vec) + PropertyContentByteSize += + strlen(Prop->Name) + 1 + GetPropertyContentSize(Prop); + for (const auto &Map : MergedMaps) + for (const auto &PropIt : *Map) + PropertyContentByteSize += strlen(PropIt.second->Name) + 1 + + GetPropertyContentSize(PropIt.second); + PropertyContentByteSize += MergedDevReqs.getPropertiesContentByteSize(); + + const size_t PropertySectionSize = PropertyCount * PaddedPropertyByteSize; + + // Allocate the memory aligned to the property entry alignment. + // Note: MSVC does not implement std::aligned_alloc. + Data = std::unique_ptr>( +#ifdef _MSC_VER + static_cast(_aligned_malloc(sizeof(char) * PropertySectionSize + + PropertyContentByteSize, + PropertyAlignment)), + _aligned_free +#else + static_cast(std::aligned_alloc( + PropertyAlignment, + sizeof(char) * PropertySectionSize + PropertyContentByteSize)), + std::free +#endif + ); + + auto NextFreeProperty = + reinterpret_cast(Data.get()); + char *NextFreeContent = Data.get() + PropertySectionSize; + + auto CopyPropertiesVec = + [&](const auto &Properties, + RTDeviceBinaryImage::PropertyRange &TargetRange) { + if (Properties.empty()) + return; + TargetRange.Begin = NextFreeProperty; + for (const sycl_device_binary_property Prop : Properties) + copyProperty(NextFreeProperty, NextFreeContent, Prop); + TargetRange.End = NextFreeProperty; + }; + auto CopyPropertiesMap = + [&](const auto &Properties, + RTDeviceBinaryImage::PropertyRange &TargetRange) { + if (Properties.empty()) + return; + TargetRange.Begin = NextFreeProperty; + for (const auto &PropIt : Properties) + copyProperty(NextFreeProperty, NextFreeContent, PropIt.second); + TargetRange.End = NextFreeProperty; + }; + + CopyPropertiesVec(MergedSpecConstants, SpecConstIDMap); + CopyPropertiesVec(MergedSpecConstantsDefaultValues, + SpecConstDefaultValuesMap); + CopyPropertiesVec(MergedKernelParamOptInfo, KernelParamOptInfo); + CopyPropertiesVec(MergedAssertUsed, AssertUsed); + CopyPropertiesVec(MergedDeviceGlobals, DeviceGlobals); + CopyPropertiesVec(MergedHostPipes, HostPipes); + CopyPropertiesVec(MergedVirtualFunctions, VirtualFunctions); + CopyPropertiesVec(MergedImplicitLocalArg, ImplicitLocalArg); + CopyPropertiesVec(MergedExportedSymbols, ExportedSymbols); + CopyPropertiesVec(MergedRegisteredKernels, RegisteredKernels); + + CopyPropertiesMap(MergedDeviceLibReqMask, DeviceLibReqMask); + CopyPropertiesMap(MergedProgramMetadata, ProgramMetadata); + CopyPropertiesMap(MergedImportedSymbols, ImportedSymbols); + CopyPropertiesMap(MergedMisc, Misc); + + // Special handling for new device requirements. + { + DeviceRequirements.Begin = NextFreeProperty; + for (const auto &PropIt : MergedDevReqs.MergeMap) + copyProperty(NextFreeProperty, NextFreeContent, PropIt.second); + MergedDevReqs.writeAspectProperty(NextFreeProperty, NextFreeContent); + MergedDeviceRequirements::writeStringSetProperty( + MergedDevReqs.JointMatrix, "joint_matrix", NextFreeProperty, + NextFreeContent); + MergedDeviceRequirements::writeStringSetProperty( + MergedDevReqs.JointMatrixMad, "joint_matrix_mad", NextFreeProperty, + NextFreeContent); + DeviceRequirements.End = NextFreeProperty; + } +} + #ifndef SYCL_RT_ZSTD_NOT_AVAIABLE CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage( sycl_device_binary CompressedBin) diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 370c0af509d70..0069f35fd11df 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -121,6 +121,7 @@ class RTDeviceBinaryImage { size_t size() const { return std::distance(begin(), end()); } bool empty() const { return begin() == end(); } friend class RTDeviceBinaryImage; + friend class DynRTDeviceBinaryImage; bool isAvailable() const { return !(Begin == nullptr); } private: @@ -238,6 +239,7 @@ class RTDeviceBinaryImage { const PropertyRange &getRegisteredKernels() const { return RegisteredKernels; } + const PropertyRange &getMiscProperties() const { return Misc; } std::uintptr_t getImageID() const { assert(Bin && "Image ID is not available without a binary image."); @@ -245,6 +247,7 @@ class RTDeviceBinaryImage { } protected: + void init(); void init(sycl_device_binary Bin); sycl_device_binary get() const { return Bin; } @@ -266,6 +269,7 @@ class RTDeviceBinaryImage { RTDeviceBinaryImage::PropertyRange VirtualFunctions; RTDeviceBinaryImage::PropertyRange ImplicitLocalArg; RTDeviceBinaryImage::PropertyRange RegisteredKernels; + RTDeviceBinaryImage::PropertyRange Misc; std::vector ProgramMetadataUR; @@ -275,19 +279,29 @@ class RTDeviceBinaryImage { }; // Dynamically allocated device binary image, which de-allocates its binary -// data in destructor. +// data and associated metadata in destructor. class DynRTDeviceBinaryImage : public RTDeviceBinaryImage { public: - DynRTDeviceBinaryImage(std::unique_ptr &&DataPtr, size_t DataSize); + DynRTDeviceBinaryImage( + std::unique_ptr> &&DataPtr, + size_t DataSize); ~DynRTDeviceBinaryImage() override; + // Merge ctor + DynRTDeviceBinaryImage(const std::vector &Imgs); + void print() const override { RTDeviceBinaryImage::print(); std::cerr << " DYNAMICALLY CREATED\n"; } + static DynRTDeviceBinaryImage + merge(const std::vector &Imgs); + protected: - std::unique_ptr Data; + DynRTDeviceBinaryImage(); + + std::unique_ptr> Data; }; #ifndef SYCL_RT_ZSTD_NOT_AVAIABLE diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 6f56f54d74e2c..0c1db96e693a8 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -169,7 +169,7 @@ struct KernelCompilerBinaryInfo { std::shared_ptr &&DeviceGlobalRegistry) : MLanguage{Lang}, MKernelNames{std::move(KernelNames)}, MMangledKernelNames{std::move(MangledKernelNames)}, - MPrefix{std::move(Prefix)}, + MPrefixes{std::move(Prefix)}, MDeviceGlobalRegistries{std::move(DeviceGlobalRegistry)} {} static std::optional @@ -192,12 +192,6 @@ struct KernelCompilerBinaryInfo { "Linking binaries with different source " "languages is not currently supported."); - if (!RTCInfo->MPrefix.empty() && !Result->MPrefix.empty() && - RTCInfo->MPrefix != Result->MPrefix) - throw sycl::exception(make_error_code(errc::invalid), - "Linking binaries with different kernel prefixes " - "is not currently supported."); - for (const std::string &KernelName : RTCInfo->MKernelNames) Result->MKernelNames.insert(KernelName); @@ -223,6 +217,9 @@ struct KernelCompilerBinaryInfo { Result->MDeviceGlobalRegistries.end(), RTCInfo->MDeviceGlobalRegistries.begin(), RTCInfo->MDeviceGlobalRegistries.end()); + + for (const std::string &Prefix : RTCInfo->MPrefixes) + Result->MPrefixes.insert(Prefix); } return Result; } @@ -230,7 +227,7 @@ struct KernelCompilerBinaryInfo { syclex::source_language MLanguage; KernelNameSetT MKernelNames; MangledKernelNameMapT MMangledKernelNames; - std::string MPrefix; + std::set MPrefixes; include_pairs_t MIncludePairs; std::vector> MDeviceGlobalRegistries; @@ -267,20 +264,21 @@ class device_image_impl { updateSpecConstSymMap(); } - device_image_impl(const RTDeviceBinaryImage *BinImage, const context &Context, - const std::vector &Devices, bundle_state State, - std::shared_ptr> KernelIDs, - ur_program_handle_t Program, - const SpecConstMapT &SpecConstMap, - const std::vector &SpecConstsBlob, - uint8_t Origins, - std::optional &&RTCInfo) + device_image_impl( + const RTDeviceBinaryImage *BinImage, const context &Context, + std::vector &&Devices, bundle_state State, + std::shared_ptr> KernelIDs, + ur_program_handle_t Program, const SpecConstMapT &SpecConstMap, + const std::vector &SpecConstsBlob, uint8_t Origins, + std::optional &&RTCInfo, + std::unique_ptr &&MergedImageStorage = nullptr) : MBinImage(BinImage), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(Program), MKernelIDs(std::move(KernelIDs)), MSpecConstsBlob(SpecConstsBlob), MSpecConstsDefValBlob(getSpecConstsDefValBlob()), MSpecConstSymMap(SpecConstMap), MOrigins(Origins), - MRTCBinInfo(std::move(RTCInfo)) {} + MRTCBinInfo(std::move(RTCInfo)), + MMergedImageStorage(std::move(MergedImageStorage)) {} device_image_impl(const RTDeviceBinaryImage *BinImage, const context &Context, const std::vector &Devices, bundle_state State, @@ -629,18 +627,21 @@ class device_image_impl { std::string AdjustedName = adjustKernelName(Name); if (MRTCBinInfo->MLanguage == syclex::source_language::sycl) { auto &PM = ProgramManager::getInstance(); - auto KID = PM.tryGetSYCLKernelID(MRTCBinInfo->MPrefix + AdjustedName); - - if (!KID || !has_kernel(*KID)) - return nullptr; - - auto UrProgram = get_ur_program_ref(); - auto [UrKernel, CacheMutex, ArgMask] = - PM.getOrCreateKernel(Context, AdjustedName, - /*PropList=*/{}, UrProgram); - return std::make_shared(UrKernel, getSyclObjImpl(Context), - Self, OwnerBundle, ArgMask, - UrProgram, CacheMutex); + for (const std::string &Prefix : MRTCBinInfo->MPrefixes) { + auto KID = PM.tryGetSYCLKernelID(Prefix + AdjustedName); + + if (!KID || !has_kernel(*KID)) + continue; + + auto UrProgram = get_ur_program_ref(); + auto [UrKernel, CacheMutex, ArgMask] = + PM.getOrCreateKernel(Context, AdjustedName, + /*PropList=*/{}, UrProgram); + return std::make_shared(UrKernel, getSyclObjImpl(Context), + Self, OwnerBundle, ArgMask, + UrProgram, CacheMutex); + } + return nullptr; } ur_program_handle_t UrProgram = get_ur_program_ref(); @@ -692,9 +693,10 @@ class device_image_impl { } std::vector> buildFromSource( - const std::vector Devices, - const std::vector &BuildOptions, std::string *LogPtr, - const std::vector &RegisteredKernelNames, + const std::vector &Devices, + const std::vector &BuildOptions, + std::string *LogPtr, + const std::vector &RegisteredKernelNames, std::vector> &OutDeviceBins) const { assert(!std::holds_alternative(MBinImage)); @@ -710,7 +712,7 @@ class device_image_impl { throw sycl::exception(make_error_code(errc::invalid), "device not part of kernel_bundle context"); } - if (!DevImpl->extOneapiCanCompile(MRTCBinInfo->MLanguage)) { + if (!DevImpl->extOneapiCanBuild(MRTCBinInfo->MLanguage)) { // This error cannot not be exercised in the current implementation, as // compatibility with a source language depends on the backend's // capabilities and all devices in one context share the same backend in @@ -721,170 +723,9 @@ class device_image_impl { } } - if (MRTCBinInfo->MLanguage == syclex::source_language::sycl) { - assert(std::holds_alternative(MBinImage)); - - // Build device images via the program manager. - const std::string &SourceStr = std::get(MBinImage); - std::ostringstream SourceExt; - if (!RegisteredKernelNames.empty()) { - SourceExt << SourceStr << '\n'; - - auto EmitEntry = - [&SourceExt](const std::string &Name) -> std::ostringstream & { - SourceExt << " {\"" << Name << "\", " << Name << "}"; - return SourceExt; - }; - - SourceExt << "[[__sycl_detail__::__registered_kernels__(\n"; - for (auto It = RegisteredKernelNames.begin(), - SecondToLast = RegisteredKernelNames.end() - 1; - It != SecondToLast; ++It) { - EmitEntry(*It) << ",\n"; - } - EmitEntry(RegisteredKernelNames.back()) << "\n"; - SourceExt << ")]];\n"; - } - - auto [Binaries, Prefix] = syclex::detail::SYCL_JIT_Compile( - RegisteredKernelNames.empty() ? SourceStr : SourceExt.str(), - MRTCBinInfo->MIncludePairs, BuildOptions, LogPtr); - - auto &PM = detail::ProgramManager::getInstance(); - - // Add all binaries and keep the images for processing. - std::vector>>> - NewImages; - NewImages.reserve(Binaries->NumDeviceBinaries); - for (int I = 0; I < Binaries->NumDeviceBinaries; I++) { - sycl_device_binary Binary = &(Binaries->DeviceBinaries[I]); - RTDeviceBinaryImage *NewImage = nullptr; - auto KernelIDs = std::make_shared>(); - PM.addImage(Binary, &NewImage, KernelIDs.get()); - if (NewImage) - NewImages.push_back( - std::make_pair(std::move(NewImage), std::move(KernelIDs))); - } - - // Now bring all images into the proper state. Note that we do this in a - // separate pass over NewImages to make sure dependency images have been - // registered beforehand. - std::vector> Result; - Result.reserve(NewImages.size()); - for (auto &[NewImage, KernelIDs] : NewImages) { - KernelNameSetT KernelNames; - MangledKernelNameMapT MangledKernelNames; - std::unordered_set DeviceGlobalIDSet; - std::vector DeviceGlobalIDVec; - std::vector DeviceGlobalNames; - std::vector> DeviceGlobalAllocations; - - for (const auto &KernelID : *KernelIDs) { - std::string_view KernelName{KernelID.get_name()}; - if (KernelName.find(Prefix) == 0) { - KernelName.remove_prefix(Prefix.length()); - KernelNames.emplace(KernelName); - static constexpr std::string_view SYCLKernelMarker{ - "__sycl_kernel_"}; - if (KernelName.find(SYCLKernelMarker) == 0) { - // extern "C" declaration, implicitly register kernel without the - // marker. - std::string_view KernelNameWithoutMarker{KernelName}; - KernelNameWithoutMarker.remove_prefix(SYCLKernelMarker.length()); - MangledKernelNames.emplace(KernelNameWithoutMarker, KernelName); - } - } - - for (const sycl_device_binary_property &RKProp : - NewImage->getRegisteredKernels()) { - // Mangled names. - auto BA = DeviceBinaryProperty(RKProp).asByteArray(); - auto MangledNameLen = BA.consume() / 8 /*bits in a byte*/; - std::string_view MangledName{ - reinterpret_cast(BA.begin()), MangledNameLen}; - MangledKernelNames.emplace(RKProp->Name, MangledName); - } - - // Device globals. - for (const auto &DeviceGlobalProp : NewImage->getDeviceGlobals()) { - std::string_view DeviceGlobalName{DeviceGlobalProp->Name}; - assert(DeviceGlobalName.find(Prefix) == 0); - bool Inserted = false; - std::tie(std::ignore, Inserted) = - DeviceGlobalIDSet.emplace(DeviceGlobalName); - if (Inserted) { - DeviceGlobalIDVec.emplace_back(DeviceGlobalName); - DeviceGlobalName.remove_prefix(Prefix.length()); - DeviceGlobalNames.emplace_back(DeviceGlobalName); - } - } - } - - // Device globals are usually statically allocated and registered in the - // integration footer, which we don't have in the RTC context. Instead, - // we dynamically allocate storage tied to the executable kernel bundle. - for (DeviceGlobalMapEntry *DeviceGlobalEntry : - PM.getDeviceGlobalEntries(DeviceGlobalIDVec)) { - - size_t AllocSize = - DeviceGlobalEntry->MDeviceGlobalTSize; // init value - if (!DeviceGlobalEntry->MIsDeviceImageScopeDecorated) { - // Consider storage for device USM pointer. - AllocSize += sizeof(void *); - } - auto Alloc = std::make_unique(AllocSize); - std::string_view DeviceGlobalName{DeviceGlobalEntry->MUniqueId}; - PM.addOrInitDeviceGlobalEntry(Alloc.get(), DeviceGlobalName.data()); - DeviceGlobalAllocations.push_back(std::move(Alloc)); - - // Drop the RTC prefix from the entry's symbol name. Note that the PM - // still manages this device global under its prefixed name. - assert(DeviceGlobalName.find(Prefix) == 0); - DeviceGlobalName.remove_prefix(Prefix.length()); - DeviceGlobalEntry->MUniqueId = DeviceGlobalName; - } - - auto DGRegs = std::make_shared( - ContextImpl, std::string{Prefix}, std::move(DeviceGlobalNames), - std::move(DeviceGlobalAllocations)); - - // Mark the image as input so the program manager will bring it into - // the right state. - auto DevImgImpl = std::make_shared( - NewImage, MContext, Devices, bundle_state::input, - std::move(KernelIDs), MRTCBinInfo->MLanguage, - std::move(KernelNames), std::move(MangledKernelNames), - std::string{Prefix}, std::move(DGRegs)); - - // Resolve dependencies. - // TODO: Consider making a collectDeviceImageDeps variant that takes a - // set reference and inserts into that instead. - std::set ImgDeps; - for (const device &Device : Devices) { - std::set DevImgDeps = - PM.collectDeviceImageDeps(*NewImage, Device); - ImgDeps.insert(DevImgDeps.begin(), DevImgDeps.end()); - } - - // Pack main image and dependencies together. - std::vector NewImageAndDeps; - NewImageAndDeps.reserve(1 + ImgDeps.size()); - NewImageAndDeps.push_back( - createSyclObjFromImpl(std::move(DevImgImpl))); - for (RTDeviceBinaryImage *ImgDep : ImgDeps) - NewImageAndDeps.push_back(PM.createDependencyImage( - MContext, Devices, ImgDep, bundle_state::input)); - - DevImgPlainWithDeps ImgWithDeps(std::move(NewImageAndDeps)); - PM.bringSYCLDeviceImageToState(ImgWithDeps, bundle_state::executable); - Result.push_back(getSyclObjImpl(ImgWithDeps.getMain())); - } - - OutDeviceBins.emplace_back( - std::make_shared(std::move(Binaries))); - return Result; - } + if (MRTCBinInfo->MLanguage == syclex::source_language::sycl) + return createSYCLImages(Devices, bundle_state::executable, BuildOptions, + LogPtr, RegisteredKernelNames, OutDeviceBins); std::vector DeviceVec; DeviceVec.reserve(Devices.size()); @@ -902,51 +743,8 @@ class device_image_impl { const AdapterPtr &Adapter = ContextImpl->getAdapter(); - if (!FetchedFromCache) { - const auto spirv = [&]() -> std::vector { - switch (MRTCBinInfo->MLanguage) { - case syclex::source_language::opencl: { - // if successful, the log is empty. if failed, throws an error with - // the compilation log. - std::vector IPVersionVec(Devices.size()); - std::transform(DeviceVec.begin(), DeviceVec.end(), - IPVersionVec.begin(), [&](ur_device_handle_t d) { - uint32_t ipVersion = 0; - Adapter->call( - d, UR_DEVICE_INFO_IP_VERSION, sizeof(uint32_t), - &ipVersion, nullptr); - return ipVersion; - }); - return syclex::detail::OpenCLC_to_SPIRV(*SourceStrPtr, IPVersionVec, - BuildOptions, LogPtr); - } - case syclex::source_language::spirv: { - const auto &SourceBytes = std::get>(MBinImage); - std::vector Result(SourceBytes.size()); - std::transform(SourceBytes.cbegin(), SourceBytes.cend(), - Result.begin(), - [](std::byte B) { return static_cast(B); }); - return Result; - } - default: - break; - } - throw sycl::exception( - make_error_code(errc::invalid), - "SYCL C++, OpenCL C and SPIR-V are the only supported " - "languages at this time"); - }(); - - Adapter->call( - ContextImpl->getHandleRef(), spirv.data(), spirv.size(), nullptr, - &UrProgram); - // program created by urProgramCreateWithIL is implicitly retained. - if (UrProgram == nullptr) - throw sycl::exception( - sycl::make_error_code(errc::invalid), - "urProgramCreateWithIL resulted in a null program handle."); - - } // if(!FetchedFromCache) + if (!FetchedFromCache) + UrProgram = createProgramFromSource(Devices, BuildOptions, LogPtr); std::string XsFlags = extractXsFlags(BuildOptions); auto Res = Adapter->call_nocheck( @@ -963,18 +761,8 @@ class device_image_impl { UrProgram, UR_PROGRAM_INFO_NUM_KERNELS, sizeof(size_t), &NumKernels, nullptr); - // Get the kernel names. - size_t KernelNamesSize; - Adapter->call( - UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &KernelNamesSize); - - // semi-colon delimited list of kernel names. - std::string KernelNamesStr(KernelNamesSize, ' '); - Adapter->call( - UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, KernelNamesStr.size(), - &KernelNamesStr[0], nullptr); std::vector KernelNames = - detail::split_string(KernelNamesStr, ';'); + getKernelNamesFromURProgram(Adapter, UrProgram); KernelNameSetT KernelNameSet{KernelNames.begin(), KernelNames.end()}; // If caching enabled and kernel not fetched from cache, cache. @@ -990,13 +778,53 @@ class device_image_impl { MRTCBinInfo->MLanguage, std::move(KernelNameSet))}; } + std::vector> compileFromSource( + const std::vector &Devices, + const std::vector &CompileOptions, + std::string *LogPtr, + const std::vector &RegisteredKernelNames, + std::vector> &OutDeviceBins) + const { + assert(!std::holds_alternative(MBinImage)); + assert(MRTCBinInfo); + assert(MOrigins & ImageOriginKernelCompiler); + + if (MRTCBinInfo->MLanguage != syclex::source_language::sycl) + throw sycl::exception( + make_error_code(errc::invalid), + "compile is only available for kernel_bundle " + "when the source language was sycl."); + + std::shared_ptr ContextImpl = + getSyclObjImpl(MContext); + + for (const auto &SyclDev : Devices) { + DeviceImplPtr DevImpl = getSyclObjImpl(SyclDev); + if (!ContextImpl->hasDevice(DevImpl)) { + throw sycl::exception(make_error_code(errc::invalid), + "device not part of kernel_bundle context"); + } + if (!DevImpl->extOneapiCanCompile(MRTCBinInfo->MLanguage)) { + // This error cannot not be exercised in the current implementation, as + // compatibility with a source language depends on the backend's + // capabilities and all devices in one context share the same backend in + // the current implementation, so this would lead to an error already + // during construction of the source bundle. + throw sycl::exception(make_error_code(errc::invalid), + "device does not support source language"); + } + } + return createSYCLImages(Devices, bundle_state::object, CompileOptions, + LogPtr, RegisteredKernelNames, OutDeviceBins); + } + private: bool hasRTDeviceBinaryImage() const noexcept { return std::holds_alternative(MBinImage) && get_bin_image_ref() != nullptr; } - static std::string trimXsFlags(std::string &str) { + static std::string_view trimXsFlags(std::string_view &str) { // Trim first and last quote if they exist, but no others. char EncounteredQuote = '\0'; auto Start = std::find_if(str.begin(), str.end(), [&](char c) { @@ -1014,31 +842,31 @@ class device_image_impl { return !std::isspace(c); }).base(); if (Start != std::end(str) && End != std::begin(str) && Start < End) { - return std::string(Start, End); + return std::string_view(&*Start, std::distance(Start, End)); } return ""; } static std::string - extractXsFlags(const std::vector &BuildOptions) { + extractXsFlags(const std::vector &BuildOptions) { std::stringstream SS; - for (std::string Option : BuildOptions) { - auto Where = Option.find("-Xs"); - if (Where != std::string::npos) { + for (sycl::detail::string_view Option : BuildOptions) { + std::string_view OptionSV{Option.data()}; + auto Where = OptionSV.find("-Xs"); + if (Where != std::string_view::npos) { Where += 3; - std::string Flags = Option.substr(Where); + std::string_view Flags = OptionSV.substr(Where); SS << trimXsFlags(Flags) << " "; } } return SS.str(); } - bool - extKernelCompilerFetchFromCache(const std::vector Devices, - const std::vector &BuildOptions, - const std::string &SourceStr, - ur_program_handle_t &UrProgram) const { + bool extKernelCompilerFetchFromCache( + const std::vector Devices, + const std::vector &BuildOptions, + const std::string &SourceStr, ur_program_handle_t &UrProgram) const { const std::shared_ptr &ContextImpl = getSyclObjImpl(MContext); const AdapterPtr &Adapter = ContextImpl->getAdapter(); @@ -1150,6 +978,284 @@ class device_image_impl { } } + std::vector> createSYCLImages( + const std::vector &Devices, bundle_state State, + const std::vector &Options, + std::string *LogPtr, + const std::vector &RegisteredKernelNames, + std::vector> &OutDeviceBins) + const { + assert(MRTCBinInfo); + assert(MRTCBinInfo->MLanguage == syclex::source_language::sycl); + assert(std::holds_alternative(MBinImage)); + + // Build device images via the program manager. + const std::string &SourceStr = std::get(MBinImage); + std::ostringstream SourceExt; + if (!RegisteredKernelNames.empty()) { + SourceExt << SourceStr << '\n'; + + auto EmitEntry = + [&SourceExt]( + const sycl::detail::string_view &Name) -> std::ostringstream & { + SourceExt << " {\"" << Name.data() << "\", " << Name.data() << "}"; + return SourceExt; + }; + + SourceExt << "[[__sycl_detail__::__registered_kernels__(\n"; + for (auto It = RegisteredKernelNames.begin(), + SecondToLast = RegisteredKernelNames.end() - 1; + It != SecondToLast; ++It) { + EmitEntry(*It) << ",\n"; + } + EmitEntry(RegisteredKernelNames.back()) << "\n"; + SourceExt << ")]];\n"; + } + + auto [Binaries, Prefix] = syclex::detail::SYCL_JIT_Compile( + RegisteredKernelNames.empty() ? SourceStr : SourceExt.str(), + MRTCBinInfo->MIncludePairs, Options, LogPtr); + + auto &PM = detail::ProgramManager::getInstance(); + + // Add all binaries and keep the images for processing. + std::vector>>> + NewImages; + NewImages.reserve(Binaries->NumDeviceBinaries); + for (int I = 0; I < Binaries->NumDeviceBinaries; I++) { + sycl_device_binary Binary = &(Binaries->DeviceBinaries[I]); + RTDeviceBinaryImage *NewImage = nullptr; + auto KernelIDs = std::make_shared>(); + PM.addImage(Binary, /*RegisterImgExports=*/false, &NewImage, + KernelIDs.get()); + if (NewImage) + NewImages.push_back( + std::make_pair(std::move(NewImage), std::move(KernelIDs))); + } + + // Now bring all images into the proper state. Note that we do this in a + // separate pass over NewImages to make sure dependency images have been + // registered beforehand. + std::vector> Result; + Result.reserve(NewImages.size()); + for (auto &[NewImage, KernelIDs] : NewImages) { + const RTDeviceBinaryImage &NewImageRef = *NewImage; + + // Filter the devices that support the image requirements. + std::vector SupportingDevs = Devices; + auto NewSupportingDevsEnd = std::remove_if( + SupportingDevs.begin(), SupportingDevs.end(), + [&NewImageRef](const sycl::device &SDev) { + return !doesDevSupportDeviceRequirements(SDev, NewImageRef); + }); + + // If there are no devices that support the image, we skip it. + if (NewSupportingDevsEnd == SupportingDevs.begin()) + continue; + SupportingDevs.erase(NewSupportingDevsEnd, SupportingDevs.end()); + + KernelNameSetT KernelNames; + MangledKernelNameMapT MangledKernelNames; + std::unordered_set DeviceGlobalIDSet; + std::vector DeviceGlobalIDVec; + std::vector DeviceGlobalNames; + std::vector> DeviceGlobalAllocations; + + for (const auto &KernelID : *KernelIDs) { + std::string_view KernelName{KernelID.get_name()}; + if (KernelName.find(Prefix) == 0) { + KernelName.remove_prefix(Prefix.length()); + KernelNames.emplace(KernelName); + static constexpr std::string_view SYCLKernelMarker{"__sycl_kernel_"}; + if (KernelName.find(SYCLKernelMarker) == 0) { + // extern "C" declaration, implicitly register kernel without the + // marker. + std::string_view KernelNameWithoutMarker{KernelName}; + KernelNameWithoutMarker.remove_prefix(SYCLKernelMarker.length()); + MangledKernelNames.emplace(KernelNameWithoutMarker, KernelName); + } + } + + for (const sycl_device_binary_property &RKProp : + NewImage->getRegisteredKernels()) { + // Mangled names. + auto BA = DeviceBinaryProperty(RKProp).asByteArray(); + auto MangledNameLen = BA.consume() / 8 /*bits in a byte*/; + std::string_view MangledName{ + reinterpret_cast(BA.begin()), MangledNameLen}; + MangledKernelNames.emplace(RKProp->Name, MangledName); + } + + // Device globals. + for (const auto &DeviceGlobalProp : NewImage->getDeviceGlobals()) { + std::string_view DeviceGlobalName{DeviceGlobalProp->Name}; + assert(DeviceGlobalName.find(Prefix) == 0); + bool Inserted = false; + std::tie(std::ignore, Inserted) = + DeviceGlobalIDSet.emplace(DeviceGlobalName); + if (Inserted) { + DeviceGlobalIDVec.emplace_back(DeviceGlobalName); + DeviceGlobalName.remove_prefix(Prefix.length()); + DeviceGlobalNames.emplace_back(DeviceGlobalName); + } + } + } + + // Device globals are usually statically allocated and registered in the + // integration footer, which we don't have in the RTC context. Instead, + // we dynamically allocate storage tied to the executable kernel bundle. + for (DeviceGlobalMapEntry *DeviceGlobalEntry : + PM.getDeviceGlobalEntries(DeviceGlobalIDVec)) { + + size_t AllocSize = DeviceGlobalEntry->MDeviceGlobalTSize; // init value + if (!DeviceGlobalEntry->MIsDeviceImageScopeDecorated) { + // Consider storage for device USM pointer. + AllocSize += sizeof(void *); + } + auto Alloc = std::make_unique(AllocSize); + std::string_view DeviceGlobalName{DeviceGlobalEntry->MUniqueId}; + PM.addOrInitDeviceGlobalEntry(Alloc.get(), DeviceGlobalName.data()); + DeviceGlobalAllocations.push_back(std::move(Alloc)); + + // Drop the RTC prefix from the entry's symbol name. Note that the PM + // still manages this device global under its prefixed name. + assert(DeviceGlobalName.find(Prefix) == 0); + DeviceGlobalName.remove_prefix(Prefix.length()); + DeviceGlobalEntry->MUniqueId = DeviceGlobalName; + } + + auto DGRegs = std::make_shared( + getSyclObjImpl(MContext), std::string{Prefix}, + std::move(DeviceGlobalNames), std::move(DeviceGlobalAllocations)); + + // Mark the image as input so the program manager will bring it into + // the right state. + auto DevImgImpl = std::make_shared( + NewImage, MContext, std::move(SupportingDevs), bundle_state::input, + std::move(KernelIDs), MRTCBinInfo->MLanguage, std::move(KernelNames), + std::move(MangledKernelNames), std::string{Prefix}, + std::move(DGRegs)); + + // Resolve dependencies. + // If we are compiling to object, we do not want to error for unresolved + // imports. + // TODO: Consider making a collectDeviceImageDeps variant that takes a + // set reference and inserts into that instead. + std::set ImgDeps; + for (const device &Device : DevImgImpl->get_devices()) { + std::set DevImgDeps = PM.collectDeviceImageDeps( + *NewImage, Device, + /*ErrorOnUnresolvableImport=*/State == bundle_state::executable); + ImgDeps.insert(DevImgDeps.begin(), DevImgDeps.end()); + } + + // Pack main image and dependencies together. + std::vector NewImageAndDeps; + NewImageAndDeps.reserve( + 1 + ((State == bundle_state::executable) * ImgDeps.size())); + NewImageAndDeps.push_back( + createSyclObjFromImpl(std::move(DevImgImpl))); + const std::vector &SupportingDevsRef = + getSyclObjImpl(NewImageAndDeps[0])->get_devices(); + if (State == bundle_state::executable) { + // If target is executable we bundle the image and dependencies together + // and bring it into state. + for (RTDeviceBinaryImage *ImgDep : ImgDeps) + NewImageAndDeps.push_back(PM.createDependencyImage( + MContext, SupportingDevsRef, ImgDep, bundle_state::input)); + } else if (State == bundle_state::object) { + // If the target is object, we bring the dependencies into object state + // individually and put them in the bundle. + for (RTDeviceBinaryImage *ImgDep : ImgDeps) { + DevImgPlainWithDeps ImgDepWithDeps{PM.createDependencyImage( + MContext, SupportingDevsRef, ImgDep, bundle_state::input)}; + PM.bringSYCLDeviceImageToState(ImgDepWithDeps, State); + Result.push_back(getSyclObjImpl(ImgDepWithDeps.getMain())); + } + } + + DevImgPlainWithDeps ImgWithDeps(std::move(NewImageAndDeps)); + PM.bringSYCLDeviceImageToState(ImgWithDeps, State); + Result.push_back(getSyclObjImpl(ImgWithDeps.getMain())); + } + + OutDeviceBins.emplace_back( + std::make_shared(std::move(Binaries))); + return Result; + } + + ur_program_handle_t + createProgramFromSource(const std::vector Devices, + const std::vector &Options, + std::string *LogPtr) const { + const std::shared_ptr &ContextImpl = + getSyclObjImpl(MContext); + const AdapterPtr &Adapter = ContextImpl->getAdapter(); + const auto spirv = [&]() -> std::vector { + switch (MRTCBinInfo->MLanguage) { + case syclex::source_language::opencl: { + // if successful, the log is empty. if failed, throws an error with + // the compilation log. + const auto &SourceStr = std::get(MBinImage); + std::vector IPVersionVec(Devices.size()); + std::transform(Devices.begin(), Devices.end(), IPVersionVec.begin(), + [&](const sycl::device &SyclDev) { + uint32_t ipVersion = 0; + Adapter->call( + getSyclObjImpl(SyclDev)->getHandleRef(), + UR_DEVICE_INFO_IP_VERSION, sizeof(uint32_t), + &ipVersion, nullptr); + return ipVersion; + }); + return syclex::detail::OpenCLC_to_SPIRV(SourceStr, IPVersionVec, + Options, LogPtr); + } + case syclex::source_language::spirv: { + const auto &SourceBytes = std::get>(MBinImage); + std::vector Result(SourceBytes.size()); + std::transform(SourceBytes.cbegin(), SourceBytes.cend(), Result.begin(), + [](std::byte B) { return static_cast(B); }); + return Result; + } + default: + break; + } + throw sycl::exception( + make_error_code(errc::invalid), + "SYCL C++, OpenCL C and SPIR-V are the only supported " + "languages at this time"); + }(); + + ur_program_handle_t UrProgram = nullptr; + Adapter->call(ContextImpl->getHandleRef(), + spirv.data(), spirv.size(), + nullptr, &UrProgram); + // program created by urProgramCreateWithIL is implicitly retained. + if (UrProgram == nullptr) + throw sycl::exception( + sycl::make_error_code(errc::invalid), + "urProgramCreateWithIL resulted in a null program handle."); + + return UrProgram; + } + + static std::vector + getKernelNamesFromURProgram(const AdapterPtr &Adapter, + ur_program_handle_t UrProgram) { + // Get the kernel names. + size_t KernelNamesSize; + Adapter->call( + UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &KernelNamesSize); + + // semi-colon delimited list of kernel names. + std::string KernelNamesStr(KernelNamesSize, ' '); + Adapter->call( + UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, KernelNamesStr.size(), + &KernelNamesStr[0], nullptr); + return detail::split_string(KernelNamesStr, ';'); + } + const std::variant, const RTDeviceBinaryImage *> MBinImage = static_cast(nullptr); @@ -1186,6 +1292,9 @@ class device_image_impl { // Optional information about the binary produced by the kernel compiler // extension. std::optional MRTCBinInfo = std::nullopt; + + // Used to store a dynamically created merged binary image, e.g. from linking. + std::unique_ptr MMergedImageStorage = nullptr; }; } // namespace detail diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 679bfa407ee21..3daee613a05bc 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -888,7 +888,7 @@ bool device_impl::isGetDeviceAndHostTimerSupported() { return Result != UR_RESULT_ERROR_INVALID_OPERATION; } -bool device_impl::extOneapiCanCompile( +bool device_impl::extOneapiCanBuild( ext::oneapi::experimental::source_language Language) { try { // Get the shared_ptr to this object from the platform that owns it. @@ -902,6 +902,20 @@ bool device_impl::extOneapiCanCompile( } } +bool device_impl::extOneapiCanCompile( + ext::oneapi::experimental::source_language Language) { + try { + // Currently only SYCL language is supported for compiling. + std::shared_ptr Self = MPlatform->getOrMakeDeviceImpl(MDevice); + return Language == ext::oneapi::experimental::source_language::sycl && + sycl::ext::oneapi::experimental::detail:: + is_source_kernel_bundle_supported( + Language, std::vector{Self}); + } catch (sycl::exception &) { + return false; + } +} + // Returns the strongest guarantee that can be provided by the host device for // threads created at threadScope from a coordination scope given by // coordinationScope diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 2b678fe475f31..b2589632e4cd4 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -226,6 +226,7 @@ class device_impl { return false; } + bool extOneapiCanBuild(ext::oneapi::experimental::source_language Language); bool extOneapiCanCompile(ext::oneapi::experimental::source_language Language); // Returns all guarantees that are either equal to guarantee or weaker than diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 94744b4813e1b..f208187523f69 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -194,7 +195,6 @@ class kernel_bundle_impl { const std::vector> &ObjectBundles, std::vector Devs, const property_list &PropList) : MDevices(std::move(Devs)), MState(bundle_state::executable) { - if (MDevices.empty()) throw sycl::exception(make_error_code(errc::invalid), "Vector of devices is empty"); @@ -233,35 +233,174 @@ class kernel_bundle_impl { // TODO: Unify with c'tor for sycl::compile and sycl::build by calling // sycl::join on vector of kernel_bundles - // The loop below just links each device image separately, not linking any - // two device images together. This is correct so long as each device image - // has no unresolved symbols. That's the case when device images are created - // from generic SYCL APIs. There's no way in generic SYCL to create a kernel - // which references an undefined symbol. If we decide in the future to allow - // a backend interop API to create a "sycl::kernel_bundle" that references - // undefined symbols, then the logic in this loop will need to be changed. + // Due to a bug in L0, specializations with conflicting IDs will overwrite + // each other when linked together, so to avoid this issue we link + // regular offline-compiled SYCL device images in separation. + // TODO: Remove when spec const overwriting issue has been fixed in L0. + std::vector OfflineDeviceImages; + std::unordered_set> + OfflineDeviceImageSet; for (const kernel_bundle &ObjectBundle : ObjectBundles) { for (const DevImgPlainWithDeps &DeviceImageWithDeps : getSyclObjImpl(ObjectBundle)->MDeviceImages) { + if (getSyclObjImpl(DeviceImageWithDeps.getMain())->getOriginMask() & + ImageOriginSYCLOffline) { + OfflineDeviceImages.push_back(&DeviceImageWithDeps); + for (const device_image_plain &DevImg : DeviceImageWithDeps) + OfflineDeviceImageSet.insert(getSyclObjImpl(DevImg)); + } + } + } + + // Collect all unique images. + std::vector DevImages; + { + std::set> DevImagesSet; + for (const kernel_bundle &ObjectBundle : + ObjectBundles) + for (const device_image_plain &DevImg : + getSyclObjImpl(ObjectBundle)->MUniqueDeviceImages) + if (OfflineDeviceImageSet.find(getSyclObjImpl(DevImg)) == + OfflineDeviceImageSet.end()) + DevImagesSet.insert(getSyclObjImpl(DevImg)); + DevImages.reserve(DevImagesSet.size()); + for (auto It = DevImagesSet.begin(); It != DevImagesSet.end();) + DevImages.push_back(createSyclObjFromImpl( + std::move(DevImagesSet.extract(It++).value()))); + } - // Skip images which are not compatible with devices provided - if (std::none_of(MDevices.begin(), MDevices.end(), - [&DeviceImageWithDeps](const device &Dev) { - return getSyclObjImpl(DeviceImageWithDeps.getMain()) - ->compatible_with_device(Dev); - })) + // Check for conflicting kernels in RTC kernel bundles. + { + std::set> SeenKernelNames; + std::set> Conflicts; + for (const device_image_plain &DevImage : DevImages) { + const std::optional &RTCInfo = + getSyclObjImpl(DevImage)->getRTCInfo(); + if (!RTCInfo.has_value()) continue; + std::vector Intersect; + std::set_intersection(SeenKernelNames.begin(), SeenKernelNames.end(), + RTCInfo->MKernelNames.begin(), + RTCInfo->MKernelNames.end(), + std::inserter(Conflicts, Conflicts.begin())); + SeenKernelNames.insert(RTCInfo->MKernelNames.begin(), + RTCInfo->MKernelNames.end()); + } - std::vector LinkedResults = - detail::ProgramManager::getInstance().link(DeviceImageWithDeps, - MDevices, PropList); - MDeviceImages.insert(MDeviceImages.end(), LinkedResults.begin(), - LinkedResults.end()); - MUniqueDeviceImages.insert(MUniqueDeviceImages.end(), - LinkedResults.begin(), LinkedResults.end()); + if (!Conflicts.empty()) { + std::stringstream MsgS; + MsgS << "Conflicting kernel definitions: "; + for (const std::string_view &Conflict : Conflicts) + MsgS << " " << Conflict; + throw sycl::exception(make_error_code(errc::invalid), MsgS.str()); + } + } + + // Create a map between exported symbols and their indices in the device + // images collection. + std::map ExportMap; + for (size_t I = 0; I < DevImages.size(); ++I) { + auto DevImageImpl = getSyclObjImpl(DevImages[I]); + if (DevImageImpl->get_bin_image_ref() == nullptr) + continue; + for (const sycl_device_binary_property &ESProp : + DevImageImpl->get_bin_image_ref()->getExportedSymbols()) { + if (ExportMap.find(ESProp->Name) != ExportMap.end()) + throw sycl::exception(make_error_code(errc::invalid), + "Duplicate exported symbol \"" + + std::string{ESProp->Name} + + "\" found in binaries."); + ExportMap.emplace(ESProp->Name, I); } } + + // Create dependency mappings. + std::vector> Dependencies; + Dependencies.resize(DevImages.size()); + for (size_t I = 0; I < DevImages.size(); ++I) { + auto DevImageImpl = getSyclObjImpl(DevImages[I]); + if (DevImageImpl->get_bin_image_ref() == nullptr) + continue; + std::set DeviceImageDepsSet; + for (const sycl_device_binary_property &ISProp : + DevImageImpl->get_bin_image_ref()->getImportedSymbols()) { + auto ExportSymbolIt = ExportMap.find(ISProp->Name); + if (ExportSymbolIt == ExportMap.end()) + throw sycl::exception(make_error_code(errc::invalid), + "No exported symbol \"" + + std::string{ISProp->Name} + + "\" found in linked images."); + DeviceImageDepsSet.emplace(ExportSymbolIt->second); + } + Dependencies[I].insert(Dependencies[I].end(), DeviceImageDepsSet.begin(), + DeviceImageDepsSet.end()); + } + + // Create a link graph and clone it for each device. + const std::shared_ptr &FirstDevice = + getSyclObjImpl(MDevices[0]); + std::map, LinkGraph> + DevImageLinkGraphs; + const auto &FirstGraph = + DevImageLinkGraphs + .emplace(FirstDevice, + LinkGraph{DevImages, Dependencies}) + .first->second; + for (size_t I = 1; I < MDevices.size(); ++I) + DevImageLinkGraphs.emplace(getSyclObjImpl(MDevices[I]), + FirstGraph.Clone()); + + // Poison the images based on whether the corresponding device supports it. + for (auto &GraphIt : DevImageLinkGraphs) { + device Dev = createSyclObjFromImpl(GraphIt.first); + GraphIt.second.Poison([&Dev](const device_image_plain &DevImg) { + return !getSyclObjImpl(DevImg)->compatible_with_device(Dev); + }); + } + + // Unify graphs after poisoning. + std::map>, + LinkGraph> + UnifiedGraphs = UnifyGraphs(DevImageLinkGraphs); + + // Link based on the resulting graphs. + for (auto &GraphIt : UnifiedGraphs) { + std::vector DeviceGroup; + DeviceGroup.reserve(GraphIt.first.size()); + for (const auto &DeviceImgImpl : GraphIt.first) + DeviceGroup.emplace_back(createSyclObjFromImpl(DeviceImgImpl)); + + std::vector LinkedResults = + detail::ProgramManager::getInstance().link( + GraphIt.second.GetNodeValues(), DeviceGroup, PropList); + MDeviceImages.insert(MDeviceImages.end(), LinkedResults.begin(), + LinkedResults.end()); + MUniqueDeviceImages.insert(MUniqueDeviceImages.end(), + LinkedResults.begin(), LinkedResults.end()); + // TODO: Kernels may be in multiple device images, so mapping should be + // added. + } + + // ... And link the offline images in separation. (Workaround.) + for (const DevImgPlainWithDeps *DeviceImageWithDeps : OfflineDeviceImages) { + // Skip images which are not compatible with devices provided + if (std::none_of(MDevices.begin(), MDevices.end(), + [DeviceImageWithDeps](const device &Dev) { + return getSyclObjImpl(DeviceImageWithDeps->getMain()) + ->compatible_with_device(Dev); + })) + continue; + + std::vector LinkedResults = + detail::ProgramManager::getInstance().link( + DeviceImageWithDeps->getAll(), MDevices, PropList); + MDeviceImages.insert(MDeviceImages.end(), LinkedResults.begin(), + LinkedResults.end()); + MUniqueDeviceImages.insert(MUniqueDeviceImages.end(), + LinkedResults.begin(), LinkedResults.end()); + } + removeDuplicateImages(); for (const kernel_bundle &Bundle : ObjectBundles) { @@ -388,11 +527,11 @@ class kernel_bundle_impl { kernel_bundle_impl( const context &Context, const std::vector &Devs, std::vector &&DevImgs, - std::vector> &&DevBinaries) + std::vector> &&DevBinaries, + bundle_state State) : MContext(Context), MDevices(Devs), MSharedDeviceBinaries(std::move(DevBinaries)), - MUniqueDeviceImages(std::move(DevImgs)), - MState(bundle_state::executable) { + MUniqueDeviceImages(std::move(DevImgs)), MState(State) { common_ctor_checks(); removeDuplicateImages(); @@ -401,11 +540,11 @@ class kernel_bundle_impl { MDeviceImages.emplace_back(DevImg); } - std::shared_ptr - build_from_source(const std::vector Devices, - const std::vector &BuildOptions, - std::string *LogPtr, - const std::vector &RegisteredKernelNames) { + std::shared_ptr build_from_source( + const std::vector Devices, + const std::vector &BuildOptions, + std::string *LogPtr, + const std::vector &RegisteredKernelNames) { assert(MState == bundle_state::ext_oneapi_source && "bundle_state::ext_oneapi_source required"); assert(allSourceBasedImages() && "All images must be source-based."); @@ -421,7 +560,33 @@ class kernel_bundle_impl { NewDevImgs.emplace_back(std::move(DevImgImpl)); } return std::make_shared( - MContext, Devices, std::move(NewDevImgs), std::move(NewBinReso)); + MContext, Devices, std::move(NewDevImgs), std::move(NewBinReso), + bundle_state::executable); + } + + std::shared_ptr compile_from_source( + const std::vector Devices, + const std::vector &CompileOptions, + std::string *LogPtr, + const std::vector &RegisteredKernelNames) { + assert(MState == bundle_state::ext_oneapi_source && + "bundle_state::ext_oneapi_source required"); + assert(allSourceBasedImages() && "All images must be source-based."); + + std::vector NewDevImgs; + std::vector> NewBinReso; + for (device_image_plain &DevImg : MUniqueDeviceImages) { + std::vector> NewDevImgImpls = + getSyclObjImpl(DevImg)->compileFromSource( + Devices, CompileOptions, LogPtr, RegisteredKernelNames, + NewBinReso); + NewDevImgs.reserve(NewDevImgImpls.size()); + for (std::shared_ptr &DevImgImpl : NewDevImgImpls) + NewDevImgs.emplace_back(std::move(DevImgImpl)); + } + return std::make_shared( + MContext, Devices, std::move(NewDevImgs), std::move(NewBinReso), + bundle_state::object); } public: diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.cpp b/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.cpp index 78e1f1399a2c2..5cc859f47cf3e 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.cpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.cpp @@ -204,13 +204,11 @@ std::string InvokeOclocQuery(const std::vector &IPVersionVec, return QueryLog; } -spirv_vec_t OpenCLC_to_SPIRV(const std::string &Source, - const std::vector &IPVersionVec, - const std::vector &UserArgs, - std::string *LogPtr) { - std::vector CMUserArgs = UserArgs; - CMUserArgs.push_back("-cmc"); - +spirv_vec_t +OpenCLC_to_SPIRV(const std::string &Source, + const std::vector &IPVersionVec, + const std::vector &UserArgs, + std::string *LogPtr) { // handles into ocloc shared lib static void *oclocInvokeHandle = nullptr; static void *oclocFreeOutputHandle = nullptr; @@ -220,11 +218,11 @@ spirv_vec_t OpenCLC_to_SPIRV(const std::string &Source, IPVersionVec); // assemble ocloc args - std::string CombinedUserArgs = - std::accumulate(UserArgs.begin(), UserArgs.end(), std::string(""), - [](const std::string &acc, const std::string &s) { - return acc + s + " "; - }); + std::string CombinedUserArgs = ""; + for (const sycl::detail::string_view &UserArg : UserArgs) { + CombinedUserArgs += UserArg.data(); + CombinedUserArgs += " "; + } std::vector Args = {"ocloc", "-q", "-spv_only", "-options", CombinedUserArgs.c_str()}; diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.hpp b/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.hpp index 85f4b9b2a4884..10002fa5d8013 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.hpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.hpp @@ -20,10 +20,11 @@ namespace ext::oneapi::experimental { namespace detail { using spirv_vec_t = std::vector; -spirv_vec_t OpenCLC_to_SPIRV(const std::string &Source, - const std::vector &IPVersionVec, - const std::vector &UserArgs, - std::string *LogPtr); +spirv_vec_t +OpenCLC_to_SPIRV(const std::string &Source, + const std::vector &IPVersionVec, + const std::vector &UserArgs, + std::string *LogPtr); // IPVersionVec gets flattened and passed to ocloc as the -dev flag. bool OpenCLC_Compilation_Available(const std::vector &IPVersionVec); diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp index 88f2b46ee9170..93ec94a8ac328 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp @@ -21,12 +21,15 @@ inline namespace _V1 { namespace ext::oneapi::experimental { namespace detail { -std::string userArgsAsString(const std::vector &UserArguments) { - return std::accumulate(UserArguments.begin(), UserArguments.end(), - std::string(""), - [](const std::string &A, const std::string &B) { - return A.empty() ? B : A + " " + B; - }); +std::string +userArgsAsString(const std::vector &UserArguments) { + std::string Result = ""; + for (const sycl::detail::string_view &UserArg : UserArguments) { + if (!Result.empty()) + Result += " "; + Result += UserArg.data(); + } + return Result; } bool SYCL_JIT_Compilation_Available() { @@ -37,16 +40,19 @@ bool SYCL_JIT_Compilation_Available() { #endif } -std::pair -SYCL_JIT_Compile([[maybe_unused]] const std::string &SYCLSource, - [[maybe_unused]] const include_pairs_t &IncludePairs, - [[maybe_unused]] const std::vector &UserArgs, - [[maybe_unused]] std::string *LogPtr) { +std::pair SYCL_JIT_Compile( + [[maybe_unused]] const std::string &SYCLSource, + [[maybe_unused]] const include_pairs_t &IncludePairs, + [[maybe_unused]] const std::vector &UserArgs, + [[maybe_unused]] std::string *LogPtr) { #if SYCL_EXT_JIT_ENABLE static std::atomic_uintptr_t CompilationCounter; std::string CompilationID = "rtc_" + std::to_string(CompilationCounter++); + std::vector UserArgStrings; + for (const sycl::detail::string_view UserArg : UserArgs) + UserArgStrings.push_back(UserArg.data()); return sycl::detail::jit_compiler::get_instance().compileSYCL( - CompilationID, SYCLSource, IncludePairs, UserArgs, LogPtr); + CompilationID, SYCLSource, IncludePairs, UserArgStrings, LogPtr); #else throw sycl::exception(sycl::errc::build, "kernel_compiler via sycl-jit is not available"); diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp index 86215b75baf12..047a10a061e1f 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp @@ -10,6 +10,7 @@ #include #include // __SYCL_EXPORT +#include #include // sycl_device_binaries @@ -23,7 +24,8 @@ namespace detail { using include_pairs_t = std::vector>; -std::string userArgsAsString(const std::vector &UserArguments); +std::string +userArgsAsString(const std::vector &UserArguments); // Compile the given SYCL source string and virtual include files into the image // format understood by the program manager. @@ -32,7 +34,8 @@ std::string userArgsAsString(const std::vector &UserArguments); // bundle-specific prefix used for loading the kernels. std::pair SYCL_JIT_Compile(const std::string &Source, const include_pairs_t &IncludePairs, - const std::vector &UserArgs, std::string *LogPtr); + const std::vector &UserArgs, + std::string *LogPtr); void SYCL_JIT_Destroy(sycl_device_binaries Binaries); diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index b5afd28ac1d5b..b35e6ae832447 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -675,7 +675,8 @@ static bool checkLinkingSupport(const device &Dev, std::set ProgramManager::collectDeviceImageDeps(const RTDeviceBinaryImage &Img, - const device &Dev) { + const device &Dev, + bool ErrorOnUnresolvableImport) { // TODO collecting dependencies for virtual functions and imported symbols // should be combined since one can lead to new unresolved dependencies for // the other. @@ -683,14 +684,16 @@ ProgramManager::collectDeviceImageDeps(const RTDeviceBinaryImage &Img, collectDependentDeviceImagesForVirtualFunctions(Img, Dev); std::set ImageDeps = - collectDeviceImageDepsForImportedSymbols(Img, Dev); + collectDeviceImageDepsForImportedSymbols(Img, Dev, + ErrorOnUnresolvableImport); DeviceImagesToLink.insert(ImageDeps.begin(), ImageDeps.end()); return DeviceImagesToLink; } std::set ProgramManager::collectDeviceImageDepsForImportedSymbols( - const RTDeviceBinaryImage &MainImg, const device &Dev) { + const RTDeviceBinaryImage &MainImg, const device &Dev, + bool ErrorOnUnresolvableImport) { std::set DeviceImagesToLink; std::set HandledSymbols; std::queue WorkList; @@ -728,7 +731,7 @@ ProgramManager::collectDeviceImageDepsForImportedSymbols( } break; } - if (!Found) + if (ErrorOnUnresolvableImport && !Found) throw sycl::exception(make_error_code(errc::build), "No device image found for external symbol " + Symbol); @@ -1448,7 +1451,8 @@ ProgramManager::ProgramManager() UseSpvEnv + ": " + SpvFile); File.seekg(0, std::ios::end); size_t Size = File.tellg(); - std::unique_ptr Data(new char[Size]); + std::unique_ptr> Data(new char[Size], + std::free); File.seekg(0); File.read(Data.get(), Size); File.close(); @@ -1909,6 +1913,7 @@ static bool shouldSkipEmptyImage(sycl_device_binary RawImg) { } void ProgramManager::addImage(sycl_device_binary RawImg, + bool RegisterImgExports, RTDeviceBinaryImage **OutImage, std::vector *OutKernelIDs) { const bool DumpImages = std::getenv("SYCL_DUMP_IMAGES") && !m_UseSpvFile; @@ -1981,7 +1986,8 @@ void ProgramManager::addImage(sycl_device_binary RawImg, } else { size_t ImgSize = static_cast(RawImg->BinaryEnd - RawImg->BinaryStart); - std::unique_ptr Data(new char[ImgSize]); + std::unique_ptr> Data( + new char[ImgSize], std::free); std::memcpy(Data.get(), RawImg->BinaryStart, ImgSize); DevImg = std::make_unique(std::move(Data), ImgSize); @@ -2000,8 +2006,11 @@ void ProgramManager::addImage(sycl_device_binary RawImg, } // Register all exported symbols - for (const sycl_device_binary_property &ESProp : Img->getExportedSymbols()) { - m_ExportedSymbolImages.insert({ESProp->Name, Img.get()}); + if (RegisterImgExports) { + for (const sycl_device_binary_property &ESProp : + Img->getExportedSymbols()) { + m_ExportedSymbolImages.insert({ESProp->Name, Img.get()}); + } } // Record mapping between virtual function sets and device images @@ -2794,7 +2803,7 @@ void ProgramManager::bringSYCLDeviceImageToState( break; case bundle_state::object: { std::vector LinkedDevImages = - link(DeviceImage, MainImgImpl->get_devices(), + link(DeviceImage.getAll(), MainImgImpl->get_devices(), /*PropList=*/{}); // Since only one device image is passed here one output device image is // expected @@ -2920,8 +2929,9 @@ ProgramManager::compile(const DevImgPlainWithDeps &ImgWithDeps, std::optional RTCInfo = InputImpl->getRTCInfo(); DeviceImageImplPtr ObjectImpl = std::make_shared( - InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs, - bundle_state::object, InputImpl->get_kernel_ids_ptr(), Prog, + InputImpl->get_bin_image_ref(), InputImpl->get_context(), + std::vector{Devs}, bundle_state::object, + InputImpl->get_kernel_ids_ptr(), Prog, InputImpl->get_spec_const_data_ref(), InputImpl->get_spec_const_blob_ref(), InputImpl->getOriginMask(), std::move(RTCInfo)); @@ -2949,10 +2959,14 @@ ProgramManager::compile(const DevImgPlainWithDeps &ImgWithDeps, return CompiledImages; } -static void mergeImageData(const std::vector &Imgs, - std::vector &KernelIDs, - std::vector &NewSpecConstBlob, - device_image_impl::SpecConstMapT &NewSpecConstMap) { +// Returns a merged device binary image, new set of kernel IDs and new +// specialization constant data. +static const RTDeviceBinaryImage * +mergeImageData(const std::vector &Imgs, + std::vector &KernelIDs, + std::vector &NewSpecConstBlob, + device_image_impl::SpecConstMapT &NewSpecConstMap, + std::unique_ptr &MergedImageStorage) { for (const device_image_plain &Img : Imgs) { const std::shared_ptr &DeviceImageImpl = getSyclObjImpl(Img); @@ -2991,10 +3005,28 @@ static void mergeImageData(const std::vector &Imgs, } // device_image_impl expects kernel ids to be sorted for fast search std::sort(KernelIDs.begin(), KernelIDs.end(), LessByHash{}); + + // If there is only a single image, use it as the result. + if (Imgs.size() == 1) + return getSyclObjImpl(Imgs[0])->get_bin_image_ref(); + + // Otherwise we create a dynamic image with the merged information. + std::vector BinImgs; + BinImgs.reserve(Imgs.size()); + for (const device_image_plain &Img : Imgs) { + auto ImgBinRef = getSyclObjImpl(Img)->get_bin_image_ref(); + // For some cases, like SYCL kernel compiler binaries, we don't have + // binaries. For these we assume no properties associated, so they can be + // safely ignored. + if (ImgBinRef) + BinImgs.push_back(ImgBinRef); + } + MergedImageStorage = std::make_unique(BinImgs); + return MergedImageStorage.get(); } std::vector -ProgramManager::link(const DevImgPlainWithDeps &ImgWithDeps, +ProgramManager::link(const std::vector &Imgs, const std::vector &Devs, const property_list &PropList) { { @@ -3003,7 +3035,6 @@ ProgramManager::link(const DevImgPlainWithDeps &ImgWithDeps, PropList, NoAllowedPropertiesCheck, NoAllowedPropertiesCheck); } - const std::vector &Imgs = ImgWithDeps.getAll(); std::vector URPrograms; URPrograms.reserve(Imgs.size()); for (const device_image_plain &Img : Imgs) @@ -3013,18 +3044,17 @@ ProgramManager::link(const DevImgPlainWithDeps &ImgWithDeps, URDevices.reserve(Devs.size()); for (const device &Dev : Devs) URDevices.push_back(getSyclObjImpl(Dev)->getHandleRef()); - + // FIXME: Linker options are picked from the first object, but is that safe? std::string LinkOptionsStr; applyLinkOptionsFromEnvironment(LinkOptionsStr); - const device_image_plain &MainImg = ImgWithDeps.getMain(); - const std::shared_ptr &InputImpl = getSyclObjImpl(MainImg); - if (LinkOptionsStr.empty()) { + const std::shared_ptr &FirstImgImpl = + getSyclObjImpl(Imgs[0]); + if (LinkOptionsStr.empty() && FirstImgImpl->get_bin_image_ref()) appendLinkOptionsFromImage(LinkOptionsStr, - *(InputImpl->get_bin_image_ref())); - } + *(FirstImgImpl->get_bin_image_ref())); // Should always come last! appendLinkEnvironmentVariablesThatAppend(LinkOptionsStr); - const context &Context = InputImpl->get_context(); + const context &Context = FirstImgImpl->get_context(); const ContextImplPtr &ContextImpl = getSyclObjImpl(Context); const AdapterPtr &Adapter = ContextImpl->getAdapter(); @@ -3061,7 +3091,9 @@ ProgramManager::link(const DevImgPlainWithDeps &ImgWithDeps, std::shared_ptr> KernelIDs{new std::vector}; std::vector NewSpecConstBlob; device_image_impl::SpecConstMapT NewSpecConstMap; - mergeImageData(Imgs, *KernelIDs, NewSpecConstBlob, NewSpecConstMap); + std::unique_ptr MergedImageStorage; + const RTDeviceBinaryImage *NewBinImg = mergeImageData( + Imgs, *KernelIDs, NewSpecConstBlob, NewSpecConstMap, MergedImageStorage); { std::lock_guard Lock(MNativeProgramsMutex); @@ -3070,10 +3102,11 @@ ProgramManager::link(const DevImgPlainWithDeps &ImgWithDeps, // underlying program disposed of). Protecting from incorrect values by // removal of map entries with same handle (obviously invalid entries). std::ignore = NativePrograms.erase(LinkedProg); - for (const device_image_plain &Img : ImgWithDeps) { - NativePrograms.insert( - {LinkedProg, - {ContextImpl, getSyclObjImpl(Img)->get_bin_image_ref()}}); + for (const device_image_plain &Img : Imgs) { + const std::shared_ptr &ImgImpl = getSyclObjImpl(Img); + if (ImgImpl->get_bin_image_ref()) + NativePrograms.insert( + {LinkedProg, {ContextImpl, ImgImpl->get_bin_image_ref()}}); } } @@ -3083,20 +3116,21 @@ ProgramManager::link(const DevImgPlainWithDeps &ImgWithDeps, // input ones and then merge them afterwards. std::vector *> RTCInfoPtrs; - RTCInfoPtrs.reserve(ImgWithDeps.size()); - for (const device_image_plain &DevImg : ImgWithDeps) { + RTCInfoPtrs.reserve(Imgs.size()); + for (const device_image_plain &DevImg : Imgs) { const DeviceImageImplPtr &DevImgImpl = getSyclObjImpl(DevImg); CombinedOrigins |= DevImgImpl->getOriginMask(); RTCInfoPtrs.emplace_back(&(DevImgImpl->getRTCInfo())); } auto MergedRTCInfo = detail::KernelCompilerBinaryInfo::Merge(RTCInfoPtrs); - auto BinImg = InputImpl->get_bin_image_ref(); DeviceImageImplPtr ExecutableImpl = std::make_shared( - BinImg, Context, Devs, bundle_state::executable, std::move(KernelIDs), - LinkedProg, std::move(NewSpecConstMap), std::move(NewSpecConstBlob), - CombinedOrigins, std::move(MergedRTCInfo)); + NewBinImg, Context, std::vector{Devs}, + bundle_state::executable, std::move(KernelIDs), LinkedProg, + std::move(NewSpecConstMap), std::move(NewSpecConstBlob), + CombinedOrigins, std::move(MergedRTCInfo), + std::move(MergedImageStorage)); // TODO: Make multiple sets of device images organized by devices they are // compiled for. @@ -3132,6 +3166,8 @@ ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, std::vector SpecConstBlob; device_image_impl::SpecConstMapT SpecConstMap; + std::unique_ptr MergedImageStorage; + const RTDeviceBinaryImage *ResultBinImg = MainInputImpl->get_bin_image_ref(); if (DevImgWithDeps.hasDeps()) { KernelIDs = std::make_shared>(); // Sort the images to make the order of spec constant values used for @@ -3142,7 +3178,8 @@ ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, return getSyclObjImpl(A)->get_bin_image_ref()->getImageID() < getSyclObjImpl(B)->get_bin_image_ref()->getImageID(); }); - mergeImageData(SortedImgs, *KernelIDs, SpecConstBlob, SpecConstMap); + ResultBinImg = mergeImageData(SortedImgs, *KernelIDs, SpecConstBlob, + SpecConstMap, MergedImageStorage); } else { KernelIDs = MainInputImpl->get_kernel_ids_ptr(); SpecConstBlob = MainInputImpl->get_spec_const_blob_ref(); @@ -3165,10 +3202,10 @@ ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, std::move(BinImgs), ContextImpl, Devs, &DevImgWithDeps, SpecConstBlob); DeviceImageImplPtr ExecImpl = std::make_shared( - MainInputImpl->get_bin_image_ref(), Context, Devs, + ResultBinImg, Context, std::vector{Devs}, bundle_state::executable, std::move(KernelIDs), ResProgram, std::move(SpecConstMap), std::move(SpecConstBlob), CombinedOrigins, - std::move(MergedRTCInfo)); + std::move(MergedRTCInfo), std::move(MergedImageStorage)); return createSyclObjFromImpl(std::move(ExecImpl)); } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index da763fe6f6797..de5901e9b75c3 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -216,7 +216,7 @@ class ProgramManager { ur_program_handle_t getUrProgramFromUrKernel(ur_kernel_handle_t Kernel, const ContextImplPtr &Context); - void addImage(sycl_device_binary RawImg, + void addImage(sycl_device_binary RawImg, bool RegisterImgExports = true, RTDeviceBinaryImage **OutImage = nullptr, std::vector *OutKernelIDs = nullptr); void addImages(sycl_device_binaries DeviceImages); @@ -341,9 +341,9 @@ class ProgramManager { // Produces set of device images by convering input device images to object // the executable state - std::vector link(const DevImgPlainWithDeps &ImgWithDeps, - const std::vector &Devs, - const property_list &PropList); + std::vector + link(const std::vector &Imgs, + const std::vector &Devs, const property_list &PropList); // Produces new device image by converting input device image to the // executable state @@ -372,10 +372,12 @@ class ProgramManager { getRawDeviceImages(const std::vector &KernelIDs); std::set - collectDeviceImageDeps(const RTDeviceBinaryImage &Img, const device &Dev); + collectDeviceImageDeps(const RTDeviceBinaryImage &Img, const device &Dev, + bool ErrorOnUnresolvableImport = true); std::set collectDeviceImageDepsForImportedSymbols(const RTDeviceBinaryImage &Img, - const device &Dev); + const device &Dev, + bool ErrorOnUnresolvableImport); private: ProgramManager(ProgramManager const &) = delete; diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index bd4491abeaa10..31a7c1612d0e5 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -262,6 +262,11 @@ bool device::ext_oneapi_architecture_is( } // kernel_compiler extension methods +bool device::ext_oneapi_can_build( + ext::oneapi::experimental::source_language Language) { + return impl->extOneapiCanBuild(Language); +} + bool device::ext_oneapi_can_compile( ext::oneapi::experimental::source_language Language) { return impl->extOneapiCanCompile(Language); diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index eee4fc23cfb2c..7fcee9ac6ae0a 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -379,6 +379,7 @@ bool is_compatible(const std::vector &KernelIDs, const device &Dev) { namespace ext::oneapi::experimental { using source_kb = kernel_bundle; +using obj_kb = kernel_bundle; using exe_kb = kernel_bundle; using kernel_bundle_impl = sycl::detail::kernel_bundle_impl; @@ -495,22 +496,38 @@ source_kb make_kernel_bundle_from_source(const context &SyclContext, } ///////////////////////// -// syclex::detail::build_from_source(source_kb) => exe_kb +// syclex::detail::compile_from_source(source_kb) => obj_kb ///////////////////////// -exe_kb build_from_source( +obj_kb compile_from_source( source_kb &SourceKB, const std::vector &Devices, const std::vector &BuildOptions, sycl::detail::string *LogView, const std::vector &RegisteredKernelNames) { - std::vector Options; - for (const sycl::detail::string_view option : BuildOptions) - Options.push_back(option.data()); + std::string Log; + std::string *LogPtr = nullptr; + if (LogView) + LogPtr = &Log; + std::vector UniqueDevices = + sycl::detail::removeDuplicateDevices(Devices); + std::shared_ptr sourceImpl = getSyclObjImpl(SourceKB); + std::shared_ptr KBImpl = sourceImpl->compile_from_source( + UniqueDevices, BuildOptions, LogPtr, RegisteredKernelNames); + auto result = sycl::detail::createSyclObjFromImpl(KBImpl); + if (LogView) + *LogView = Log; + return result; +} - std::vector KernelNames; - for (const sycl::detail::string_view name : RegisteredKernelNames) - KernelNames.push_back(name.data()); +///////////////////////// +// syclex::detail::build_from_source(source_kb) => exe_kb +///////////////////////// +exe_kb build_from_source( + source_kb &SourceKB, const std::vector &Devices, + const std::vector &BuildOptions, + sycl::detail::string *LogView, + const std::vector &RegisteredKernelNames) { std::string Log; std::string *LogPtr = nullptr; if (LogView) @@ -520,7 +537,7 @@ exe_kb build_from_source( const std::shared_ptr &sourceImpl = getSyclObjImpl(SourceKB); std::shared_ptr KBImpl = sourceImpl->build_from_source( - UniqueDevices, Options, LogPtr, KernelNames); + UniqueDevices, BuildOptions, LogPtr, RegisteredKernelNames); auto result = sycl::detail::createSyclObjFromImpl(std::move(KBImpl)); if (LogView) *LogView = Log; diff --git a/sycl/test-e2e/EnqueueFunctions/kernel_shortcut_with_kb.cpp b/sycl/test-e2e/EnqueueFunctions/kernel_shortcut_with_kb.cpp index 91713e2a3293a..2e92784488b86 100644 --- a/sycl/test-e2e/EnqueueFunctions/kernel_shortcut_with_kb.cpp +++ b/sycl/test-e2e/EnqueueFunctions/kernel_shortcut_with_kb.cpp @@ -21,7 +21,7 @@ constexpr size_t N = 1024; int main() { sycl::queue Q; - if (!Q.get_device().ext_oneapi_can_compile( + if (!Q.get_device().ext_oneapi_can_build( oneapiext::source_language::opencl)) { std::cout << "Backend does not support OpenCL C source kernel bundle extension: " diff --git a/sycl/test-e2e/EnqueueFunctions/kernel_submit_with_event_and_kb.cpp b/sycl/test-e2e/EnqueueFunctions/kernel_submit_with_event_and_kb.cpp index d0249ef64bcf4..dc2559369b9eb 100644 --- a/sycl/test-e2e/EnqueueFunctions/kernel_submit_with_event_and_kb.cpp +++ b/sycl/test-e2e/EnqueueFunctions/kernel_submit_with_event_and_kb.cpp @@ -21,7 +21,7 @@ int main() { sycl::queue Q; int Memory[N] = {0}; - if (!Q.get_device().ext_oneapi_can_compile( + if (!Q.get_device().ext_oneapi_can_build( oneapiext::source_language::opencl)) { std::cout << "Backend does not support OpenCL C source kernel bundle extension: " diff --git a/sycl/test-e2e/EnqueueFunctions/kernel_submit_with_kb.cpp b/sycl/test-e2e/EnqueueFunctions/kernel_submit_with_kb.cpp index c491a22c69779..eed68407a4790 100644 --- a/sycl/test-e2e/EnqueueFunctions/kernel_submit_with_kb.cpp +++ b/sycl/test-e2e/EnqueueFunctions/kernel_submit_with_kb.cpp @@ -21,7 +21,7 @@ int main() { sycl::queue Q; int Memory[N] = {0}; - if (!Q.get_device().ext_oneapi_can_compile( + if (!Q.get_device().ext_oneapi_can_build( oneapiext::source_language::opencl)) { std::cout << "Backend does not support OpenCL C source kernel bundle extension: " diff --git a/sycl/test-e2e/KernelCompiler/opencl.cpp b/sycl/test-e2e/KernelCompiler/opencl.cpp index a532ff468bc5f..e8d8cc44368f9 100644 --- a/sycl/test-e2e/KernelCompiler/opencl.cpp +++ b/sycl/test-e2e/KernelCompiler/opencl.cpp @@ -103,7 +103,7 @@ void test_build_and_run() { sycl::queue q{ctx, d}; bool ok = - q.get_device().ext_oneapi_can_compile(syclex::source_language::opencl); + q.get_device().ext_oneapi_can_build(syclex::source_language::opencl); if (!ok) { std::cout << "Apparently this device does not support OpenCL C source " "kernel bundle extension: " @@ -164,7 +164,7 @@ void test_error() { sycl::queue q{ctx, d}; bool ok = - q.get_device().ext_oneapi_can_compile(syclex::source_language::opencl); + q.get_device().ext_oneapi_can_build(syclex::source_language::opencl); if (!ok) { return; } diff --git a/sycl/test-e2e/KernelCompiler/opencl_cache_eviction.cpp b/sycl/test-e2e/KernelCompiler/opencl_cache_eviction.cpp index 90ab79b732b32..9db50e18eaf1e 100644 --- a/sycl/test-e2e/KernelCompiler/opencl_cache_eviction.cpp +++ b/sycl/test-e2e/KernelCompiler/opencl_cache_eviction.cpp @@ -50,7 +50,7 @@ void test_build_and_run() { sycl::queue q{ctx, d}; bool ok = - q.get_device().ext_oneapi_can_compile(syclex::source_language::opencl); + q.get_device().ext_oneapi_can_build(syclex::source_language::opencl); if (!ok) { std::cout << "Apparently this device does not support OpenCL C source " "kernel bundle extension: " diff --git a/sycl/test-e2e/KernelCompiler/opencl_queries.cpp b/sycl/test-e2e/KernelCompiler/opencl_queries.cpp index 4ccea9db88a7f..3e28e673afc69 100644 --- a/sycl/test-e2e/KernelCompiler/opencl_queries.cpp +++ b/sycl/test-e2e/KernelCompiler/opencl_queries.cpp @@ -21,7 +21,7 @@ int main() { sycl::queue q; sycl::device d = q.get_device(); - assert(d.ext_oneapi_can_compile(syclex::source_language::opencl) && + assert(d.ext_oneapi_can_build(syclex::source_language::opencl) && "can_compile(opencl) unexpectedly false"); assert(d.ext_oneapi_supports_cl_c_version(syclex::opencl_c_1_0) && diff --git a/sycl/test-e2e/KernelCompiler/opencl_queries_negative.cpp b/sycl/test-e2e/KernelCompiler/opencl_queries_negative.cpp index caad64e8c363a..033213c82f68f 100644 --- a/sycl/test-e2e/KernelCompiler/opencl_queries_negative.cpp +++ b/sycl/test-e2e/KernelCompiler/opencl_queries_negative.cpp @@ -19,7 +19,7 @@ int main() { sycl::queue q; sycl::device d = q.get_device(); - assert(!d.ext_oneapi_can_compile(syclex::source_language::opencl) && + assert(!d.ext_oneapi_can_build(syclex::source_language::opencl) && "can_compile(opencl) unexpectedly true"); assert(!d.ext_oneapi_supports_cl_c_version(syclex::opencl_c_1_0) && diff --git a/sycl/test-e2e/KernelCompiler/sycl.cpp b/sycl/test-e2e/KernelCompiler/sycl.cpp index 2fb8c79a4ac4d..1b45686e41e5d 100644 --- a/sycl/test-e2e/KernelCompiler/sycl.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl.cpp @@ -527,8 +527,7 @@ int main() { sycl::queue q; sycl::context ctx = q.get_context(); - bool ok = - q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl); + bool ok = q.get_device().ext_oneapi_can_build(syclex::source_language::sycl); if (!ok) { return -1; } diff --git a/sycl/test-e2e/KernelCompiler/sycl_cache.cpp b/sycl/test-e2e/KernelCompiler/sycl_cache.cpp index 2bec52ac46c5a..1ec2f744b632d 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_cache.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_cache.cpp @@ -58,8 +58,7 @@ int test_persistent_cache() { sycl::context ctx{d}; sycl::queue q{ctx, d}; - bool ok = - q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl); + bool ok = q.get_device().ext_oneapi_can_build(syclex::source_language::sycl); if (!ok) { std::cout << "Apparently this device does not support `sycl` source kernel " "bundle extension: " diff --git a/sycl/test-e2e/KernelCompiler/sycl_cache_pm.cpp b/sycl/test-e2e/KernelCompiler/sycl_cache_pm.cpp index d275a81b51ccc..c449c25647414 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_cache_pm.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_cache_pm.cpp @@ -88,8 +88,7 @@ void test_build_and_run() { sycl::queue q; sycl::context ctx = q.get_context(); - bool ok = - q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl); + bool ok = q.get_device().ext_oneapi_can_build(syclex::source_language::sycl); if (!ok) { std::cout << "Apparently this device does not support SYCL source " "kernel bundle extension: " diff --git a/sycl/test-e2e/KernelCompiler/sycl_context_error.cpp b/sycl/test-e2e/KernelCompiler/sycl_context_error.cpp index e9dafa672fac7..c7a0be335c0fa 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_context_error.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_context_error.cpp @@ -41,7 +41,7 @@ int main() { return false; } for (auto dev : devices) { - if (!dev.ext_oneapi_can_compile(syclexp::source_language::sycl)) { + if (!dev.ext_oneapi_can_build(syclexp::source_language::sycl)) { return false; } } diff --git a/sycl/test-e2e/KernelCompiler/sycl_device_flags.cpp b/sycl/test-e2e/KernelCompiler/sycl_device_flags.cpp index f457f1b7912ca..93aa4eb9712c6 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_device_flags.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_device_flags.cpp @@ -119,8 +119,7 @@ int main(int argc, char *argv[]) { sycl::queue q; sycl::context ctx = q.get_context(); - bool ok = - q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl); + bool ok = q.get_device().ext_oneapi_can_build(syclex::source_language::sycl); if (!ok) { std::cout << "compiling from SYCL source not supported" << std::endl; return 0; // if kernel compilation is not supported, do nothing. diff --git a/sycl/test-e2e/KernelCompiler/sycl_device_globals.cpp b/sycl/test-e2e/KernelCompiler/sycl_device_globals.cpp index 0ebb59f42959d..ceda7252369e5 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_device_globals.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_device_globals.cpp @@ -53,7 +53,7 @@ int test_device_global() { sycl::context ctx = q.get_context(); sycl::device d = q.get_device(); - bool ok = d.ext_oneapi_can_compile(syclex::source_language::sycl); + bool ok = d.ext_oneapi_can_build(syclex::source_language::sycl); if (!ok) { std::cout << "Apparently this device does not support `sycl` source kernel " "bundle extension: " @@ -141,7 +141,7 @@ int test_error() { sycl::context ctx = q.get_context(); sycl::device d = q.get_device(); - bool ok = d.ext_oneapi_can_compile(syclex::source_language::sycl); + bool ok = d.ext_oneapi_can_build(syclex::source_language::sycl); if (!ok) { return 0; } diff --git a/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp b/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp new file mode 100644 index 0000000000000..789bab6223546 --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp @@ -0,0 +1,112 @@ +//==--- sycl_export_registration.cpp --- kernel_compiler extension tests ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// -- Test for a case where a kernel bundle is built that exports a symbol and +// -- other kernel bundles that uses it are compiled/linked without it. These +// -- cases should fail due to unresolved symbols, rather than picking up the +// -- symbol from the registered exported symbols. + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// RUN: %{l0_leak_check} %{run} %t.out + +#include +#include +#include + +namespace syclex = sycl::ext::oneapi::experimental; +using source_kb = sycl::kernel_bundle; +using obj_kb = sycl::kernel_bundle; +using exe_kb = sycl::kernel_bundle; + +// TODO: remove SYCL_EXTERNAL from the kernel once it is no longer needed. +auto constexpr SYCLImportSource = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size); + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel(int *Ptr, int Size) { + TestFunc(Ptr, Size); +} + +)==="; + +auto constexpr SYCLExportSource = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size) { + for (size_t I = 0; I < Size; ++I) + Ptr[I] = 0; +} + +)==="; + +int main() { + sycl::queue Q; + int Failed = 0; + + if (!Q.get_device().ext_oneapi_can_build(syclex::source_language::sycl)) { + std::cout << "Device does not support one of the source languages: " + << Q.get_device().get_info() + << std::endl; + return 0; + } + syclex::properties BuildOpts{ + syclex::build_options{"-fsycl-allow-device-image-dependencies"}}; + + source_kb ImportSourceKB = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLImportSource); + source_kb ExportSourceKB = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLExportSource); + + // Build the SYCL source that exports symbols. + syclex::build(ExportSourceKB, BuildOpts); + + // Build the SYCL source that imports symbols separately. This should fail to + // resolve exported symbols. + bool BuildFailed = false; + try { + syclex::build(ImportSourceKB, BuildOpts); + } catch (...) { + BuildFailed = true; + } + if (!BuildFailed) { + std::cout << "Building the SYCL source code with unresolved imported " + "symbols did NOT fail." + << std::endl; + ++Failed; + } + + // Compiling the import kernel bundle should work, despite unresolved symbols. + obj_kb ImportObjKB = syclex::compile(ImportSourceKB, BuildOpts); + + // Link the SYCL source that imports symbols separately. This should fail to + // resolve exported symbols. + bool LinkingFailed = false; + try { + sycl::link({ImportObjKB}); + } catch (...) { + LinkingFailed = true; + } + if (!LinkingFailed) { + std::cout << "Linking the SYCL source code with unresolved imported " + "symbols did NOT fail." + << std::endl; + ++Failed; + } + + return Failed; +} diff --git a/sycl/test-e2e/KernelCompiler/sycl_include_paths.cpp b/sycl/test-e2e/KernelCompiler/sycl_include_paths.cpp index ada5c06cd1160..bf0d46b5800be 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_include_paths.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_include_paths.cpp @@ -73,8 +73,7 @@ int test_include_paths(const std::string &baseDir) { sycl::queue q; sycl::context ctx = q.get_context(); - bool ok = - q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl); + bool ok = q.get_device().ext_oneapi_can_build(syclex::source_language::sycl); if (!ok) { std::cout << "Apparently this device does not support `sycl` source kernel " "bundle extension: " diff --git a/sycl/test-e2e/KernelCompiler/sycl_join.cpp b/sycl/test-e2e/KernelCompiler/sycl_join.cpp index 2b4dc8f8e1bec..99ad22396f7af 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_join.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_join.cpp @@ -66,7 +66,7 @@ int main() { sycl::queue Q; sycl::context Ctx = Q.get_context(); - if (!Q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl)) { + if (!Q.get_device().ext_oneapi_can_build(syclex::source_language::sycl)) { std::cout << "Apparently this device does not support `sycl` source " "kernel bundle extension: " << Q.get_device().get_info() diff --git a/sycl/test-e2e/KernelCompiler/sycl_lifetimes.cpp b/sycl/test-e2e/KernelCompiler/sycl_lifetimes.cpp index bf6dd99674a38..4ef1cb2d88e9c 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_lifetimes.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_lifetimes.cpp @@ -38,8 +38,7 @@ int test_lifetimes() { sycl::queue q; sycl::context ctx = q.get_context(); - bool ok = - q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl); + bool ok = q.get_device().ext_oneapi_can_build(syclex::source_language::sycl); if (!ok) { std::cout << "Apparently this device does not support `sycl` source " "kernel bundle extension: " diff --git a/sycl/test-e2e/KernelCompiler/sycl_link.cpp b/sycl/test-e2e/KernelCompiler/sycl_link.cpp new file mode 100644 index 0000000000000..9ff9878e387ee --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/sycl_link.cpp @@ -0,0 +1,102 @@ +//==----------- sycl_link.cpp --- kernel_compiler extension tests ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) +// REQUIRES: aspect-usm_shared_allocations + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// -- Test for a simple linking case with source files compiled from SYCL source +// -- at runtime. + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// RUN: %{l0_leak_check} %{run} %t.out + +#include +#include +#include + +namespace syclex = sycl::ext::oneapi::experimental; +using source_kb = sycl::kernel_bundle; +using obj_kb = sycl::kernel_bundle; +using exe_kb = sycl::kernel_bundle; + +// TODO: remove SYCL_EXTERNAL from the kernel once it is no longer needed. +auto constexpr SYCLImportSource = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size); + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel(int *Ptr, int Size) { + TestFunc(Ptr, Size); +} + +)==="; + +auto constexpr SYCLExportSource = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size) { + for (size_t I = 0; I < Size; ++I) + Ptr[I] = I; +} + +)==="; + +int main() { + sycl::queue Q; + + if (!Q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl)) { + std::cout << "Device does not support one of the source languages: " + << Q.get_device().get_info() + << std::endl; + return 0; + } + + source_kb ImportSourceKB = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLImportSource); + source_kb ExportSourceKB = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLExportSource); + + syclex::properties BuildOpts{ + syclex::build_options{"-fsycl-allow-device-image-dependencies"}}; + obj_kb ImportObjKB = syclex::compile(ImportSourceKB, BuildOpts); + obj_kb ExportObjKB = syclex::compile(ExportSourceKB, BuildOpts); + + exe_kb ExecKB = sycl::link({ImportObjKB, ExportObjKB}); + + sycl::kernel Kernel = ExecKB.ext_oneapi_get_kernel("TestKernel"); + + constexpr int Range = 10; + int *USMPtr = sycl::malloc_shared(Range, Q); + + memset(USMPtr, 0, Range * sizeof(int)); + Q.submit([&](sycl::handler &Handler) { + Handler.set_args(USMPtr, Range); + Handler.single_task(Kernel); + }); + Q.wait(); + + int Failed = 0; + for (size_t I = 0; I < Range; ++I) { + if (USMPtr[I] != I) { + std::cout << "Unexpected value at index " << I << ": " << USMPtr[I] + << " != " << I << std::endl; + ++Failed; + } + } + + sycl::free(USMPtr, Q); + + return Failed; +} diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp new file mode 100644 index 0000000000000..49d67205b8cd1 --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp @@ -0,0 +1,143 @@ +//==-----sycl_link_common_dep.cpp --- kernel_compiler extension tests ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) +// REQUIRES: aspect-usm_shared_allocations + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// -- Test for linking where two kernels use the same imported symbols. + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// RUN: %{l0_leak_check} %{run} %t.out + +#include +#include +#include + +namespace syclex = sycl::ext::oneapi::experimental; +using source_kb = sycl::kernel_bundle; +using obj_kb = sycl::kernel_bundle; +using exe_kb = sycl::kernel_bundle; + +// TODO: remove SYCL_EXTERNAL from the kernel once it is no longer needed. +auto constexpr SYCLImportSource1 = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size); + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel1(int *Ptr, int Size) { + TestFunc(Ptr, Size); +} + +)==="; + +// TODO: remove SYCL_EXTERNAL from the kernel once it is no longer needed. +auto constexpr SYCLImportSource2 = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size); + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel2(int *Ptr, int Size) { + TestFunc(Ptr, Size); + for (size_t I = 0; I < Size; ++I) + ++Ptr[I]; +} + +)==="; + +auto constexpr SYCLExportSource = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size) { + for (size_t I = 0; I < Size; ++I) + Ptr[I] = I; +} + +)==="; + +int main() { + sycl::queue Q; + + if (!Q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl)) { + std::cout << "Device does not support one of the source languages: " + << Q.get_device().get_info() + << std::endl; + return 0; + } + + source_kb ImportSourceKB1 = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLImportSource1); + source_kb ImportSourceKB2 = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLImportSource2); + source_kb ExportSourceKB = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLExportSource); + + syclex::properties BuildOpts{ + syclex::build_options{"-fsycl-allow-device-image-dependencies"}}; + obj_kb ImportObjKB1 = syclex::compile(ImportSourceKB1, BuildOpts); + obj_kb ImportObjKB2 = syclex::compile(ImportSourceKB2, BuildOpts); + obj_kb ExportObjKB = syclex::compile(ExportSourceKB, BuildOpts); + + exe_kb ExecKB = sycl::link({ImportObjKB1, ImportObjKB2, ExportObjKB}); + + constexpr int Range = 10; + int *USMPtr = sycl::malloc_shared(Range, Q); + + int Failed = 0; + { + sycl::kernel Kernel = ExecKB.ext_oneapi_get_kernel("TestKernel1"); + + memset(USMPtr, 0, Range * sizeof(int)); + Q.submit([&](sycl::handler &Handler) { + Handler.set_args(USMPtr, Range); + Handler.single_task(Kernel); + }); + Q.wait(); + + for (size_t I = 0; I < Range; ++I) { + if (USMPtr[I] != I) { + std::cout << "TestKernel1: Unexpected value at index " << I << ": " + << USMPtr[I] << " != " << I << std::endl; + ++Failed; + } + } + } + + { + sycl::kernel Kernel = ExecKB.ext_oneapi_get_kernel("TestKernel2"); + + memset(USMPtr, 0, Range * sizeof(int)); + Q.submit([&](sycl::handler &Handler) { + Handler.set_args(USMPtr, Range); + Handler.single_task(Kernel); + }); + Q.wait(); + + for (size_t I = 0; I < Range; ++I) { + const int Expected = I + 1; + if (USMPtr[I] != Expected) { + std::cout << "TestKernel2: Unexpected value at index " << I << ": " + << USMPtr[I] << " != " << Expected << std::endl; + ++Failed; + } + } + } + + sycl::free(USMPtr, Q); + + return Failed; +} diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp new file mode 100644 index 0000000000000..76fe85ce72fa9 --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp @@ -0,0 +1,149 @@ +//==- sycl_link_common_dep_optional_feature.cpp --- kernel_compiler tests --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) +// REQUIRES: aspect-usm_shared_allocations + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// -- Test for linking where two kernels use the same imported symbols, but one +// -- may not be supported on the device. + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// RUN: %{l0_leak_check} %{run} %t.out + +#include +#include +#include + +namespace syclex = sycl::ext::oneapi::experimental; +using source_kb = sycl::kernel_bundle; +using obj_kb = sycl::kernel_bundle; +using exe_kb = sycl::kernel_bundle; + +// TODO: remove SYCL_EXTERNAL from the kernel once it is no longer needed. +auto constexpr SYCLImportSource1 = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size); + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel1(int *Ptr, int Size) { + TestFunc(Ptr, Size); +} + +)==="; + +// TODO: remove SYCL_EXTERNAL from the kernel once it is no longer needed. +auto constexpr SYCLImportSource2 = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size); + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel2(int *Ptr, int Size) { + TestFunc(Ptr, Size); + for (size_t I = 0; I < Size; ++I) + Ptr[I] = static_cast(static_cast(Ptr[I]) / 2.0); +} + +)==="; + +auto constexpr SYCLExportSource = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size) { + for (size_t I = 0; I < Size; ++I) + Ptr[I] = I; +} + +)==="; + +int main() { + sycl::queue Q; + + if (!Q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl)) { + std::cout << "Device does not support one of the source languages: " + << Q.get_device().get_info() + << std::endl; + return 0; + } + + source_kb ImportSourceKB1 = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLImportSource1); + source_kb ImportSourceKB2 = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLImportSource2); + source_kb ExportSourceKB = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLExportSource); + + syclex::properties BuildOpts{ + syclex::build_options{"-fsycl-allow-device-image-dependencies"}}; + obj_kb ImportObjKB1 = syclex::compile(ImportSourceKB1, BuildOpts); + obj_kb ImportObjKB2 = syclex::compile(ImportSourceKB2, BuildOpts); + obj_kb ExportObjKB = syclex::compile(ExportSourceKB, BuildOpts); + + exe_kb ExecKB = sycl::link({ImportObjKB1, ImportObjKB2, ExportObjKB}); + + constexpr int Range = 10; + int *USMPtr = sycl::malloc_shared(Range, Q); + + int Failed = 0; + { + sycl::kernel Kernel = ExecKB.ext_oneapi_get_kernel("TestKernel1"); + + memset(USMPtr, 0, Range * sizeof(int)); + Q.submit([&](sycl::handler &Handler) { + Handler.set_args(USMPtr, Range); + Handler.single_task(Kernel); + }); + Q.wait(); + + for (size_t I = 0; I < Range; ++I) { + if (USMPtr[I] != I) { + std::cout << "TestKernel1: Unexpected value at index " << I << ": " + << USMPtr[I] << " != " << I << std::endl; + ++Failed; + } + } + } + + if (Q.get_device().has(sycl::aspect::fp64)) { + sycl::kernel Kernel = ExecKB.ext_oneapi_get_kernel("TestKernel2"); + + memset(USMPtr, 0, Range * sizeof(int)); + Q.submit([&](sycl::handler &Handler) { + Handler.set_args(USMPtr, Range); + Handler.single_task(Kernel); + }); + Q.wait(); + + for (size_t I = 0; I < Range; ++I) { + const int Expected = static_cast(static_cast(I) / 2.0); + if (USMPtr[I] != Expected) { + std::cout << "TestKernel2: Unexpected value at index " << I << ": " + << USMPtr[I] << " != " << Expected << std::endl; + ++Failed; + } + } + } else if (ExecKB.ext_oneapi_has_kernel("TestKernel2")) { + std::cout << "Device does not support fp64, but the kernel bundle still " + "has the kernel using it." + << std::endl; + ++Failed; + } + + sycl::free(USMPtr, Q); + + return Failed; +} diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_export_conflict.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_export_conflict.cpp new file mode 100644 index 0000000000000..f9d6381e08b38 --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/sycl_link_export_conflict.cpp @@ -0,0 +1,116 @@ +//==-- sycl_link_export_conflict.cpp --- kernel_compiler extension tests ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) +// REQUIRES: aspect-usm_shared_allocations + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// -- Test for a case where a kernel bundle with an exported symbol is compiled +// -- before another kernel bundle using a different variant of the symbol. + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// RUN: %{l0_leak_check} %{run} %t.out + +#include +#include +#include + +namespace syclex = sycl::ext::oneapi::experimental; +using source_kb = sycl::kernel_bundle; +using obj_kb = sycl::kernel_bundle; +using exe_kb = sycl::kernel_bundle; + +// TODO: remove SYCL_EXTERNAL from the kernel once it is no longer needed. +auto constexpr SYCLImportSource = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size); + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel(int *Ptr, int Size) { + TestFunc(Ptr, Size); +} + +)==="; + +auto constexpr SYCLExportSource1 = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size) { + for (size_t I = 0; I < Size; ++I) + Ptr[I] = 0; +} + +)==="; + +auto constexpr SYCLExportSource2 = R"===( +#include + +SYCL_EXTERNAL void TestFunc(int *Ptr, int Size) { + for (size_t I = 0; I < Size; ++I) + Ptr[I] = I; +} + +)==="; + +int main() { + sycl::queue Q; + + if (!Q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl)) { + std::cout << "Device does not support one of the source languages: " + << Q.get_device().get_info() + << std::endl; + return 0; + } + syclex::properties BuildOpts{ + syclex::build_options{"-fsycl-allow-device-image-dependencies"}}; + + source_kb ConflictExportSourceKB = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLExportSource1); + exe_kb ConflictingExecKB = syclex::build(ConflictExportSourceKB, BuildOpts); + + source_kb ImportSourceKB = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLImportSource); + source_kb ExportSourceKB = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLExportSource2); + + obj_kb ImportObjKB = syclex::compile(ImportSourceKB, BuildOpts); + obj_kb ExportObjKB = syclex::compile(ExportSourceKB, BuildOpts); + + exe_kb ExecKB = sycl::link({ImportObjKB, ExportObjKB}); + + sycl::kernel Kernel = ExecKB.ext_oneapi_get_kernel("TestKernel"); + + constexpr int Range = 10; + int *USMPtr = sycl::malloc_shared(Range, Q); + + memset(USMPtr, 0, Range * sizeof(int)); + Q.submit([&](sycl::handler &Handler) { + Handler.set_args(USMPtr, Range); + Handler.single_task(Kernel); + }); + Q.wait(); + + int Failed = 0; + for (size_t I = 0; I < Range; ++I) { + if (USMPtr[I] != I) { + std::cout << "Unexpected value at index " << I << ": " << USMPtr[I] + << " != " << I << std::endl; + ++Failed; + } + } + + sycl::free(USMPtr, Q); + + return Failed; +} diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_kernel_conflict.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_kernel_conflict.cpp new file mode 100644 index 0000000000000..7fe9b0fd9db79 --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/sycl_link_kernel_conflict.cpp @@ -0,0 +1,87 @@ +//==-- sycl_link_kernel_conflict.cpp --- kernel_compiler extension tests ---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) +// REQUIRES: aspect-usm_shared_allocations + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// -- Test for the linking of two kernels with conflicting definitions of +// -- kernels with the same name. + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// RUN: %{l0_leak_check} %{run} %t.out + +#include +#include +#include + +namespace syclex = sycl::ext::oneapi::experimental; +using source_kb = sycl::kernel_bundle; +using obj_kb = sycl::kernel_bundle; +using exe_kb = sycl::kernel_bundle; + +// TODO: remove SYCL_EXTERNAL from the kernel once it is no longer needed. +auto constexpr SYCLSource1 = R"===( +#include + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel(int *Ptr) { + *Ptr = 42; +} + +)==="; + +// TODO: remove SYCL_EXTERNAL from the kernel once it is no longer needed. +auto constexpr SYCLSource2 = R"===( +#include + +// use extern "C" to avoid name mangling +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel(int *Ptr) { + *Ptr = 24; +} + +)==="; + +int main() { + sycl::queue Q; + + if (!Q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl)) { + std::cout << "Device does not support one of the source languages: " + << Q.get_device().get_info() + << std::endl; + return 0; + } + + source_kb SourceKB1 = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLSource1); + source_kb SourceKB2 = syclex::create_kernel_bundle_from_source( + Q.get_context(), syclex::source_language::sycl, SYCLSource2); + + obj_kb ObjKB1 = syclex::compile(SourceKB1); + obj_kb ObjKB2 = syclex::compile(SourceKB2); + + try { + sycl::link({ObjKB1, ObjKB2}); + } catch (sycl::exception &E) { + std::cout << "Exception caught: " << E.what() << std::endl; + return 0; + } + + std::cout << "No exception caught while linking two binaries with " + "conflicting kernels." + << std::endl; + + return 1; +} diff --git a/sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp b/sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp index 03d074fafa48f..77a2d89bed123 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp @@ -23,8 +23,7 @@ int test_tracing() { sycl::queue q; sycl::context ctx = q.get_context(); - bool ok = - q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl); + bool ok = q.get_device().ext_oneapi_can_build(syclex::source_language::sycl); if (!ok) { std::cout << "Apparently this device does not support `sycl` source kernel " "bundle extension: " diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index d27ff51778f7a..fa8c87783f3b0 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3074,6 +3074,7 @@ _ZN4sycl3_V13ext6oneapi12experimental6detail14image_mem_implC2ERKNS3_16image_des _ZN4sycl3_V13ext6oneapi12experimental6detail14image_mem_implD1Ev _ZN4sycl3_V13ext6oneapi12experimental6detail14image_mem_implD2Ev _ZN4sycl3_V13ext6oneapi12experimental6detail17build_from_sourceERNS0_13kernel_bundleILNS0_12bundle_stateE3EEERKSt6vectorINS0_6deviceESaISA_EERKS9_INS0_6detail11string_viewESaISG_EEPNSF_6stringESK_ +_ZN4sycl3_V13ext6oneapi12experimental6detail19compile_from_sourceERNS0_13kernel_bundleILNS0_12bundle_stateE3EEERKSt6vectorINS0_6deviceESaISA_EERKS9_INS0_6detail11string_viewESaISG_EEPNSF_6stringESK_ _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base11updateValueEPKNS3_14raw_kernel_argEm _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base11updateValueEPKvm _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base14updateAccessorEPKNS0_6detail16AccessorBaseHostE @@ -3348,6 +3349,7 @@ _ZN4sycl3_V16detail9modf_implENS1_9half_impl4halfEPS3_ _ZN4sycl3_V16detail9modf_implEdPd _ZN4sycl3_V16detail9modf_implEfPf _ZN4sycl3_V16device11get_devicesENS0_4info11device_typeE +_ZN4sycl3_V16device20ext_oneapi_can_buildENS0_3ext6oneapi12experimental15source_languageE _ZN4sycl3_V16device22ext_oneapi_can_compileENS0_3ext6oneapi12experimental15source_languageE _ZN4sycl3_V16device26ext_oneapi_architecture_isENS0_3ext6oneapi12experimental12architectureE _ZN4sycl3_V16device26ext_oneapi_architecture_isENS0_3ext6oneapi12experimental13arch_categoryE diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index dd3e8c2e80d69..4f5590fe67161 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -3796,6 +3796,7 @@ ?checkNodePropertiesAndThrow@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@KAXAEBVproperty_list@67@@Z ?clearArgs@handler@_V1@sycl@@AEAAXXZ ?code@exception@_V1@sycl@@QEBAAEBVerror_code@std@@XZ +?compile_from_source@detail@experimental@oneapi@ext@_V1@sycl@@YA?AV?$kernel_bundle@$00@56@AEAV?$kernel_bundle@$02@56@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV?$vector@Vstring_view@detail@_V1@sycl@@V?$allocator@Vstring_view@detail@_V1@sycl@@@std@@@std@@PEAVstring@156@2@Z ?compile_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBV?$kernel_bundle@$0A@@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBVproperty_list@23@@Z ?complete_fusion@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEAA?AVevent@56@AEBVproperty_list@56@@Z ?computeFallbackKernelBounds@handler@_V1@sycl@@AEAA?AV?$id@$01@23@_K0@Z @@ -3846,6 +3847,7 @@ ?ext_oneapi_barrier@handler@_V1@sycl@@QEAAXAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@@Z ?ext_oneapi_barrier@handler@_V1@sycl@@QEAAXXZ ?ext_oneapi_can_access_peer@device@_V1@sycl@@QEAA_NAEBV123@W4peer_access@oneapi@ext@23@@Z +?ext_oneapi_can_build@device@_V1@sycl@@QEAA_NW4source_language@experimental@oneapi@ext@23@@Z ?ext_oneapi_can_compile@device@_V1@sycl@@QEAA_NW4source_language@experimental@oneapi@ext@23@@Z ?ext_oneapi_cl_profile@device@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ ?ext_oneapi_cl_profile_impl@device@_V1@sycl@@AEBA?AVstring@detail@23@XZ diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index 1f8ce4897a5f2..c913f0d6bcaa6 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 14 +// CHECK-NUM-MATCHES: 20 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see