From 870ac9867539b532a1f45fe2a43b9f750045ab21 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Mon, 25 Nov 2024 07:38:39 -0800 Subject: [PATCH 01/10] [SYCL] Support image dependencies in kernel bundles --- sycl/source/detail/device_binary_image.hpp | 1 + sycl/source/detail/kernel_bundle_impl.hpp | 139 ++++-- .../program_manager/program_manager.cpp | 445 +++++++++++------- .../program_manager/program_manager.hpp | 77 ++- sycl/source/kernel_bundle.cpp | 18 +- .../DeviceImageDependencies/Inputs/basic.cpp | 49 ++ .../DeviceImageDependencies/dynamic.cpp | 27 +- .../DeviceImageDependencies/objects.cpp | 27 +- sycl/unittests/program_manager/CMakeLists.txt | 3 +- .../DynamicLinking/CMakeLists.txt | 5 + .../{ => DynamicLinking}/DynamicLinking.cpp | 253 +++++++++- 11 files changed, 702 insertions(+), 342 deletions(-) create mode 100644 sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp create mode 100644 sycl/unittests/program_manager/DynamicLinking/CMakeLists.txt rename sycl/unittests/program_manager/{ => DynamicLinking}/DynamicLinking.cpp (54%) diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 203427b89ca45..b95ab50d954b8 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -119,6 +119,7 @@ class RTDeviceBinaryImage { ConstIterator begin() const { return ConstIterator(Begin); } ConstIterator end() const { return ConstIterator(End); } size_t size() const { return std::distance(begin(), end()); } + bool empty() const { return begin() == end(); } friend class RTDeviceBinaryImage; bool isAvailable() const { return !(Begin == nullptr); } diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 58e605e85c458..166869cb5035d 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -86,6 +86,7 @@ class kernel_bundle_impl { MDeviceImages = detail::ProgramManager::getInstance().getSYCLDeviceImages( MContext, MDevices, State); + fillUniqueDeviceImages(); } // Interop constructor used by make_kernel @@ -103,7 +104,8 @@ class kernel_bundle_impl { kernel_bundle_impl(context Ctx, std::vector Devs, device_image_plain &DevImage) : kernel_bundle_impl(Ctx, Devs) { - MDeviceImages.push_back(DevImage); + MDeviceImages.emplace_back(DevImage); + MUniqueDeviceImages.emplace_back(DevImage); } // Matches sycl::build and sycl::compile @@ -115,10 +117,12 @@ class kernel_bundle_impl { : MContext(InputBundle.get_context()), MDevices(std::move(Devs)), MState(TargetState) { - MSpecConstValues = getSyclObjImpl(InputBundle)->get_spec_const_map_ref(); + const std::shared_ptr &InputBundleImpl = + getSyclObjImpl(InputBundle); + MSpecConstValues = InputBundleImpl->get_spec_const_map_ref(); const std::vector &InputBundleDevices = - getSyclObjImpl(InputBundle)->get_devices(); + InputBundleImpl->get_devices(); const bool AllDevsAssociatedWithInputBundle = std::all_of(MDevices.begin(), MDevices.end(), [&InputBundleDevices](const device &Dev) { @@ -132,24 +136,37 @@ class kernel_bundle_impl { "Not all devices are in the set of associated " "devices for input bundle or vector of devices is empty"); - for (const device_image_plain &DeviceImage : InputBundle) { + for (const DevImgPlainWithDeps &DevImgWithDeps : + InputBundleImpl->MDeviceImages) { // Skip images which are not compatible with devices provided - if (std::none_of( - MDevices.begin(), MDevices.end(), - [&DeviceImage](const device &Dev) { - return getSyclObjImpl(DeviceImage)->compatible_with_device(Dev); - })) + if (std::none_of(MDevices.begin(), MDevices.end(), + [&DevImgWithDeps](const device &Dev) { + return getSyclObjImpl(DevImgWithDeps.getMain()) + ->compatible_with_device(Dev); + })) continue; switch (TargetState) { - case bundle_state::object: - MDeviceImages.push_back(detail::ProgramManager::getInstance().compile( - DeviceImage, MDevices, PropList)); + case bundle_state::object: { + DevImgPlainWithDeps CompiledImgWithDeps = + detail::ProgramManager::getInstance().compile(DevImgWithDeps, + MDevices, PropList); + + MUniqueDeviceImages.insert(MUniqueDeviceImages.end(), + CompiledImgWithDeps.begin(), + CompiledImgWithDeps.end()); + MDeviceImages.push_back(std::move(CompiledImgWithDeps)); break; - case bundle_state::executable: - MDeviceImages.push_back(detail::ProgramManager::getInstance().build( - DeviceImage, MDevices, PropList)); + } + + case bundle_state::executable: { + device_image_plain BuiltImg = + detail::ProgramManager::getInstance().build(DevImgWithDeps, + MDevices, PropList); + MDeviceImages.emplace_back(BuiltImg); + MUniqueDeviceImages.push_back(BuiltImg); break; + } case bundle_state::input: case bundle_state::ext_oneapi_source: throw exception(make_error_code(errc::runtime), @@ -158,6 +175,7 @@ class kernel_bundle_impl { break; } } + removeDuplicateImages(); } // Matches sycl::link @@ -201,7 +219,7 @@ class kernel_bundle_impl { "Not all devices are in the set of associated " "devices for input bundles"); - // TODO: Unify with c'tor for sycl::comile and sycl::build by calling + // TODO: Unify with c'tor for sycl::compile and sycl::build by calling // sycl::join on vector of kernel_bundles // The loop below just links each device image separately, not linking any @@ -213,23 +231,27 @@ class kernel_bundle_impl { // undefined symbols, then the logic in this loop will need to be changed. for (const kernel_bundle &ObjectBundle : ObjectBundles) { - for (const device_image_plain &DeviceImage : ObjectBundle) { + for (const DevImgPlainWithDeps &DeviceImageWithDeps : + getSyclObjImpl(ObjectBundle)->MDeviceImages) { // Skip images which are not compatible with devices provided if (std::none_of(MDevices.begin(), MDevices.end(), - [&DeviceImage](const device &Dev) { - return getSyclObjImpl(DeviceImage) + [&DeviceImageWithDeps](const device &Dev) { + return getSyclObjImpl(DeviceImageWithDeps.getMain()) ->compatible_with_device(Dev); })) continue; std::vector LinkedResults = - detail::ProgramManager::getInstance().link(DeviceImage, MDevices, - PropList); + detail::ProgramManager::getInstance().link(DeviceImageWithDeps, + MDevices, PropList); MDeviceImages.insert(MDeviceImages.end(), LinkedResults.begin(), LinkedResults.end()); + MUniqueDeviceImages.insert(MUniqueDeviceImages.end(), + LinkedResults.begin(), LinkedResults.end()); } } + removeDuplicateImages(); for (const kernel_bundle &Bundle : ObjectBundles) { const KernelBundleImplPtr BundlePtr = getSyclObjImpl(Bundle); @@ -249,6 +271,7 @@ class kernel_bundle_impl { MDeviceImages = detail::ProgramManager::getInstance().getSYCLDeviceImages( MContext, MDevices, KernelIDs, State); + fillUniqueDeviceImages(); } kernel_bundle_impl(context Ctx, std::vector Devs, @@ -259,6 +282,7 @@ class kernel_bundle_impl { MDeviceImages = detail::ProgramManager::getInstance().getSYCLDeviceImages( MContext, MDevices, Selector, State); + fillUniqueDeviceImages(); } // C'tor matches sycl::join API @@ -287,11 +311,10 @@ class kernel_bundle_impl { Bundle->MDeviceImages.end()); } - std::sort(MDeviceImages.begin(), MDeviceImages.end(), - LessByHash{}); + fillUniqueDeviceImages(); if (get_bundle_state() == bundle_state::input) { - // Copy spec constants values from the device images to be removed. + // Copy spec constants values from the device images. auto MergeSpecConstants = [this](const device_image_plain &Img) { const detail::DeviceImageImplPtr &ImgImpl = getSyclObjImpl(Img); const std::map> &SpecConst : Bundle->MSpecConstValues) { @@ -605,7 +622,7 @@ class kernel_bundle_impl { assert(MDeviceImages.size() > 0); const std::shared_ptr &DeviceImageImpl = - detail::getSyclObjImpl(MDeviceImages[0]); + detail::getSyclObjImpl(MDeviceImages[0].getMain()); ur_program_handle_t UrProgram = DeviceImageImpl->get_ur_program_ref(); ContextImplPtr ContextImpl = getSyclObjImpl(MContext); const AdapterPtr &Adapter = ContextImpl->getAdapter(); @@ -634,7 +651,7 @@ class kernel_bundle_impl { // Collect kernel ids from all device images, then remove duplicates std::vector Result; - for (const device_image_plain &DeviceImage : MDeviceImages) { + for (const device_image_plain &DeviceImage : MUniqueDeviceImages) { const std::vector &KernelIDs = getSyclObjImpl(DeviceImage)->get_kernel_ids(); @@ -662,8 +679,9 @@ class kernel_bundle_impl { // Used to track if any of the candidate images has specialization values // set. bool SpecConstsSet = false; - for (auto &DeviceImage : MDeviceImages) { - if (!DeviceImage.has_kernel(KernelID)) + for (const DevImgPlainWithDeps &DeviceImageWithDeps : MDeviceImages) { + const device_image_plain &DeviceImage = DeviceImageWithDeps.getMain(); + if (!DeviceImageWithDeps.getMain().has_kernel(KernelID)) continue; const auto DeviceImageImpl = detail::getSyclObjImpl(DeviceImage); @@ -718,7 +736,7 @@ class kernel_bundle_impl { } bool has_kernel(const kernel_id &KernelID) const noexcept { - return std::any_of(MDeviceImages.begin(), MDeviceImages.end(), + return std::any_of(begin(), end(), [&KernelID](const device_image_plain &DeviceImage) { return DeviceImage.has_kernel(KernelID); }); @@ -726,7 +744,7 @@ class kernel_bundle_impl { bool has_kernel(const kernel_id &KernelID, const device &Dev) const noexcept { return std::any_of( - MDeviceImages.begin(), MDeviceImages.end(), + begin(), end(), [&KernelID, &Dev](const device_image_plain &DeviceImage) { return DeviceImage.has_kernel(KernelID, Dev); }); @@ -734,7 +752,7 @@ class kernel_bundle_impl { bool contains_specialization_constants() const noexcept { return std::any_of( - MDeviceImages.begin(), MDeviceImages.end(), + MUniqueDeviceImages.begin(), MUniqueDeviceImages.end(), [](const device_image_plain &DeviceImage) { return getSyclObjImpl(DeviceImage)->has_specialization_constants(); }); @@ -742,7 +760,7 @@ class kernel_bundle_impl { bool native_specialization_constant() const noexcept { return contains_specialization_constants() && - std::all_of(MDeviceImages.begin(), MDeviceImages.end(), + std::all_of(MUniqueDeviceImages.begin(), MUniqueDeviceImages.end(), [](const device_image_plain &DeviceImage) { return getSyclObjImpl(DeviceImage) ->all_specialization_constant_native(); @@ -750,7 +768,7 @@ class kernel_bundle_impl { } bool has_specialization_constant(const char *SpecName) const noexcept { - return std::any_of(MDeviceImages.begin(), MDeviceImages.end(), + return std::any_of(MUniqueDeviceImages.begin(), MUniqueDeviceImages.end(), [SpecName](const device_image_plain &DeviceImage) { return getSyclObjImpl(DeviceImage) ->has_specialization_constant(SpecName); @@ -761,7 +779,7 @@ class kernel_bundle_impl { const void *Value, size_t Size) noexcept { if (has_specialization_constant(SpecName)) - for (const device_image_plain &DeviceImage : MDeviceImages) + for (const device_image_plain &DeviceImage : MUniqueDeviceImages) getSyclObjImpl(DeviceImage) ->set_specialization_constant_raw_value(SpecName, Value); else { @@ -773,7 +791,7 @@ class kernel_bundle_impl { void get_specialization_constant_raw_value(const char *SpecName, void *ValueRet) const noexcept { - for (const device_image_plain &DeviceImage : MDeviceImages) + for (const device_image_plain &DeviceImage : MUniqueDeviceImages) if (getSyclObjImpl(DeviceImage)->has_specialization_constant(SpecName)) { getSyclObjImpl(DeviceImage) ->get_specialization_constant_raw_value(SpecName, ValueRet); @@ -796,7 +814,7 @@ class kernel_bundle_impl { bool is_specialization_constant_set(const char *SpecName) const noexcept { bool SetInDevImg = - std::any_of(MDeviceImages.begin(), MDeviceImages.end(), + std::any_of(MUniqueDeviceImages.begin(), MUniqueDeviceImages.end(), [SpecName](const device_image_plain &DeviceImage) { return getSyclObjImpl(DeviceImage) ->is_specialization_constant_set(SpecName); @@ -804,13 +822,13 @@ class kernel_bundle_impl { return SetInDevImg || MSpecConstValues.count(std::string{SpecName}) != 0; } - const device_image_plain *begin() const { return MDeviceImages.data(); } + const device_image_plain *begin() const { return MUniqueDeviceImages.data(); } const device_image_plain *end() const { - return MDeviceImages.data() + MDeviceImages.size(); + return MUniqueDeviceImages.data() + MUniqueDeviceImages.size(); } - size_t size() const noexcept { return MDeviceImages.size(); } + size_t size() const noexcept { return MUniqueDeviceImages.size(); } bundle_state get_bundle_state() const { return MState; } @@ -827,7 +845,7 @@ class kernel_bundle_impl { // First try and get images in current bundle state const bundle_state BundleState = get_bundle_state(); - std::vector NewDevImgs = + std::vector NewDevImgs = detail::ProgramManager::getInstance().getSYCLDeviceImages( MContext, {Dev}, {KernelID}, BundleState); @@ -836,21 +854,38 @@ class kernel_bundle_impl { return false; // Propagate already set specialization constants to the new images - for (device_image_plain &DevImg : NewDevImgs) - for (auto SpecConst : MSpecConstValues) - getSyclObjImpl(DevImg)->set_specialization_constant_raw_value( - SpecConst.first.c_str(), SpecConst.second.data()); + for (DevImgPlainWithDeps &DevImgWithDeps : NewDevImgs) + for (device_image_plain &DevImg : DevImgWithDeps) + for (auto SpecConst : MSpecConstValues) + getSyclObjImpl(DevImg)->set_specialization_constant_raw_value( + SpecConst.first.c_str(), SpecConst.second.data()); // Add the images to the collection MDeviceImages.insert(MDeviceImages.end(), NewDevImgs.begin(), NewDevImgs.end()); + removeDuplicateImages(); return true; } private: + void fillUniqueDeviceImages() { + assert(MUniqueDeviceImages.empty()); + for (const DevImgPlainWithDeps &Imgs : MDeviceImages) + MUniqueDeviceImages.insert(MUniqueDeviceImages.end(), Imgs.begin(), + Imgs.end()); + removeDuplicateImages(); + } + void removeDuplicateImages() { + std::sort(MUniqueDeviceImages.begin(), MUniqueDeviceImages.end(), + LessByHash{}); + const auto It = + std::unique(MUniqueDeviceImages.begin(), MUniqueDeviceImages.end()); + MUniqueDeviceImages.erase(It, MUniqueDeviceImages.end()); + } context MContext; std::vector MDevices; - std::vector MDeviceImages; + std::vector MDeviceImages; + std::vector MUniqueDeviceImages; // This map stores values for specialization constants, that are missing // from any device image. SpecConstMapT MSpecConstValues; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 58b6aa05afed6..aabc10d78508c 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -603,7 +603,8 @@ static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, return (0 == SuitableImageID); } -static bool checkLinkingSupport(device Dev, const RTDeviceBinaryImage &Img) { +static bool checkLinkingSupport(const device &Dev, + const RTDeviceBinaryImage &Img) { const char *Target = Img.getRawData().DeviceTargetSpec; // TODO replace with extension checks once implemented in UR. if (strcmp(Target, __SYCL_DEVICE_BINARY_TARGET_SPIRV64) == 0) { @@ -615,9 +616,24 @@ static bool checkLinkingSupport(device Dev, const RTDeviceBinaryImage &Img) { return false; } +std::set +ProgramManager::collectDeviceImageDeps(const RTDeviceBinaryImage &Img, + const device &Dev) { + // TODO collecting dependencies for virtual functions and imported symbols + // should be combined since one can lead to new unresolved dependencies for + // the other. + std::set DeviceImagesToLink = + collectDependentDeviceImagesForVirtualFunctions(Img, Dev); + + std::set ImageDeps = + collectDeviceImageDepsForImportedSymbols(Img, Dev); + DeviceImagesToLink.insert(ImageDeps.begin(), ImageDeps.end()); + return DeviceImagesToLink; +} + std::set ProgramManager::collectDeviceImageDepsForImportedSymbols( - const RTDeviceBinaryImage &MainImg, device Dev) { + const RTDeviceBinaryImage &MainImg, const device &Dev) { std::set DeviceImagesToLink; std::set HandledSymbols; std::queue WorkList; @@ -663,7 +679,7 @@ ProgramManager::collectDeviceImageDepsForImportedSymbols( std::set ProgramManager::collectDependentDeviceImagesForVirtualFunctions( - const RTDeviceBinaryImage &Img, device Dev) { + const RTDeviceBinaryImage &Img, const device &Dev) { // If virtual functions are used in a program, then we need to link several // device images together to make sure that vtable pointers stored in // objects are valid between different kernels (which could be in different @@ -793,55 +809,48 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( if (auto exception = checkDevSupportDeviceRequirements(Device, Img, NDRDesc)) throw *exception; - // TODO collecting dependencies for virtual functions and imported symbols - // should be combined since one can lead to new unresolved dependencies for - // the other. std::set DeviceImagesToLink = - collectDependentDeviceImagesForVirtualFunctions(Img, Device); - - std::set ImageDeps = - collectDeviceImageDepsForImportedSymbols(Img, Device); - DeviceImagesToLink.insert(ImageDeps.begin(), ImageDeps.end()); + collectDeviceImageDeps(Img, {Device}); // Decompress all DeviceImagesToLink for (RTDeviceBinaryImage *BinImg : DeviceImagesToLink) CheckAndDecompressImage(BinImg); std::vector AllImages; - AllImages.reserve(ImageDeps.size() + 1); + AllImages.reserve(DeviceImagesToLink.size() + 1); AllImages.push_back(&Img); - std::copy(ImageDeps.begin(), ImageDeps.end(), std::back_inserter(AllImages)); + std::copy(DeviceImagesToLink.begin(), DeviceImagesToLink.end(), + std::back_inserter(AllImages)); - return getBuiltURProgram(Img, Context, {Device}, DeviceImagesToLink, - AllImages); + return getBuiltURProgram(AllImages, Context, {Device}); } ur_program_handle_t ProgramManager::getBuiltURProgram( - const RTDeviceBinaryImage &Img, const context &Context, - const std::vector &Devs, - const std::set &DeviceImagesToLink, - const std::vector &AllImages, - const std::shared_ptr &DeviceImageImpl, + const BinImgWithDeps &ImgWithDeps, const context &Context, + const std::vector &Devs, const DevImgPlainWithDeps *DevImgWithDeps, const SerializedObj &SpecConsts) { std::string CompileOpts; std::string LinkOpts; applyOptionsFromEnvironment(CompileOpts, LinkOpts); - auto BuildF = [this, &Img, &DeviceImageImpl, &Context, &Devs, &CompileOpts, - &LinkOpts, SpecConsts, &DeviceImagesToLink, &AllImages] { + auto BuildF = [this, &ImgWithDeps, &DevImgWithDeps, &Context, &Devs, + &CompileOpts, &LinkOpts, &SpecConsts] { const ContextImplPtr &ContextImpl = getSyclObjImpl(Context); const AdapterPtr &Adapter = ContextImpl->getAdapter(); - applyOptionsFromImage(CompileOpts, LinkOpts, Img, Devs, Adapter); + const RTDeviceBinaryImage &MainImg = *ImgWithDeps.getMain(); + applyOptionsFromImage(CompileOpts, LinkOpts, MainImg, Devs, Adapter); // Should always come last! appendCompileEnvironmentVariablesThatAppend(CompileOpts); appendLinkEnvironmentVariablesThatAppend(LinkOpts); - auto [NativePrg, DeviceCodeWasInCache] = getOrCreateURProgram( - Img, {AllImages}, Context, Devs, CompileOpts + LinkOpts, SpecConsts); + auto [NativePrg, DeviceCodeWasInCache] = + getOrCreateURProgram(MainImg, ImgWithDeps.getAll(), Context, Devs, + CompileOpts + LinkOpts, SpecConsts); - if (!DeviceCodeWasInCache && Img.supportsSpecConstants()) { + if (!DeviceCodeWasInCache && MainImg.supportsSpecConstants()) { enableITTAnnotationsIfNeeded(NativePrg, Adapter); - if (DeviceImageImpl) - setSpecializationConstants(DeviceImageImpl, NativePrg, Adapter); + if (DevImgWithDeps) + setSpecializationConstants(getSyclObjImpl(DevImgWithDeps->getMain()), + NativePrg, Adapter); } UrFuncInfo programReleaseInfo; @@ -857,24 +866,32 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( // no fallback device library will be linked. uint32_t DeviceLibReqMask = 0; bool UseDeviceLibs = !DeviceCodeWasInCache && - Img.getFormat() == SYCL_DEVICE_BINARY_TYPE_SPIRV && + MainImg.getFormat() == SYCL_DEVICE_BINARY_TYPE_SPIRV && !SYCLConfig::get(); if (UseDeviceLibs) - DeviceLibReqMask = getDeviceLibReqMask(Img); + DeviceLibReqMask = getDeviceLibReqMask(MainImg); std::vector ProgramsToLink; // If we had a program in cache, then it should have been the fully linked // program already. if (!DeviceCodeWasInCache) { - for (RTDeviceBinaryImage *BinImg : DeviceImagesToLink) { + assert(!DevImgWithDeps || + DevImgWithDeps->getAll().size() == ImgWithDeps.getAll().size()); + // Oth image is the main one and has been handled, skip it. + for (std::size_t I = 1; I < ImgWithDeps.getAll().size(); ++I) { + const RTDeviceBinaryImage *BinImg = ImgWithDeps.getAll()[I]; if (UseDeviceLibs) DeviceLibReqMask |= getDeviceLibReqMask(*BinImg); ur_program_handle_t NativePrg = createURProgram(*BinImg, Context, Devs); - if (BinImg->supportsSpecConstants()) + if (BinImg->supportsSpecConstants()) { enableITTAnnotationsIfNeeded(NativePrg, Adapter); - + if (DevImgWithDeps) + setSpecializationConstants( + getSyclObjImpl(DevImgWithDeps->getAll()[I]), NativePrg, + Adapter); + } ProgramsToLink.push_back(NativePrg); } } @@ -883,10 +900,11 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( for (auto Dev : Devs) URDevices.push_back(getSyclObjImpl(Dev).get()->getHandleRef()); - ProgramPtr BuiltProgram = build( - std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts, - URDevices, DeviceLibReqMask, ProgramsToLink, - /*CreatedFromBinary*/ Img.getFormat() != SYCL_DEVICE_BINARY_TYPE_SPIRV); + ProgramPtr BuiltProgram = + build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts, + URDevices, DeviceLibReqMask, ProgramsToLink, + /*CreatedFromBinary*/ MainImg.getFormat() != + SYCL_DEVICE_BINARY_TYPE_SPIRV); // Those extra programs won't be used anymore, just the final linked result for (ur_program_handle_t Prg : ProgramsToLink) @@ -895,19 +913,18 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( { std::lock_guard Lock(MNativeProgramsMutex); - NativePrograms.insert({BuiltProgram.get(), &Img}); - for (RTDeviceBinaryImage *LinkedImg : DeviceImagesToLink) { - NativePrograms.insert({BuiltProgram.get(), LinkedImg}); + for (const RTDeviceBinaryImage *Img : ImgWithDeps) { + NativePrograms.insert({BuiltProgram.get(), Img}); } } - ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), Devs, &Img); + ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), Devs, &MainImg); // Save program to persistent cache if it is not there if (!DeviceCodeWasInCache) { - PersistentDeviceCodeCache::putItemToDisc(Devs, AllImages, SpecConsts, - CompileOpts + LinkOpts, - BuiltProgram.get()); + PersistentDeviceCodeCache::putItemToDisc( + Devs, ImgWithDeps.getAll(), SpecConsts, CompileOpts + LinkOpts, + BuiltProgram.get()); } return BuiltProgram.release(); @@ -916,7 +933,7 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( if (!SYCLConfig::get()) return BuildF(); - uint32_t ImgId = Img.getImageID(); + uint32_t ImgId = ImgWithDeps.getMain()->getImageID(); std::set URDevicesSet; std::transform(Devs.begin(), Devs.end(), std::inserter(URDevicesSet, URDevicesSet.begin()), @@ -951,8 +968,9 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( // If we linked any extra device images, then we need to // cache them as well. auto CacheLinkedImages = [&Adapter, &Cache, &CacheKey, &ResProgram, - &DeviceImagesToLink] { - for (const RTDeviceBinaryImage *BImg : DeviceImagesToLink) { + &ImgWithDeps] { + for (auto It = ImgWithDeps.depsBegin(); It != ImgWithDeps.depsEnd(); ++It) { + const RTDeviceBinaryImage *BImg = *It; // CacheKey is captured by reference by GetCachedBuildF, so we can simply // update it here and re-use that lambda. CacheKey.first.second = BImg->getImageID(); @@ -2038,12 +2056,17 @@ static bundle_state getBinImageState(const RTDeviceBinaryImage *BinImage) { (strcmp(Format, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_FPGA) == 0)); }; - // There are only two initial states so far - SPIRV which needs to be compiled - // and linked and fully compiled(AOTed) binary + // Three possible initial states: + // - SPIRV that needs to be compiled and linked + // - AOT compiled binary with dependnecies, needs linking. + // - AOT compiled binary without dependencies. const bool IsAOT = IsAOTBinary(BinImage->getRawData().DeviceTargetSpec); - return IsAOT ? sycl::bundle_state::executable : sycl::bundle_state::input; + if (!IsAOT) + return sycl::bundle_state::input; + return BinImage->getImportedSymbols().empty() ? sycl::bundle_state::executable + : sycl::bundle_state::object; } kernel_id ProgramManager::getSYCLKernelID(const std::string &KernelName) { @@ -2199,7 +2222,7 @@ device_image_plain ProgramManager::getDeviceImageFromBinaryImage( return createSyclObjFromImpl(Impl); } -std::vector +std::vector ProgramManager::getSYCLDeviceImagesWithCompatibleState( const context &Ctx, const std::vector &Devs, bundle_state TargetState, const std::vector &KernelIDs) { @@ -2242,8 +2265,6 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( ++It; } - std::vector SYCLDeviceImages; - // If a non-input state is requested, we can filter out some compatible // images and return only those with the highest compatible state for each // device-kernel pair. This map tracks how many kernel-device pairs need each @@ -2252,6 +2273,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( // a separate branch for that case to avoid unnecessary tracking work. struct DeviceBinaryImageInfo { std::shared_ptr> KernelIDs; + std::set Deps; bundle_state State = bundle_state::input; int RequirementCounter = 0; }; @@ -2282,6 +2304,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( std::lock_guard KernelIDsGuard(m_KernelIDsMutex); ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage]; } + ImgInfo.Deps = collectDeviceImageDeps(*BinImage, {Dev}); } const bundle_state ImgState = ImgInfo.State; const std::shared_ptr> &ImageKernelIDs = @@ -2332,26 +2355,59 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( } } + // Filter out main images that are represented as dependencies of other chosen + // images to avoid unnecessary duplication. + // TODO it might make sense to do something about shared dependencies as well. + for (const auto &ImgInfoPair : ImageInfoMap) { + if (ImgInfoPair.second.RequirementCounter == 0) + continue; + for (RTDeviceBinaryImage *Dep : ImgInfoPair.second.Deps) { + auto It = ImageInfoMap.find(Dep); + if (It != ImageInfoMap.end()) + It->second.RequirementCounter = 0; + } + } + + std::vector SYCLDeviceImages; for (const auto &ImgInfoPair : ImageInfoMap) { if (ImgInfoPair.second.RequirementCounter == 0) continue; - DeviceImageImplPtr Impl = std::make_shared( + DeviceImageImplPtr MainImpl = std::make_shared( ImgInfoPair.first, Ctx, Devs, ImgInfoPair.second.State, ImgInfoPair.second.KernelIDs, /*PIProgram=*/nullptr); - SYCLDeviceImages.push_back(createSyclObjFromImpl(Impl)); + std::vector Images; + const std::set &Deps = ImgInfoPair.second.Deps; + Images.reserve(Deps.size() + 1); + Images.push_back(createSyclObjFromImpl(MainImpl)); + for (RTDeviceBinaryImage *Dep : Deps) { + std::shared_ptr> DepKernelIDs; + { + std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + DepKernelIDs = m_BinImg2KernelIDs[Dep]; + } + + assert(ImgInfoPair.second.State == getBinImageState(Dep) && + "State mismatch between main image and its dependency"); + DeviceImageImplPtr DepImpl = std::make_shared( + Dep, Ctx, Devs, ImgInfoPair.second.State, DepKernelIDs, + /*PIProgram=*/nullptr); + + Images.push_back(createSyclObjFromImpl(DepImpl)); + } + SYCLDeviceImages.push_back(std::move(Images)); } return SYCLDeviceImages; } void ProgramManager::bringSYCLDeviceImagesToState( - std::vector &DeviceImages, bundle_state TargetState) { - - for (device_image_plain &DevImage : DeviceImages) { - const bundle_state DevImageState = getSyclObjImpl(DevImage)->get_state(); + std::vector &DeviceImages, bundle_state TargetState) { + for (DevImgPlainWithDeps &ImgWithDeps : DeviceImages) { + device_image_plain &MainImg = ImgWithDeps.getMain(); + const bundle_state DevImageState = getSyclObjImpl(MainImg)->get_state(); // At this time, there is no circumstance where a device image should ever // be in the source state. That not good. assert(DevImageState != bundle_state::ext_oneapi_source); @@ -2367,8 +2423,9 @@ void ProgramManager::bringSYCLDeviceImagesToState( break; case bundle_state::object: if (DevImageState == bundle_state::input) { - DevImage = compile(DevImage, getSyclObjImpl(DevImage)->get_devices(), - /*PropList=*/{}); + ImgWithDeps = + compile(ImgWithDeps, getSyclObjImpl(MainImg)->get_devices(), + /*PropList=*/{}); break; } // Device image is expected to be object state then. @@ -2382,22 +2439,22 @@ void ProgramManager::bringSYCLDeviceImagesToState( assert(DevImageState != bundle_state::ext_oneapi_source); break; case bundle_state::input: - DevImage = build(DevImage, getSyclObjImpl(DevImage)->get_devices(), - /*PropList=*/{}); + ImgWithDeps = build(ImgWithDeps, getSyclObjImpl(MainImg)->get_devices(), + /*PropList=*/{}); break; case bundle_state::object: { std::vector LinkedDevImages = - link({DevImage}, getSyclObjImpl(DevImage)->get_devices(), + link(ImgWithDeps, getSyclObjImpl(MainImg)->get_devices(), /*PropList=*/{}); // Since only one device image is passed here one output device image is // expected assert(LinkedDevImages.size() == 1 && "Expected one linked image here"); - DevImage = LinkedDevImages[0]; + ImgWithDeps = LinkedDevImages[0]; break; } case bundle_state::executable: - DevImage = build(DevImage, getSyclObjImpl(DevImage)->get_devices(), - /*PropList=*/{}); + ImgWithDeps = build(ImgWithDeps, getSyclObjImpl(MainImg)->get_devices(), + /*PropList=*/{}); break; } break; @@ -2406,30 +2463,33 @@ void ProgramManager::bringSYCLDeviceImagesToState( } } -std::vector +std::vector ProgramManager::getSYCLDeviceImages(const context &Ctx, const std::vector &Devs, bundle_state TargetState) { // Collect device images with compatible state - std::vector DeviceImages = + std::vector DeviceImages = getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState); // Bring device images with compatible state to desired state. bringSYCLDeviceImagesToState(DeviceImages, TargetState); return DeviceImages; } -std::vector ProgramManager::getSYCLDeviceImages( +std::vector ProgramManager::getSYCLDeviceImages( const context &Ctx, const std::vector &Devs, const DevImgSelectorImpl &Selector, bundle_state TargetState) { // Collect device images with compatible state - std::vector DeviceImages = + std::vector DeviceImages = getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState); // Filter out images that are rejected by Selector - auto It = std::remove_if(DeviceImages.begin(), DeviceImages.end(), - [&Selector](const device_image_plain &Image) { - return !Selector(getSyclObjImpl(Image)); - }); + // TODO Clarify spec, should the selector be able to affect dependent images + // here? + auto It = std::remove_if( + DeviceImages.begin(), DeviceImages.end(), + [&Selector](const DevImgPlainWithDeps &ImageWithDeps) { + return !Selector(getSyclObjImpl(ImageWithDeps.getMain())); + }); DeviceImages.erase(It, DeviceImages.end()); // The spec says that the function should not call online compiler or linker @@ -2437,7 +2497,7 @@ std::vector ProgramManager::getSYCLDeviceImages( return DeviceImages; } -std::vector ProgramManager::getSYCLDeviceImages( +std::vector ProgramManager::getSYCLDeviceImages( const context &Ctx, const std::vector &Devs, const std::vector &KernelIDs, bundle_state TargetState) { // Fast path for when no kernel IDs are requested @@ -2457,7 +2517,7 @@ std::vector ProgramManager::getSYCLDeviceImages( } // Collect device images with compatible state - std::vector DeviceImages = + std::vector DeviceImages = getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState, KernelIDs); // Bring device images with compatible state to desired state. @@ -2465,8 +2525,8 @@ std::vector ProgramManager::getSYCLDeviceImages( return DeviceImages; } -device_image_plain -ProgramManager::compile(const device_image_plain &DeviceImage, +DevImgPlainWithDeps +ProgramManager::compile(const DevImgPlainWithDeps &ImgWithDeps, const std::vector &Devs, const property_list &) { @@ -2474,57 +2534,110 @@ ProgramManager::compile(const device_image_plain &DeviceImage, // how they can be passed. // TODO: Probably we could have cached compiled device images. - const std::shared_ptr &InputImpl = - getSyclObjImpl(DeviceImage); - - const AdapterPtr &Adapter = - getSyclObjImpl(InputImpl->get_context())->getAdapter(); - - ur_program_handle_t Prog = createURProgram(*InputImpl->get_bin_image_ref(), - InputImpl->get_context(), Devs); - - if (InputImpl->get_bin_image_ref()->supportsSpecConstants()) - setSpecializationConstants(InputImpl, Prog, Adapter); - - DeviceImageImplPtr ObjectImpl = std::make_shared( - InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs, - bundle_state::object, InputImpl->get_kernel_ids_ptr(), Prog, - InputImpl->get_spec_const_data_ref(), - InputImpl->get_spec_const_blob_ref()); + // TODO: Handle zero sized Device list. std::vector URDevices; URDevices.reserve(Devs.size()); for (const device &Dev : Devs) URDevices.push_back(getSyclObjImpl(Dev)->getHandleRef()); - // TODO: Handle zero sized Device list. - std::string CompileOptions; - applyCompileOptionsFromEnvironment(CompileOptions); - appendCompileOptionsFromImage( - CompileOptions, *(InputImpl->get_bin_image_ref()), Devs, Adapter); - // Should always come last! - appendCompileEnvironmentVariablesThatAppend(CompileOptions); - ur_result_t Error = doCompile( - Adapter, ObjectImpl->get_ur_program_ref(), Devs.size(), URDevices.data(), - getSyclObjImpl(InputImpl->get_context()).get()->getHandleRef(), - CompileOptions.c_str()); - if (Error != UR_RESULT_SUCCESS) - throw sycl::exception( - make_error_code(errc::build), - getProgramBuildLog(ObjectImpl->get_ur_program_ref(), - getSyclObjImpl(ObjectImpl->get_context()))); + std::vector CompiledImages; + CompiledImages.reserve(ImgWithDeps.size()); + for (const device_image_plain &DeviceImage : ImgWithDeps.getAll()) { + const std::shared_ptr &InputImpl = + getSyclObjImpl(DeviceImage); + + const AdapterPtr &Adapter = + getSyclObjImpl(InputImpl->get_context())->getAdapter(); + + ur_program_handle_t Prog = createURProgram(*InputImpl->get_bin_image_ref(), + InputImpl->get_context(), Devs); + + if (InputImpl->get_bin_image_ref()->supportsSpecConstants()) + setSpecializationConstants(InputImpl, Prog, Adapter); - return createSyclObjFromImpl(ObjectImpl); + DeviceImageImplPtr ObjectImpl = std::make_shared( + InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs, + bundle_state::object, InputImpl->get_kernel_ids_ptr(), Prog, + InputImpl->get_spec_const_data_ref(), + InputImpl->get_spec_const_blob_ref()); + + std::string CompileOptions; + applyCompileOptionsFromEnvironment(CompileOptions); + appendCompileOptionsFromImage( + CompileOptions, *(InputImpl->get_bin_image_ref()), Devs, Adapter); + // Should always come last! + appendCompileEnvironmentVariablesThatAppend(CompileOptions); + ur_result_t Error = doCompile( + Adapter, ObjectImpl->get_ur_program_ref(), Devs.size(), + URDevices.data(), + getSyclObjImpl(InputImpl->get_context()).get()->getHandleRef(), + CompileOptions.c_str()); + if (Error != UR_RESULT_SUCCESS) + throw sycl::exception( + make_error_code(errc::build), + getProgramBuildLog(ObjectImpl->get_ur_program_ref(), + getSyclObjImpl(ObjectImpl->get_context()))); + + CompiledImages.push_back( + createSyclObjFromImpl(ObjectImpl)); + } + return CompiledImages; +} + +static void mergeImageData(const std::vector &Imgs, + std::vector &KernelIDs, + std::vector &NewSpecConstBlob, + device_image_impl::SpecConstMapT &NewSpecConstMap) { + for (const device_image_plain &Img : Imgs) { + std::shared_ptr DeviceImageImpl = getSyclObjImpl(Img); + // Duplicates are not expected here, otherwise urProgramLink should fail + KernelIDs.insert(KernelIDs.end(), + DeviceImageImpl->get_kernel_ids_ptr()->begin(), + DeviceImageImpl->get_kernel_ids_ptr()->end()); + // To be able to answer queries about specialziation constants, the new + // device image should have the specialization constants from all the linked + // images. + const std::lock_guard SpecConstLock( + DeviceImageImpl->get_spec_const_data_lock()); + // Copy all map entries to the new map. Since the blob will be copied to + // the end of the new blob we need to move the blob offset of each entry. + for (const auto &SpecConstIt : DeviceImageImpl->get_spec_const_data_ref()) { + std::vector &NewDescEntries = + NewSpecConstMap[SpecConstIt.first]; + + if (NewDescEntries.empty()) { + NewDescEntries.reserve(SpecConstIt.second.size()); + for (const device_image_impl::SpecConstDescT &SpecConstDesc : + SpecConstIt.second) { + device_image_impl::SpecConstDescT NewSpecConstDesc = SpecConstDesc; + NewSpecConstDesc.BlobOffset += NewSpecConstBlob.size(); + NewDescEntries.push_back(std::move(NewSpecConstDesc)); + } + } + } + + // Copy the blob from the device image into the new blob. This moves the + // offsets of the following blobs. + NewSpecConstBlob.insert(NewSpecConstBlob.end(), + DeviceImageImpl->get_spec_const_blob_ref().begin(), + DeviceImageImpl->get_spec_const_blob_ref().end()); + } + // device_image_impl expects kernel ids to be sorted for fast search + std::sort(KernelIDs.begin(), KernelIDs.end(), LessByHash{}); } std::vector -ProgramManager::link(const device_image_plain &DeviceImage, +ProgramManager::link(const DevImgPlainWithDeps &ImgWithDeps, const std::vector &Devs, const property_list &PropList) { (void)PropList; + const std::vector &Imgs = ImgWithDeps.getAll(); std::vector URPrograms; - URPrograms.push_back(getSyclObjImpl(DeviceImage)->get_ur_program_ref()); + URPrograms.reserve(Imgs.size()); + for (const device_image_plain &Img : Imgs) + URPrograms.push_back(getSyclObjImpl(Img)->get_ur_program_ref()); std::vector URDevices; URDevices.reserve(Devs.size()); @@ -2533,15 +2646,16 @@ ProgramManager::link(const device_image_plain &DeviceImage, std::string LinkOptionsStr; applyLinkOptionsFromEnvironment(LinkOptionsStr); + const device_image_plain &MainImg = ImgWithDeps.getMain(); if (LinkOptionsStr.empty()) { const std::shared_ptr &InputImpl = - getSyclObjImpl(DeviceImage); + getSyclObjImpl(MainImg); appendLinkOptionsFromImage(LinkOptionsStr, *(InputImpl->get_bin_image_ref())); } // Should always come last! appendLinkEnvironmentVariablesThatAppend(LinkOptionsStr); - const context &Context = getSyclObjImpl(DeviceImage)->get_context(); + const context &Context = getSyclObjImpl(MainImg)->get_context(); const ContextImplPtr ContextImpl = getSyclObjImpl(Context); const AdapterPtr &Adapter = ContextImpl->getAdapter(); @@ -2578,49 +2692,9 @@ ProgramManager::link(const device_image_plain &DeviceImage, std::shared_ptr> KernelIDs{new std::vector}; std::vector NewSpecConstBlob; device_image_impl::SpecConstMapT NewSpecConstMap; + mergeImageData(Imgs, *KernelIDs, NewSpecConstBlob, NewSpecConstMap); - std::shared_ptr DeviceImageImpl = - getSyclObjImpl(DeviceImage); - - // Duplicates are not expected here, otherwise urProgramLink should fail - KernelIDs->insert(KernelIDs->end(), - DeviceImageImpl->get_kernel_ids_ptr()->begin(), - DeviceImageImpl->get_kernel_ids_ptr()->end()); - - // To be able to answer queries about specialziation constants, the new - // device image should have the specialization constants from all the linked - // images. - { - const std::lock_guard SpecConstLock( - DeviceImageImpl->get_spec_const_data_lock()); - - // Copy all map entries to the new map. Since the blob will be copied to - // the end of the new blob we need to move the blob offset of each entry. - for (const auto &SpecConstIt : DeviceImageImpl->get_spec_const_data_ref()) { - std::vector &NewDescEntries = - NewSpecConstMap[SpecConstIt.first]; - assert(NewDescEntries.empty() && - "Specialization constant already exists in the map."); - NewDescEntries.reserve(SpecConstIt.second.size()); - for (const device_image_impl::SpecConstDescT &SpecConstDesc : - SpecConstIt.second) { - device_image_impl::SpecConstDescT NewSpecConstDesc = SpecConstDesc; - NewSpecConstDesc.BlobOffset += NewSpecConstBlob.size(); - NewDescEntries.push_back(std::move(NewSpecConstDesc)); - } - } - - // Copy the blob from the device image into the new blob. This moves the - // offsets of the following blobs. - NewSpecConstBlob.insert(NewSpecConstBlob.end(), - DeviceImageImpl->get_spec_const_blob_ref().begin(), - DeviceImageImpl->get_spec_const_blob_ref().end()); - } - - // device_image_impl expects kernel ids to be sorted for fast search - std::sort(KernelIDs->begin(), KernelIDs->end(), LessByHash{}); - - auto BinImg = getSyclObjImpl(DeviceImage)->get_bin_image_ref(); + auto BinImg = getSyclObjImpl(MainImg)->get_bin_image_ref(); DeviceImageImplPtr ExecutableImpl = std::make_shared( BinImg, Context, Devs, bundle_state::executable, std::move(KernelIDs), @@ -2635,32 +2709,51 @@ ProgramManager::link(const device_image_plain &DeviceImage, // The differences are: // Different API - uses different objects to extract required info // Supports caching of a program built for multiple devices -device_image_plain ProgramManager::build(const device_image_plain &DeviceImage, - const std::vector &Devs, - const property_list &PropList) { +device_image_plain +ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, + const std::vector &Devs, + const property_list &PropList) { (void)PropList; - const std::shared_ptr &InputImpl = - getSyclObjImpl(DeviceImage); + const std::shared_ptr &MainInputImpl = + getSyclObjImpl(DevImgWithDeps.getMain()); - const context Context = InputImpl->get_context(); + const context Context = MainInputImpl->get_context(); const ContextImplPtr ContextImpl = getSyclObjImpl(Context); - const RTDeviceBinaryImage *ImgPtr = InputImpl->get_bin_image_ref(); - const RTDeviceBinaryImage &Img = *ImgPtr; - - SerializedObj SpecConsts = InputImpl->get_spec_const_blob_ref(); + std::vector BinImgs; + BinImgs.reserve(DevImgWithDeps.size()); + for (const device_image_plain &DevImg : DevImgWithDeps) + BinImgs.push_back(getSyclObjImpl(DevImg)->get_bin_image_ref()); + + std::shared_ptr> KernelIDs; + std::vector SpecConstBlob; + device_image_impl::SpecConstMapT SpecConstMap; + + if (DevImgWithDeps.hasDeps()) { + KernelIDs = std::make_shared>(); + // Sort the images to make the order of spec constant values used for + // caching consistent. + std::vector SortedImgs = DevImgWithDeps.getAll(); + std::sort(SortedImgs.begin(), SortedImgs.end(), + [](const auto &A, const auto &B) { + return getSyclObjImpl(A)->get_bin_image_ref()->getImageID() < + getSyclObjImpl(B)->get_bin_image_ref()->getImageID(); + }); + mergeImageData(SortedImgs, *KernelIDs, SpecConstBlob, SpecConstMap); + } else { + KernelIDs = MainInputImpl->get_kernel_ids_ptr(); + SpecConstBlob = MainInputImpl->get_spec_const_blob_ref(); + SpecConstMap = MainInputImpl->get_spec_const_data_ref(); + } - // TODO: Add support for dynamic linking with kernel bundles ur_program_handle_t ResProgram = - getBuiltURProgram(Img, Context, Devs, /*DeviceImagesToLink*/ {}, {&Img}, - InputImpl, SpecConsts); + getBuiltURProgram(BinImgs, Context, Devs, &DevImgWithDeps, SpecConstBlob); DeviceImageImplPtr ExecImpl = std::make_shared( - InputImpl->get_bin_image_ref(), Context, Devs, bundle_state::executable, - InputImpl->get_kernel_ids_ptr(), ResProgram, - InputImpl->get_spec_const_data_ref(), - InputImpl->get_spec_const_blob_ref()); + MainInputImpl->get_bin_image_ref(), Context, Devs, + bundle_state::executable, std::move(KernelIDs), ResProgram, + std::move(SpecConstMap), std::move(SpecConstBlob)); return createSyclObjFromImpl(ExecImpl); } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 75689aadfb290..80a8b755e6f6c 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -87,6 +87,34 @@ enum class DeviceLibExt : std::uint32_t { cl_intel_devicelib_bfloat16, }; +// A helper class for storing image/program objects and their dependencies +// and making their handling a bit more readable. +template class ObjectWithDeps { +public: + ObjectWithDeps(T Main) : Objs({std::move(Main)}) {} + // Assumes 0th element is the main one. + ObjectWithDeps(std::vector AllObjs) : Objs{std::move(AllObjs)} {} + + T &getMain() { return *Objs.begin(); } + const T &getMain() const { return *Objs.begin(); } + const std::vector &getAll() const { return Objs; } + std::size_t size() const { return Objs.size(); } + bool hasDeps() const { return Objs.size() > 1; } + auto begin() { return Objs.begin(); } + auto begin() const { return Objs.begin(); } + auto end() { return Objs.end(); } + auto end() const { return Objs.end(); } + // TODO use a subrange once C++20 is available + auto depsBegin() const { return Objs.begin() + 1; } + auto depsEnd() const { return Objs.end(); } + +private: + std::vector Objs; +}; + +using DevImgPlainWithDeps = ObjectWithDeps; +using BinImgWithDeps = ObjectWithDeps; + // Provides single loading and building OpenCL programs with unique contexts // that is necessary for no interoperability cases with lambda. class ProgramManager { @@ -149,23 +177,19 @@ class ProgramManager { /// Builds a program from a given set of images or retrieves that program from /// cache. - /// \param Img is the main image the program is built with. + /// \param ImgWithDeps is the main image the program is built with and its + /// dependencies. /// \param Context is the context the program is built for. /// \param Devs is a vector of devices the program is built for. - /// \param DeviceImagesToLink is a set of image dependencies required by the - /// main image. - /// \param AllImages is a vector of all images the program is built with. - /// \param DeviceImageImpl is an optional device_image_impl pointer that - /// represents the main image. + /// \param DevImgWithDeps is an optional DevImgPlainWithDeps pointer that + /// represents the images. /// \param SpecConsts is an optional parameter containing spec constant values /// the program should be built with. - ur_program_handle_t getBuiltURProgram( - const RTDeviceBinaryImage &Img, const context &Context, - const std::vector &Devs, - const std::set &DeviceImagesToLink, - const std::vector &AllImages, - const std::shared_ptr &DeviceImageImpl = nullptr, - const SerializedObj &SpecConsts = {}); + ur_program_handle_t + getBuiltURProgram(const BinImgWithDeps &ImgWithDeps, const context &Context, + const std::vector &Devs, + const DevImgPlainWithDeps *DevImgWithDeps = nullptr, + const SerializedObj &SpecConsts = {}); std::tuple @@ -248,25 +272,25 @@ class ProgramManager { // The function returns a vector of SYCL device images that are compiled with // the required state and at least one device from the passed list of devices. - std::vector getSYCLDeviceImagesWithCompatibleState( + std::vector getSYCLDeviceImagesWithCompatibleState( const context &Ctx, const std::vector &Devs, bundle_state TargetState, const std::vector &KernelIDs = {}); // Brind images in the passed vector to the required state. Does it inplace void - bringSYCLDeviceImagesToState(std::vector &DeviceImages, + bringSYCLDeviceImagesToState(std::vector &DeviceImages, bundle_state TargetState); // The function returns a vector of SYCL device images in required state, // which are compatible with at least one of the device from Devs. - std::vector + std::vector getSYCLDeviceImages(const context &Ctx, const std::vector &Devs, bundle_state State); // The function returns a vector of SYCL device images, for which Selector // callable returns true, in required state, which are compatible with at // least one of the device from Devs. - std::vector + std::vector getSYCLDeviceImages(const context &Ctx, const std::vector &Devs, const DevImgSelectorImpl &Selector, bundle_state TargetState); @@ -274,26 +298,26 @@ class ProgramManager { // The function returns a vector of SYCL device images which represent at // least one kernel from kernel ids vector in required state, which are // compatible with at least one of the device from Devs. - std::vector + std::vector getSYCLDeviceImages(const context &Ctx, const std::vector &Devs, const std::vector &KernelIDs, bundle_state TargetState); // Produces new device image by convering input device image to the object // state - device_image_plain compile(const device_image_plain &DeviceImage, - const std::vector &Devs, - const property_list &PropList); + DevImgPlainWithDeps compile(const DevImgPlainWithDeps &ImgWithDeps, + const std::vector &Devs, + const property_list &PropList); // Produces set of device images by convering input device images to object // the executable state - std::vector link(const device_image_plain &DeviceImages, + std::vector link(const DevImgPlainWithDeps &ImgWithDeps, const std::vector &Devs, const property_list &PropList); // Produces new device image by converting input device image to the // executable state - device_image_plain build(const device_image_plain &DeviceImage, + device_image_plain build(const DevImgPlainWithDeps &ImgWithDeps, const std::vector &Devs, const property_list &PropList); @@ -331,13 +355,14 @@ class ProgramManager { /// Add info on kernels using assert into cache void cacheKernelUsesAssertInfo(RTDeviceBinaryImage &Img); + std::set + collectDeviceImageDeps(const RTDeviceBinaryImage &Img, const device &Dev); std::set collectDeviceImageDepsForImportedSymbols(const RTDeviceBinaryImage &Img, - device Dev); - + const device &Dev); std::set collectDependentDeviceImagesForVirtualFunctions( - const RTDeviceBinaryImage &Img, device Dev); + const RTDeviceBinaryImage &Img, const device &Dev); /// The three maps below are used during kernel resolution. Any kernel is /// identified by its name. diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index a5bf9245c6d82..e19c2b9df2a75 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -195,11 +195,11 @@ bool has_kernel_bundle_impl(const context &Ctx, const std::vector &Devs, !checkAllDevicesHaveAspect(Devs, aspect::online_linker)) return false; - const std::vector DeviceImages = + const std::vector DeviceImages = detail::ProgramManager::getInstance() .getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, State); - return (bool)DeviceImages.size(); + return !DeviceImages.empty(); } bool has_kernel_bundle_impl(const context &Ctx, const std::vector &Devs, @@ -229,17 +229,19 @@ bool has_kernel_bundle_impl(const context &Ctx, const std::vector &Devs, if (!DeviceHasRequireAspectForState) return false; - const std::vector DeviceImages = + const std::vector DeviceImagesWithDeps = detail::ProgramManager::getInstance() .getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, State); std::set CombinedKernelIDs; - for (const device_image_plain &DeviceImage : DeviceImages) { - const std::shared_ptr &DeviceImageImpl = - getSyclObjImpl(DeviceImage); + for (const DevImgPlainWithDeps &DeviceImageWithDeps : DeviceImagesWithDeps) { + for (const device_image_plain &DeviceImage : DeviceImageWithDeps) { + const std::shared_ptr &DeviceImageImpl = + getSyclObjImpl(DeviceImage); - CombinedKernelIDs.insert(DeviceImageImpl->get_kernel_ids_ptr()->begin(), - DeviceImageImpl->get_kernel_ids_ptr()->end()); + CombinedKernelIDs.insert(DeviceImageImpl->get_kernel_ids_ptr()->begin(), + DeviceImageImpl->get_kernel_ids_ptr()->end()); + } } const bool AllKernelIDsRepresented = diff --git a/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp b/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp new file mode 100644 index 0000000000000..69827d68a7bf3 --- /dev/null +++ b/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp @@ -0,0 +1,49 @@ +#include "a.hpp" +#include +#include + +#include +#include + +using namespace sycl; + +template class Kernel; +template void runTest(queue &q, T SubmitOp) { + int val = 0; + { + buffer buf(&val, range<1>(1)); + SubmitOp(q, buf); + } + std::cout << "val=" << std::hex << val << "\n"; + assert(val == 0xDCBA); +} + +int main() { + queue q; + runTest(q, [](queue &q, buffer &buf) { + q.submit([&](handler &cgh) { + auto acc = buf.get_access(cgh); + cgh.single_task>([=]() { acc[0] = levelA(acc[0]); }); + }); + }); + runTest(q, [](queue &q, buffer &buf) { + kernel_bundle KB = get_kernel_bundle( + q.get_context() /*, {get_kernel_id>()}*/); + q.submit([&](handler &cgh) { + auto acc = buf.get_access(cgh); + cgh.use_kernel_bundle(KB); + cgh.single_task>([=]() { acc[0] = levelA(acc[0]); }); + }); + }); + runTest(q, [](queue &q, buffer &buf) { + kernel_bundle KBInput = get_kernel_bundle( + q.get_context() /*,{get_kernel_id>()}*/); + kernel_bundle KBObject = compile(KBInput); + kernel_bundle KBLinked = link(KBObject); + q.submit([&](handler &cgh) { + auto acc = buf.get_access(cgh); + cgh.use_kernel_bundle(KBLinked); + cgh.single_task>([=]() { acc[0] = levelA(acc[0]); }); + }); + }); +} \ No newline at end of file diff --git a/sycl/test-e2e/DeviceImageDependencies/dynamic.cpp b/sycl/test-e2e/DeviceImageDependencies/dynamic.cpp index 8b35cbcf6d64b..5952e4e418935 100644 --- a/sycl/test-e2e/DeviceImageDependencies/dynamic.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/dynamic.cpp @@ -10,35 +10,10 @@ // RUN: %clangxx %{dynamic_lib_options} %S/Inputs/b.cpp %if windows %{%T/libdevice_c.lib%} -o %T/libdevice_b.%{dynamic_lib_suffix} // RUN: %clangxx %{dynamic_lib_options} %S/Inputs/a.cpp %if windows %{%T/libdevice_b.lib%} -o %T/libdevice_a.%{dynamic_lib_suffix} -// RUN: %{build} -fsycl-allow-device-image-dependencies -I %S/Inputs -o %t.out \ +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} -fsycl-allow-device-image-dependencies -fsycl-device-code-split=per_kernel %S/Inputs/basic.cpp -o %t.out \ // RUN: %if windows \ // RUN: %{%T/libdevice_a.lib%} \ // RUN: %else \ // RUN: %{-L%T -ldevice_a -ldevice_b -ldevice_c -ldevice_d -Wl,-rpath=%T%} // RUN: %{run} %t.out - -#include "a.hpp" -#include -#include - -using namespace sycl; - -class ExeKernel; - -int main() { - int val = 0; - { - buffer buf(&val, range<1>(1)); - queue q; - q.submit([&](handler &cgh) { - auto acc = buf.get_access(cgh); - cgh.single_task([=]() { acc[0] = levelA(acc[0]); }); - }); - } - - std::cout << "val=" << std::hex << val << "\n"; - if (val != 0xDCBA) - return (1); - return (0); -} diff --git a/sycl/test-e2e/DeviceImageDependencies/objects.cpp b/sycl/test-e2e/DeviceImageDependencies/objects.cpp index 9cd27f9f9ca6b..17409b209781c 100644 --- a/sycl/test-e2e/DeviceImageDependencies/objects.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/objects.cpp @@ -6,30 +6,5 @@ // RUN: %clangxx -fsycl %S/Inputs/b.cpp -I %S/Inputs -c -o %t_b.o // RUN: %clangxx -fsycl %S/Inputs/c.cpp -I %S/Inputs -c -o %t_c.o // RUN: %clangxx -fsycl %S/Inputs/d.cpp -I %S/Inputs -c -o %t_d.o -// RUN: %{build} -fsycl-allow-device-image-dependencies %t_a.o %t_b.o %t_c.o %t_d.o -I %S/Inputs -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} -fsycl-device-code-split=per_kernel -fsycl-allow-device-image-dependencies %t_a.o %t_b.o %t_c.o %t_d.o %S/Inputs/basic.cpp -o %t.out // RUN: %{run} %t.out - -#include "a.hpp" -#include -#include - -using namespace sycl; - -class ExeKernel; - -int main() { - int val = 0; - { - buffer buf(&val, range<1>(1)); - queue q; - q.submit([&](handler &cgh) { - auto acc = buf.get_access(cgh); - cgh.single_task([=]() { acc[0] = levelA(acc[0]); }); - }); - } - - std::cout << "val=" << std::hex << val << "\n"; - if (val != 0xDCBA) - return (1); - return (0); -} diff --git a/sycl/unittests/program_manager/CMakeLists.txt b/sycl/unittests/program_manager/CMakeLists.txt index 3d706b959f827..65eda325ccec0 100644 --- a/sycl/unittests/program_manager/CMakeLists.txt +++ b/sycl/unittests/program_manager/CMakeLists.txt @@ -2,11 +2,10 @@ set(CMAKE_CXX_EXTENSIONS OFF) add_sycl_unittest(ProgramManagerTests OBJECT CompileTarget.cpp BuildLog.cpp - DynamicLinking.cpp itt_annotations.cpp SubDevices.cpp passing_link_and_compile_options.cpp ) add_subdirectory(arg_mask) - +add_subdirectory(DynamicLinking) \ No newline at end of file diff --git a/sycl/unittests/program_manager/DynamicLinking/CMakeLists.txt b/sycl/unittests/program_manager/DynamicLinking/CMakeLists.txt new file mode 100644 index 0000000000000..cccc9a9cf1f17 --- /dev/null +++ b/sycl/unittests/program_manager/DynamicLinking/CMakeLists.txt @@ -0,0 +1,5 @@ +# These tests introduce images with unresolved dependencies, which can affect other tests if they use kernel bundle API. +# TODO this can be merged back into program manager tests once __sycl_unregister_lib is implemented. +add_sycl_unittest(DynamicLinkingTests OBJECT + DynamicLinking.cpp +) \ No newline at end of file diff --git a/sycl/unittests/program_manager/DynamicLinking.cpp b/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp similarity index 54% rename from sycl/unittests/program_manager/DynamicLinking.cpp rename to sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp index 82b86843b49c3..e555ef3aae992 100644 --- a/sycl/unittests/program_manager/DynamicLinking.cpp +++ b/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp @@ -15,6 +15,9 @@ class MutualDepKernelB; class AOTCaseKernel; } // namespace DynamicLinkingTest +const static sycl::specialization_id SpecConst1{1}; +const static sycl::specialization_id SpecConst2{2}; + namespace sycl { inline namespace _V1 { namespace detail { @@ -32,6 +35,14 @@ KERNEL_INFO(MutualDepKernelB) KERNEL_INFO(AOTCaseKernel) #undef KERNEL_INFO + +template <> const char *get_spec_constant_symbolic_ID() { + return "SC1"; +} +template <> const char *get_spec_constant_symbolic_ID() { + return "SC2"; +} + } // namespace detail } // namespace _V1 } // namespace sycl @@ -56,19 +67,20 @@ createPropertySet(const std::vector &Symbols) { return Props; } -sycl::unittest::MockDeviceImage generateImage( - std::initializer_list KernelNames, - const std::vector &ExportedSymbols, - const std::vector &ImportedSymbols, unsigned char Magic, - sycl::detail::ur::DeviceBinaryType BinType = SYCL_DEVICE_BINARY_TYPE_SPIRV, - const char *DeviceTargetSpec = __SYCL_DEVICE_BINARY_TARGET_SPIRV64) { - sycl::unittest::MockPropertySet PropSet; +sycl::unittest::MockDeviceImage +generateImage(std::initializer_list KernelNames, + const std::vector &ExportedSymbols, + const std::vector &ImportedSymbols, + unsigned char Magic, sycl::detail::ur::DeviceBinaryType BinType, + const char *DeviceTargetSpec, + sycl::unittest::MockPropertySet PropSet) { if (!ExportedSymbols.empty()) PropSet.insert(__SYCL_PROPERTY_SET_SYCL_EXPORTED_SYMBOLS, createPropertySet(ExportedSymbols)); if (!ImportedSymbols.empty()) PropSet.insert(__SYCL_PROPERTY_SET_SYCL_IMPORTED_SYMBOLS, createPropertySet(ImportedSymbols)); + std::vector Bin{Magic}; std::vector Entries = @@ -85,6 +97,32 @@ sycl::unittest::MockDeviceImage generateImage( return Img; } +sycl::unittest::MockDeviceImage generateImage( + std::initializer_list KernelNames, + const std::vector &ExportedSymbols, + const std::vector &ImportedSymbols, unsigned char Magic, + const std::string &SCName, uint32_t SCID, int SCValue, + sycl::detail::ur::DeviceBinaryType BinType = SYCL_DEVICE_BINARY_TYPE_SPIRV, + const char *DeviceTargetSpec = __SYCL_DEVICE_BINARY_TARGET_SPIRV64) { + sycl::unittest::MockPropertySet PropSet; + std::vector SpecConstData; + sycl::unittest::MockProperty SC = sycl::unittest::makeSpecConstant( + SpecConstData, SCName, {SCID}, {0}, {SCValue}); + sycl::unittest::addSpecConstants({SC}, std::move(SpecConstData), PropSet); + return generateImage(KernelNames, ExportedSymbols, ImportedSymbols, Magic, + SYCL_DEVICE_BINARY_TYPE_SPIRV, + __SYCL_DEVICE_BINARY_TARGET_SPIRV64, PropSet); +} +sycl::unittest::MockDeviceImage generateImage( + std::initializer_list KernelNames, + const std::vector &ExportedSymbols, + const std::vector &ImportedSymbols, unsigned char Magic, + sycl::detail::ur::DeviceBinaryType BinType = SYCL_DEVICE_BINARY_TYPE_SPIRV, + const char *DeviceTargetSpec = __SYCL_DEVICE_BINARY_TARGET_SPIRV64) { + return generateImage(KernelNames, ExportedSymbols, ImportedSymbols, Magic, + BinType, DeviceTargetSpec, {}); +} + static constexpr unsigned BASIC_CASE_PRG = 2; static constexpr unsigned BASIC_CASE_PRG_DEP = 3; static constexpr unsigned BASIC_CASE_PRG_DEP_NATIVE = 5; @@ -110,10 +148,10 @@ static sycl::unittest::MockDeviceImage Imgs[] = { {"UnresolvedDepKernelUnresolvedDep"}, UNRESOLVED_DEP_PRG), generateImage({"MutualDepKernelA", "MutualDepKernelBDep"}, {"MutualDepKernelBDep"}, {"MutualDepKernelADep"}, - MUTUAL_DEP_PRG_A), + MUTUAL_DEP_PRG_A, "SC1", 0, 1), generateImage({"MutualDepKernelADep", "MutualDepKernelB"}, {"MutualDepKernelADep"}, {"MutualDepKernelBDep"}, - MUTUAL_DEP_PRG_B), + MUTUAL_DEP_PRG_B, "SC2", 1, 2), generateImage({"AOTCaseKernel"}, {}, {"AOTCaseKernelDep"}, AOT_CASE_PRG_NATIVE, SYCL_DEVICE_BINARY_TYPE_NATIVE, __SYCL_DEVICE_BINARY_TARGET_SPIRV64_GEN), @@ -124,6 +162,17 @@ static sycl::unittest::MockDeviceImage Imgs[] = { // Registers mock devices images in the SYCL RT static sycl::unittest::MockDeviceImageArray<9> ImgArray{Imgs}; +void runCommonBasicCaseChecks() { + ASSERT_EQ(CapturedLinkingData.NumOfUrProgramCreateCalls, 3u); + // The three programs should be linked together. + ASSERT_EQ(CapturedLinkingData.NumOfUrProgramLinkCalls, 1u); + ASSERT_TRUE(CapturedLinkingData.LinkedProgramsContains( + {BASIC_CASE_PRG, BASIC_CASE_PRG_DEP, BASIC_CASE_PRG_DEP_DEP})); + // And the linked program should be used to create a kernel. + ASSERT_EQ(CapturedLinkingData.ProgramUsedToCreateKernel, + BASIC_CASE_PRG * BASIC_CASE_PRG_DEP * BASIC_CASE_PRG_DEP_DEP); +} + TEST(DynamicLinking, BasicCase) { sycl::unittest::UrMock<> Mock; setupRuntimeLinkingMock(); @@ -134,14 +183,8 @@ TEST(DynamicLinking, BasicCase) { CapturedLinkingData.clear(); Q.single_task([=]() {}); - ASSERT_EQ(CapturedLinkingData.NumOfUrProgramCreateCalls, 3u); - // Both programs should be linked together. - ASSERT_EQ(CapturedLinkingData.NumOfUrProgramLinkCalls, 1u); - ASSERT_TRUE(CapturedLinkingData.LinkedProgramsContains( - {BASIC_CASE_PRG, BASIC_CASE_PRG_DEP, BASIC_CASE_PRG_DEP_DEP})); - // And the linked program should be used to create a kernel. - ASSERT_EQ(CapturedLinkingData.ProgramUsedToCreateKernel, - BASIC_CASE_PRG * BASIC_CASE_PRG_DEP * BASIC_CASE_PRG_DEP_DEP); + + runCommonBasicCaseChecks(); } TEST(DynamicLinking, UnresolvedDep) { @@ -158,6 +201,17 @@ TEST(DynamicLinking, UnresolvedDep) { } } +void runCommonMutualDepTestChecks() { + ASSERT_EQ(CapturedLinkingData.NumOfUrProgramCreateCalls, 2u); + // Both programs should be linked together. + ASSERT_EQ(CapturedLinkingData.NumOfUrProgramLinkCalls, 1u); + ASSERT_TRUE(CapturedLinkingData.LinkedProgramsContains( + {MUTUAL_DEP_PRG_A, MUTUAL_DEP_PRG_B})); + // And the linked program should be used to create a kernel. + ASSERT_EQ(CapturedLinkingData.ProgramUsedToCreateKernel, + MUTUAL_DEP_PRG_A * MUTUAL_DEP_PRG_B); +} + TEST(DynamicLinking, MutualDependency) { sycl::unittest::UrMock<> Mock; setupRuntimeLinkingMock(); @@ -168,14 +222,7 @@ TEST(DynamicLinking, MutualDependency) { CapturedLinkingData.clear(); Q.single_task([=]() {}); - ASSERT_EQ(CapturedLinkingData.NumOfUrProgramCreateCalls, 2u); - // Both programs should be linked together. - ASSERT_EQ(CapturedLinkingData.NumOfUrProgramLinkCalls, 1u); - ASSERT_TRUE(CapturedLinkingData.LinkedProgramsContains( - {MUTUAL_DEP_PRG_A, MUTUAL_DEP_PRG_B})); - // And the linked program should be used to create a kernel. - ASSERT_EQ(CapturedLinkingData.ProgramUsedToCreateKernel, - MUTUAL_DEP_PRG_A * MUTUAL_DEP_PRG_B); + runCommonMutualDepTestChecks(); CapturedLinkingData.clear(); @@ -236,7 +283,7 @@ TEST(DynamicLinking, UnsupportedCompileExp) { Q.single_task([=]() {}); ASSERT_EQ(CapturedLinkingData.NumOfUrProgramCreateCalls, 3u); - // Both programs should be linked together. + // The three programs should be linked together. ASSERT_EQ(CapturedLinkingData.NumOfUrProgramLinkCalls, 1u); ASSERT_TRUE(CapturedLinkingData.LinkedProgramsContains( {BASIC_CASE_PRG, BASIC_CASE_PRG_DEP, BASIC_CASE_PRG_DEP_DEP})); @@ -245,4 +292,158 @@ TEST(DynamicLinking, UnsupportedCompileExp) { BASIC_CASE_PRG * BASIC_CASE_PRG_DEP * BASIC_CASE_PRG_DEP_DEP); } +template +void testKernelBundleBuild( + std::size_t NKernelIDsExpected, + const std::vector &KernelIDsRequested = { + sycl::get_kernel_id()}) { + sycl::unittest::UrMock<> Mock; + setupRuntimeLinkingMock(); + + sycl::platform Plt = sycl::platform(); + sycl::queue Q(Plt.get_devices()[0]); + + CapturedLinkingData.clear(); + + sycl::kernel_bundle KB = + sycl::get_kernel_bundle( + Q.get_context(), KernelIDsRequested); + // Only one linked resulting image expected + EXPECT_EQ(std::distance(KB.begin(), KB.end()), 1); + EXPECT_EQ(KB.get_kernel_ids().size(), NKernelIDsExpected); + + Q.submit([&](sycl::handler &CGH) { + CGH.use_kernel_bundle(KB); + CGH.single_task([=]() {}); + }); +} + +TEST(DynamicLinking, KernelBundleBuild) { + testKernelBundleBuild( + /*NKernelIDsExpected*/ 1u); + runCommonBasicCaseChecks(); +} + +template +void testKernelBundleCompileLink( + long NImagesExpectedBeforeLink, std::size_t NKernelIDsExpected, + const std::vector &KernelIDsRequested = { + sycl::get_kernel_id()}) { + sycl::unittest::UrMock<> Mock; + setupRuntimeLinkingMock(); + + sycl::platform Plt = sycl::platform(); + sycl::queue Q(Plt.get_devices()[0]); + + CapturedLinkingData.clear(); + + sycl::kernel_bundle InputKB = + sycl::get_kernel_bundle(Q.get_context(), + KernelIDsRequested); + EXPECT_EQ(std::distance(InputKB.begin(), InputKB.end()), + NImagesExpectedBeforeLink); + EXPECT_EQ(InputKB.get_kernel_ids().size(), NKernelIDsExpected); + + sycl::kernel_bundle ObjectKB = sycl::compile(InputKB); + EXPECT_EQ(std::distance(ObjectKB.begin(), ObjectKB.end()), + NImagesExpectedBeforeLink); + EXPECT_EQ(ObjectKB.get_kernel_ids().size(), NKernelIDsExpected); + + sycl::kernel_bundle LinkedKB = sycl::link({ObjectKB}); + // Only one linked resulting image expected + EXPECT_EQ(std::distance(LinkedKB.begin(), LinkedKB.end()), 1); + EXPECT_EQ(LinkedKB.get_kernel_ids().size(), NKernelIDsExpected); + + Q.submit([&](sycl::handler &CGH) { + CGH.use_kernel_bundle(LinkedKB); + CGH.single_task([=]() {}); + }); +} + +TEST(DynamicLinking, KernelBundleCompileLink) { + testKernelBundleCompileLink( + /*NImagesExpectedBeforeLink*/ 3, /*NKernelIDsExpected*/ 1); + runCommonBasicCaseChecks(); +} + +TEST(DynamicLinking, KernelBundleMutualDep) { + testKernelBundleCompileLink< + DynamicLinkingTest:: + MutualDepKernelA>(/*NImagesExpectedBeforeLink*/ + 2, /*NKernelIDsExpected*/ 2, + {sycl::get_kernel_id< + DynamicLinkingTest::MutualDepKernelA>(), + sycl::get_kernel_id< + DynamicLinkingTest::MutualDepKernelB>()}); + runCommonMutualDepTestChecks(); +} + +// Test that the dependency image and its kernel id are exposed as part of the +// kernel bundle even when not explicitly requested. +TEST(DynamicLinking, KernelBundleMutualDepCompileLinkIndirect) { + testKernelBundleCompileLink< + DynamicLinkingTest:: + MutualDepKernelA>(/*NImagesExpectedBeforeLink*/ + 2, /*NKernelIDsExpected*/ 2, + {sycl::get_kernel_id< + DynamicLinkingTest::MutualDepKernelB>()}); + runCommonMutualDepTestChecks(); +} + +TEST(DynamicLinking, KernelBundleMutualDepBuildIndirect) { + testKernelBundleBuild( + /*NKernelIDsExpected*/ 2u, + {sycl::get_kernel_id()}); + runCommonMutualDepTestChecks(); +} + +TEST(DynamicLinking, KernelBundleSpecConstsCompileLink) { + sycl::unittest::UrMock<> Mock; + setupRuntimeLinkingMock(); + + sycl::platform Plt = sycl::platform(); + sycl::queue Q(Plt.get_devices()[0]); + + CapturedLinkingData.clear(); + + sycl::kernel_bundle InputKB = + sycl::get_kernel_bundle( + Q.get_context(), + {sycl::get_kernel_id()}); + EXPECT_EQ(InputKB.get_specialization_constant(), 1); + EXPECT_EQ(InputKB.get_specialization_constant(), 2); + + InputKB.set_specialization_constant(10); + InputKB.set_specialization_constant(20); + sycl::kernel_bundle ObjectKB = sycl::compile(InputKB); + EXPECT_EQ(ObjectKB.get_specialization_constant(), 10); + EXPECT_EQ(ObjectKB.get_specialization_constant(), 20); + + sycl::kernel_bundle LinkedKB = sycl::link({ObjectKB}); + EXPECT_EQ(LinkedKB.get_specialization_constant(), 10); + EXPECT_EQ(LinkedKB.get_specialization_constant(), 20); +} + +TEST(DynamicLinking, KernelBundleSpecConstsBuild) { + sycl::unittest::UrMock<> Mock; + setupRuntimeLinkingMock(); + + sycl::platform Plt = sycl::platform(); + sycl::queue Q(Plt.get_devices()[0]); + + CapturedLinkingData.clear(); + + sycl::kernel_bundle InputKB = + sycl::get_kernel_bundle( + Q.get_context(), + {sycl::get_kernel_id()}); + EXPECT_EQ(InputKB.get_specialization_constant(), 1); + EXPECT_EQ(InputKB.get_specialization_constant(), 2); + + InputKB.set_specialization_constant(10); + InputKB.set_specialization_constant(20); + sycl::kernel_bundle BuiltKB = sycl::build(InputKB); + EXPECT_EQ(BuiltKB.get_specialization_constant(), 10); + EXPECT_EQ(BuiltKB.get_specialization_constant(), 20); +} } // anonymous namespace From c6ddd06eca840034130330c92eb98b11b4b90009 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Mon, 2 Dec 2024 09:10:52 -0800 Subject: [PATCH 02/10] Fix spec constant tests --- .../unittests/program_manager/DynamicLinking/DynamicLinking.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp b/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp index e555ef3aae992..552a93869a484 100644 --- a/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp +++ b/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp @@ -111,7 +111,7 @@ sycl::unittest::MockDeviceImage generateImage( sycl::unittest::addSpecConstants({SC}, std::move(SpecConstData), PropSet); return generateImage(KernelNames, ExportedSymbols, ImportedSymbols, Magic, SYCL_DEVICE_BINARY_TYPE_SPIRV, - __SYCL_DEVICE_BINARY_TARGET_SPIRV64, PropSet); + __SYCL_DEVICE_BINARY_TARGET_SPIRV64, std::move(PropSet)); } sycl::unittest::MockDeviceImage generateImage( std::initializer_list KernelNames, From d830f665a4abce18a6ddbcfb85392f2f5224ecec Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Mon, 2 Dec 2024 11:21:20 -0800 Subject: [PATCH 03/10] Apply comment + uncomment missed test line --- sycl/source/detail/kernel_bundle_impl.hpp | 12 +++++------- .../DeviceImageDependencies/Inputs/basic.cpp | 4 ++-- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 166869cb5035d..9366398a6a05d 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -333,8 +333,7 @@ class kernel_bundle_impl { SpecConst.second.back().Size); } }; - std::for_each(MUniqueDeviceImages.begin(), MUniqueDeviceImages.end(), - MergeSpecConstants); + std::for_each(begin(), end(), MergeSpecConstants); } for (const detail::KernelBundleImplPtr &Bundle : Bundles) { @@ -752,15 +751,14 @@ class kernel_bundle_impl { bool contains_specialization_constants() const noexcept { return std::any_of( - MUniqueDeviceImages.begin(), MUniqueDeviceImages.end(), - [](const device_image_plain &DeviceImage) { + begin(), end(), [](const device_image_plain &DeviceImage) { return getSyclObjImpl(DeviceImage)->has_specialization_constants(); }); } bool native_specialization_constant() const noexcept { return contains_specialization_constants() && - std::all_of(MUniqueDeviceImages.begin(), MUniqueDeviceImages.end(), + std::all_of(begin(), end(), [](const device_image_plain &DeviceImage) { return getSyclObjImpl(DeviceImage) ->all_specialization_constant_native(); @@ -768,7 +766,7 @@ class kernel_bundle_impl { } bool has_specialization_constant(const char *SpecName) const noexcept { - return std::any_of(MUniqueDeviceImages.begin(), MUniqueDeviceImages.end(), + return std::any_of(begin(), end(), [SpecName](const device_image_plain &DeviceImage) { return getSyclObjImpl(DeviceImage) ->has_specialization_constant(SpecName); @@ -814,7 +812,7 @@ class kernel_bundle_impl { bool is_specialization_constant_set(const char *SpecName) const noexcept { bool SetInDevImg = - std::any_of(MUniqueDeviceImages.begin(), MUniqueDeviceImages.end(), + std::any_of(begin(), end(), [SpecName](const device_image_plain &DeviceImage) { return getSyclObjImpl(DeviceImage) ->is_specialization_constant_set(SpecName); diff --git a/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp b/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp index 69827d68a7bf3..efa2128f15c54 100644 --- a/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp @@ -28,7 +28,7 @@ int main() { }); runTest(q, [](queue &q, buffer &buf) { kernel_bundle KB = get_kernel_bundle( - q.get_context() /*, {get_kernel_id>()}*/); + q.get_context(), {get_kernel_id>()}); q.submit([&](handler &cgh) { auto acc = buf.get_access(cgh); cgh.use_kernel_bundle(KB); @@ -37,7 +37,7 @@ int main() { }); runTest(q, [](queue &q, buffer &buf) { kernel_bundle KBInput = get_kernel_bundle( - q.get_context() /*,{get_kernel_id>()}*/); + q.get_context(), {get_kernel_id>()}); kernel_bundle KBObject = compile(KBInput); kernel_bundle KBLinked = link(KBObject); q.submit([&](handler &cgh) { From 2f29f84b0cd670a25e277caa7f9be5810bfde089 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Tue, 3 Dec 2024 04:39:32 -0800 Subject: [PATCH 04/10] Fix test compilation --- sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp b/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp index efa2128f15c54..51592dcc5ed2e 100644 --- a/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp @@ -28,7 +28,7 @@ int main() { }); runTest(q, [](queue &q, buffer &buf) { kernel_bundle KB = get_kernel_bundle( - q.get_context(), {get_kernel_id>()}); + q.get_context(), {sycl::get_kernel_id>()}); q.submit([&](handler &cgh) { auto acc = buf.get_access(cgh); cgh.use_kernel_bundle(KB); @@ -37,7 +37,7 @@ int main() { }); runTest(q, [](queue &q, buffer &buf) { kernel_bundle KBInput = get_kernel_bundle( - q.get_context(), {get_kernel_id>()}); + q.get_context(), {sycl::get_kernel_id>()}); kernel_bundle KBObject = compile(KBInput); kernel_bundle KBLinked = link(KBObject); q.submit([&](handler &cgh) { From d44731ce977121e299fde4587ca197a89c6b9048 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Tue, 3 Dec 2024 05:49:16 -0800 Subject: [PATCH 05/10] Fix test compilation for real --- sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp b/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp index 51592dcc5ed2e..cbbfabea287be 100644 --- a/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp @@ -1,8 +1,10 @@ #include "a.hpp" -#include + #include +#include #include +#include #include using namespace sycl; @@ -28,7 +30,7 @@ int main() { }); runTest(q, [](queue &q, buffer &buf) { kernel_bundle KB = get_kernel_bundle( - q.get_context(), {sycl::get_kernel_id>()}); + q.get_context(), {get_kernel_id>()}); q.submit([&](handler &cgh) { auto acc = buf.get_access(cgh); cgh.use_kernel_bundle(KB); @@ -37,7 +39,7 @@ int main() { }); runTest(q, [](queue &q, buffer &buf) { kernel_bundle KBInput = get_kernel_bundle( - q.get_context(), {sycl::get_kernel_id>()}); + q.get_context(), {get_kernel_id>()}); kernel_bundle KBObject = compile(KBInput); kernel_bundle KBLinked = link(KBObject); q.submit([&](handler &cgh) { From 4f2d9eab2297b0a06d455f5e7b1a59ffcad9c161 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Tue, 3 Dec 2024 05:59:50 -0800 Subject: [PATCH 06/10] Appease clang-format --- sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp b/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp index cbbfabea287be..ebaeced77f752 100644 --- a/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp @@ -4,8 +4,8 @@ #include #include -#include #include +#include using namespace sycl; From 1fc495a35411a80c72d4a94dad66b127e5c47298 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Tue, 3 Dec 2024 06:46:43 -0800 Subject: [PATCH 07/10] Expand unit tests to check spec const blobs --- .../DynamicLinking/DynamicLinking.cpp | 83 +++++++++++++++++-- 1 file changed, 77 insertions(+), 6 deletions(-) diff --git a/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp b/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp index 552a93869a484..eb037a290d4d1 100644 --- a/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp +++ b/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -397,6 +398,76 @@ TEST(DynamicLinking, KernelBundleMutualDepBuildIndirect) { runCommonMutualDepTestChecks(); } +template +std::shared_ptr +getImage(const sycl::kernel_bundle &KernelBundle, + const sycl::kernel_id &KernelID) { + auto It = + std::find_if(KernelBundle.begin(), KernelBundle.end(), + [&](auto Image) { return Image.has_kernel(KernelID); }); + EXPECT_NE(It, KernelBundle.end()); + return sycl::detail::getSyclObjImpl(*It); +} + +template +void runSpecConstChecksUnlinked( + const sycl::kernel_bundle &KernelBundle) { + EXPECT_EQ(KernelBundle.template get_specialization_constant(), + 10); + EXPECT_EQ(KernelBundle.template get_specialization_constant(), + 20); + // Kernel bundles store spec constant values even if they're not part of any + // images, check image spec const blobs. + std::shared_ptr ImgA = + getImage(KernelBundle, + sycl::get_kernel_id()); + std::vector &BlobA = ImgA->get_spec_const_blob_ref(); + int SpecConstVal1 = *reinterpret_cast(BlobA.data()); + EXPECT_EQ(SpecConstVal1, 10); + std::shared_ptr ImgB = + getImage(KernelBundle, + sycl::get_kernel_id()); + std::vector &BlobB = ImgB->get_spec_const_blob_ref(); + int SpecConstVal2 = *reinterpret_cast(BlobB.data()); + EXPECT_EQ(SpecConstVal2, 20); +} + +void runSpecConstChecksLinked( + const sycl::kernel_bundle &KernelBundle) { + EXPECT_EQ(KernelBundle.get_specialization_constant(), 10); + EXPECT_EQ(KernelBundle.get_specialization_constant(), 20); + // Kernel bundles store spec constant values even if they're not part of any + // images, check image spec const blobs. + std::shared_ptr ImgA = + getImage(KernelBundle, + sycl::get_kernel_id()); + std::shared_ptr ImgB = + getImage(KernelBundle, + sycl::get_kernel_id()); + EXPECT_EQ(ImgA, ImgB); + const std::vector &Blob = ImgA->get_spec_const_blob_ref(); + const sycl::detail::device_image_impl::SpecConstMapT &SpecConstMap = + ImgA->get_spec_const_data_ref(); + + auto It = SpecConstMap.find("SC1"); + ASSERT_NE(It, SpecConstMap.end()); + const std::vector + &SpecConstDesc1 = It->second; + EXPECT_EQ(SpecConstDesc1.size(), 1u); + int SpecConstVal1 = *reinterpret_cast( + Blob.data() + SpecConstDesc1[0].BlobOffset); + EXPECT_EQ(SpecConstVal1, 10); + + It = SpecConstMap.find("SC2"); + ASSERT_NE(It, SpecConstMap.end()); + const std::vector + &SpecConstDesc2 = It->second; + EXPECT_EQ(SpecConstDesc2.size(), 1u); + int SpecConstVal2 = *reinterpret_cast( + Blob.data() + SpecConstDesc2[0].BlobOffset); + EXPECT_EQ(SpecConstVal2, 20); +} + TEST(DynamicLinking, KernelBundleSpecConstsCompileLink) { sycl::unittest::UrMock<> Mock; setupRuntimeLinkingMock(); @@ -415,13 +486,13 @@ TEST(DynamicLinking, KernelBundleSpecConstsCompileLink) { InputKB.set_specialization_constant(10); InputKB.set_specialization_constant(20); + runSpecConstChecksUnlinked(InputKB); + sycl::kernel_bundle ObjectKB = sycl::compile(InputKB); - EXPECT_EQ(ObjectKB.get_specialization_constant(), 10); - EXPECT_EQ(ObjectKB.get_specialization_constant(), 20); + runSpecConstChecksUnlinked(ObjectKB); sycl::kernel_bundle LinkedKB = sycl::link({ObjectKB}); - EXPECT_EQ(LinkedKB.get_specialization_constant(), 10); - EXPECT_EQ(LinkedKB.get_specialization_constant(), 20); + runSpecConstChecksLinked(LinkedKB); } TEST(DynamicLinking, KernelBundleSpecConstsBuild) { @@ -442,8 +513,8 @@ TEST(DynamicLinking, KernelBundleSpecConstsBuild) { InputKB.set_specialization_constant(10); InputKB.set_specialization_constant(20); + runSpecConstChecksUnlinked(InputKB); sycl::kernel_bundle BuiltKB = sycl::build(InputKB); - EXPECT_EQ(BuiltKB.get_specialization_constant(), 10); - EXPECT_EQ(BuiltKB.get_specialization_constant(), 20); + runSpecConstChecksLinked(BuiltKB); } } // anonymous namespace From 3ba307a9963ce735fdb80b5cb90fac5617b3a09e Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Tue, 3 Dec 2024 06:48:53 -0800 Subject: [PATCH 08/10] Minor edit --- .../program_manager/DynamicLinking/DynamicLinking.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp b/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp index eb037a290d4d1..c48a5c1626c69 100644 --- a/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp +++ b/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp @@ -416,8 +416,8 @@ void runSpecConstChecksUnlinked( 10); EXPECT_EQ(KernelBundle.template get_specialization_constant(), 20); - // Kernel bundles store spec constant values even if they're not part of any - // images, check image spec const blobs. + // Kernel bundles store spec constant values even if they're not part of any + // images, check image spec const blobs. std::shared_ptr ImgA = getImage(KernelBundle, sycl::get_kernel_id()); From 68fa6f74a07f28833580ef4fa08a2926e909acb1 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Tue, 3 Dec 2024 11:18:03 -0800 Subject: [PATCH 09/10] Add newlines at EOF --- sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp | 2 +- sycl/unittests/program_manager/CMakeLists.txt | 2 +- sycl/unittests/program_manager/DynamicLinking/CMakeLists.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp b/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp index ebaeced77f752..ea48700774512 100644 --- a/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp +++ b/sycl/test-e2e/DeviceImageDependencies/Inputs/basic.cpp @@ -48,4 +48,4 @@ int main() { cgh.single_task>([=]() { acc[0] = levelA(acc[0]); }); }); }); -} \ No newline at end of file +} diff --git a/sycl/unittests/program_manager/CMakeLists.txt b/sycl/unittests/program_manager/CMakeLists.txt index 65eda325ccec0..d919de3dd6748 100644 --- a/sycl/unittests/program_manager/CMakeLists.txt +++ b/sycl/unittests/program_manager/CMakeLists.txt @@ -8,4 +8,4 @@ add_sycl_unittest(ProgramManagerTests OBJECT ) add_subdirectory(arg_mask) -add_subdirectory(DynamicLinking) \ No newline at end of file +add_subdirectory(DynamicLinking) diff --git a/sycl/unittests/program_manager/DynamicLinking/CMakeLists.txt b/sycl/unittests/program_manager/DynamicLinking/CMakeLists.txt index cccc9a9cf1f17..e81ed96c00776 100644 --- a/sycl/unittests/program_manager/DynamicLinking/CMakeLists.txt +++ b/sycl/unittests/program_manager/DynamicLinking/CMakeLists.txt @@ -2,4 +2,4 @@ # TODO this can be merged back into program manager tests once __sycl_unregister_lib is implemented. add_sycl_unittest(DynamicLinkingTests OBJECT DynamicLinking.cpp -) \ No newline at end of file +) From 976c35a481f64e90e4e5b5c78e4847dd652fe1d0 Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Tue, 3 Dec 2024 13:46:24 -0800 Subject: [PATCH 10/10] Fix kernel arg mask info --- sycl/source/detail/program_manager/program_manager.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index aabc10d78508c..f6915ec728cfe 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2694,6 +2694,14 @@ ProgramManager::link(const DevImgPlainWithDeps &ImgWithDeps, device_image_impl::SpecConstMapT NewSpecConstMap; mergeImageData(Imgs, *KernelIDs, NewSpecConstBlob, NewSpecConstMap); + { + std::lock_guard Lock(MNativeProgramsMutex); + for (const device_image_plain &Img : ImgWithDeps) { + NativePrograms.insert( + {LinkedProg, getSyclObjImpl(Img)->get_bin_image_ref()}); + } + } + auto BinImg = getSyclObjImpl(MainImg)->get_bin_image_ref(); DeviceImageImplPtr ExecutableImpl = std::make_shared(