From 4ee0932fe9d15526e38e7622d926fca79af870e4 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Fri, 14 Mar 2025 16:10:29 +0800 Subject: [PATCH 01/10] Skip unnecessary init for bf16 devicelib image Signed-off-by: jinge90 --- sycl/source/detail/program_manager/program_manager.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 96b7f3bca3f53..81223cde9dd42 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1924,6 +1924,11 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { m_ExportedSymbolImages.insert({ESProp->Name, Img.get()}); } + if (EntriesB == EntriesE) { + m_DeviceImages.insert({RawImg, std::move(Img)}); + continue; + } + // Record mapping between virtual function sets and device images for (const sycl_device_binary_property &VFProp : Img->getVirtualFunctions()) { From 57b552342388557971d6594c73887da8dcb020f9 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Fri, 14 Mar 2025 18:23:33 +0800 Subject: [PATCH 02/10] skip bf16 devicelib image when merging metadata Signed-off-by: jinge90 --- sycl/source/detail/program_manager/program_manager.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 81223cde9dd42..d0e9b5f9863bd 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2834,6 +2834,8 @@ static void mergeImageData(const std::vector &Imgs, device_image_impl::SpecConstMapT &NewSpecConstMap) { for (const device_image_plain &Img : Imgs) { std::shared_ptr DeviceImageImpl = getSyclObjImpl(Img); + if (!DeviceImageImpl->get_kernel_ids_ptr()) + continue; // Duplicates are not expected here, otherwise urProgramLink should fail KernelIDs.insert(KernelIDs.end(), DeviceImageImpl->get_kernel_ids_ptr()->begin(), From 34f396f91f16e0c682b3b476aeb5db8d002a7e84 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 18 Mar 2025 17:33:26 +0800 Subject: [PATCH 03/10] Use Refcount to guard bf16 devicelib Signed-off-by: jinge90 --- .../program_manager/program_manager.cpp | 135 ++++++++---------- .../program_manager/program_manager.hpp | 1 + 2 files changed, 60 insertions(+), 76 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 853d2ca3622c5..b1332e71aafa1 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -5,7 +5,6 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - #include #include #include @@ -1837,84 +1836,27 @@ ProgramManager::kernelImplicitLocalArgPos(const std::string &KernelName) const { return {}; } -static bool shouldSkipEmptyImage(sycl_device_binary RawImg, bool IsRTC) { - // For bfloat16 device library image, we should keep it. However, in some - // scenario, __sycl_register_lib can be called multiple times and the same - // bfloat16 device library image may be handled multiple times which is not - // needed. 2 static bool variables are created to record whether native or - // fallback bfloat16 device library image has been handled, if yes, we just - // need to skip it. - // We cannot prevent redundant loads of device library images if they are part - // of a runtime-compiled device binary, as these will be freed when the - // corresponding kernel bundle is destroyed. Hence, normal kernels cannot rely - // on the presence of RTC device library images. +static bool shouldSkipEmptyImage(sycl_device_binary RawImg) { + // For bfloat16 device library image, we should keep it although it doesn't + // include any kernel. sycl_device_binary_property_set ImgPS; - static bool IsNativeBF16DeviceLibHandled = false; - static bool IsFallbackBF16DeviceLibHandled = false; for (ImgPS = RawImg->PropertySetsBegin; ImgPS != RawImg->PropertySetsEnd; ++ImgPS) { if (ImgPS->Name && - !strcmp(__SYCL_PROPERTY_SET_DEVICELIB_METADATA, ImgPS->Name)) { - sycl_device_binary_property ImgP; - for (ImgP = ImgPS->PropertiesBegin; ImgP != ImgPS->PropertiesEnd; - ++ImgP) { - if (ImgP->Name && !strcmp("bfloat16", ImgP->Name) && - (ImgP->Type == SYCL_PROPERTY_TYPE_UINT32)) - break; - } - if (ImgP == ImgPS->PropertiesEnd) - return true; - - // A valid bfloat16 device library image is found here. - // If it originated from RTC, we cannot skip it, but do not mark it as - // being present. - if (IsRTC) - return false; - - // Otherwise, we need to check whether it has been handled already. - uint32_t BF16NativeVal = DeviceBinaryProperty(ImgP).asUint32(); - if (((BF16NativeVal == 0) && IsFallbackBF16DeviceLibHandled) || - ((BF16NativeVal == 1) && IsNativeBF16DeviceLibHandled)) - return true; - - if (BF16NativeVal == 0) - IsFallbackBF16DeviceLibHandled = true; - else - IsNativeBF16DeviceLibHandled = true; - + !strcmp(__SYCL_PROPERTY_SET_DEVICELIB_METADATA, ImgPS->Name)) return false; - } } - return true; -} -static bool isCompiledAtRuntime(sycl_device_binaries DeviceBinary) { - // Check whether the first device binary contains a legacy format offload - // entry with a `$` in its name. - if (DeviceBinary->NumDeviceBinaries > 0) { - sycl_device_binary Binary = DeviceBinary->DeviceBinaries; - if (Binary->EntriesBegin != Binary->EntriesEnd) { - sycl_offload_entry Entry = Binary->EntriesBegin; - if (!Entry->IsNewOffloadEntryType() && - std::string_view{Entry->name}.find('$') != std::string_view::npos) { - return true; - } - } - } - return false; + return true; } void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { const bool DumpImages = std::getenv("SYCL_DUMP_IMAGES") && !m_UseSpvFile; - const bool IsRTC = isCompiledAtRuntime(DeviceBinary); for (int I = 0; I < DeviceBinary->NumDeviceBinaries; I++) { sycl_device_binary RawImg = &(DeviceBinary->DeviceBinaries[I]); const sycl_offload_entry EntriesB = RawImg->EntriesBegin; const sycl_offload_entry EntriesE = RawImg->EntriesEnd; - // If the image does not contain kernels, skip it unless it is one of the - // bfloat16 device libraries, and it wasn't loaded before or resulted from - // runtime compilation. - if ((EntriesB == EntriesE) && shouldSkipEmptyImage(RawImg, IsRTC)) + if ((EntriesB == EntriesE) && shouldSkipEmptyImage(RawImg)) continue; std::unique_ptr Img; @@ -1946,17 +1888,38 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { // Fill maps for kernel bundles std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + // For bfloat16 device library image, it doesn't include any kernel, device + // global, virtual function, so just skip adding it to any related maps. + // We only need to: 1). add exported symbols to m_ExportedSymbolImages. 2). + // add the device image to m_DeviceImages used for future clean up when + // removeImage is called. RefCount is used to keep how many user device + // images are depending on native/fallback bfloat16 device library image, + // the corresponding image will be added to m_ExportedSymbolImages and + // m_DeviceImages only when RefCount is 0. These RefCount are used when + // KernelIDsGuard is acquired by current thread. + { + auto Bfloat16DeviceLibProp = Img->getDeviceLibMetadata(); + if (Bfloat16DeviceLibProp.isAvailable()) { + uint32_t IsNative = + DeviceBinaryProperty(*(Bfloat16DeviceLibProp.begin())).asUint32(); + if (!m_Bfloat16DeviceLibRefCount[IsNative]) { + for (const sycl_device_binary_property &ESProp : + Img->getExportedSymbols()) { + m_ExportedSymbolImages.insert({ESProp->Name, Img.get()}); + } + m_DeviceImages.insert({RawImg, std::move(Img)}); + } + m_Bfloat16DeviceLibRefCount[IsNative] += 1; + continue; + } + } + // Register all exported symbols for (const sycl_device_binary_property &ESProp : Img->getExportedSymbols()) { m_ExportedSymbolImages.insert({ESProp->Name, Img.get()}); } - if (EntriesB == EntriesE) { - m_DeviceImages.insert({RawImg, std::move(Img)}); - continue; - } - // Record mapping between virtual function sets and device images for (const sycl_device_binary_property &VFProp : Img->getVirtualFunctions()) { @@ -2115,7 +2078,6 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { } void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { - bool IsRTC = isCompiledAtRuntime(DeviceBinary); for (int I = 0; I < DeviceBinary->NumDeviceBinaries; I++) { sycl_device_binary RawImg = &(DeviceBinary->DeviceBinaries[I]); auto DevImgIt = m_DeviceImages.find(RawImg); @@ -2123,11 +2085,7 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { continue; const sycl_offload_entry EntriesB = RawImg->EntriesBegin; const sycl_offload_entry EntriesE = RawImg->EntriesEnd; - // Skip clean up if there are no offload entries, unless `DeviceBinary` - // resulted from runtime compilation: Then, this is one of the `bfloat16` - // device libraries, so we want to make sure that the image and its exported - // symbols are removed from the program manager's maps. - if (EntriesB == EntriesE && !IsRTC) + if ((EntriesB == EntriesE) && shouldSkipEmptyImage(RawImg)) continue; RTDeviceBinaryImage *Img = DevImgIt->second.get(); @@ -2138,6 +2096,28 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { // Acquire lock to modify maps for kernel bundles std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + { + // Clean up Bfloat16 device library image, unregister exported symbols + // and remove the device image only when RefCount is 0. + auto Bfloat16DeviceLibProp = Img->getDeviceLibMetadata(); + if (Bfloat16DeviceLibProp.isAvailable()) { + uint32_t IsNative = + DeviceBinaryProperty(*(Bfloat16DeviceLibProp.begin())).asUint32(); + if (m_Bfloat16DeviceLibRefCount[IsNative] != 0) + m_Bfloat16DeviceLibRefCount[IsNative] -= 1; + if (!m_Bfloat16DeviceLibRefCount[IsNative]) { + for (const sycl_device_binary_property &ESProp : + Img->getExportedSymbols()) { + m_ExportedSymbolImages.erase(ESProp->Name); + } + + m_DeviceImages.erase(DevImgIt); + } + + continue; + } + } + // Unmap the unique kernel IDs for the offload entries for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; EntriesIt = EntriesIt->Increment()) { @@ -2655,7 +2635,10 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( std::shared_ptr> DepKernelIDs; { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); - DepKernelIDs = m_BinImg2KernelIDs[Dep]; + // For device library images, they are not in m_BinImg2KernelIDs since + // no kernel is included. + if (m_BinImg2KernelIDs.find(Dep) != m_BinImg2KernelIDs.end()) + DepKernelIDs = m_BinImg2KernelIDs[Dep]; } assert(ImgInfoPair.second.State == getBinImageState(Dep) && diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index f2fe311308eee..8534894f78e4e 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -498,6 +498,7 @@ class ProgramManager { std::map, ur_kernel_handle_t>; std::unordered_map m_MaterializedKernels; + size_t m_Bfloat16DeviceLibRefCount[2] = {0, 0}; friend class ::ProgramManagerTest; }; } // namespace detail From fcbbe1cdc39dd5841e101e04797c7adf5d065133 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 19 Mar 2025 15:38:14 +0800 Subject: [PATCH 04/10] allow multiple device image for bf16 devicelib symbols Signed-off-by: jinge90 --- .../program_manager/program_manager.cpp | 72 ++++++++++++------- .../program_manager/program_manager.hpp | 5 +- 2 files changed, 50 insertions(+), 27 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index b1332e71aafa1..202e6e5c4b89d 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -5,6 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// + #include #include #include @@ -707,6 +708,23 @@ ProgramManager::collectDeviceImageDepsForImportedSymbols( if (isSpecialDeviceImage(Img) && !isSpecialDeviceImageShouldBeUsed(Img, Dev)) continue; + { + // m_ExportedSymbolImages is multimap, one symbol may be mapped to + // multiple RTDeviceBianryImage *, this can happen if multiple + // sycl_device_binaries include device image exporting same symbol. + // When resolving imported symbols, we must guarantee that the device + // image to link to solve the imported symbols is from the same + // sycl_device_binaries. Otherwise, we may link a device image from + // another sycl_device_binaries and it may be removed in another thread + // is removeImages is called in another thread. + std::lock_guard ImgStart2DeviceBinsGuard( + m_ImgStart2DeviceBinsMutex); + const unsigned char *MainImgStart = MainImg.getRawData().BinaryStart; + const unsigned char *DepImgStart = Img->getRawData().BinaryStart; + if (m_ImgStart2DeviceBins[MainImgStart] != + m_ImgStart2DeviceBins[DepImgStart]) + continue; + } DeviceImagesToLink.insert(Img); Found = true; for (const sycl_device_binary_property &ISProp : @@ -1888,28 +1906,26 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { // Fill maps for kernel bundles std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + { + std::lock_guard ImgStart2DeviceBinsGuard( + m_ImgStart2DeviceBinsMutex); + m_ImgStart2DeviceBins.insert( + {Img->getRawData().BinaryStart, DeviceBinary}); + } + // For bfloat16 device library image, it doesn't include any kernel, device // global, virtual function, so just skip adding it to any related maps. // We only need to: 1). add exported symbols to m_ExportedSymbolImages. 2). // add the device image to m_DeviceImages used for future clean up when - // removeImage is called. RefCount is used to keep how many user device - // images are depending on native/fallback bfloat16 device library image, - // the corresponding image will be added to m_ExportedSymbolImages and - // m_DeviceImages only when RefCount is 0. These RefCount are used when - // KernelIDsGuard is acquired by current thread. + // removeImage is called. { auto Bfloat16DeviceLibProp = Img->getDeviceLibMetadata(); if (Bfloat16DeviceLibProp.isAvailable()) { - uint32_t IsNative = - DeviceBinaryProperty(*(Bfloat16DeviceLibProp.begin())).asUint32(); - if (!m_Bfloat16DeviceLibRefCount[IsNative]) { - for (const sycl_device_binary_property &ESProp : - Img->getExportedSymbols()) { - m_ExportedSymbolImages.insert({ESProp->Name, Img.get()}); - } - m_DeviceImages.insert({RawImg, std::move(Img)}); + for (const sycl_device_binary_property &ESProp : + Img->getExportedSymbols()) { + m_ExportedSymbolImages.insert({ESProp->Name, Img.get()}); } - m_Bfloat16DeviceLibRefCount[IsNative] += 1; + m_DeviceImages.insert({RawImg, std::move(Img)}); continue; } } @@ -2096,24 +2112,28 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { // Acquire lock to modify maps for kernel bundles std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + { + std::lock_guard ImgStart2DeviceBinsGuard( + m_ImgStart2DeviceBinsMutex); + m_ImgStart2DeviceBins.erase(Img->getRawData().BinaryStart); + } + { // Clean up Bfloat16 device library image, unregister exported symbols // and remove the device image only when RefCount is 0. auto Bfloat16DeviceLibProp = Img->getDeviceLibMetadata(); if (Bfloat16DeviceLibProp.isAvailable()) { - uint32_t IsNative = - DeviceBinaryProperty(*(Bfloat16DeviceLibProp.begin())).asUint32(); - if (m_Bfloat16DeviceLibRefCount[IsNative] != 0) - m_Bfloat16DeviceLibRefCount[IsNative] -= 1; - if (!m_Bfloat16DeviceLibRefCount[IsNative]) { - for (const sycl_device_binary_property &ESProp : - Img->getExportedSymbols()) { - m_ExportedSymbolImages.erase(ESProp->Name); + for (const sycl_device_binary_property &ESProp : + Img->getExportedSymbols()) { + auto Range = m_ExportedSymbolImages.equal_range(ESProp->Name); + for (auto It = Range.first; It != Range.second; ++It) { + if (It->second == Img) { + m_ExportedSymbolImages.erase(It); + break; + } } - - m_DeviceImages.erase(DevImgIt); } - + m_DeviceImages.erase(DevImgIt); continue; } } @@ -2636,7 +2656,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); // For device library images, they are not in m_BinImg2KernelIDs since - // no kernel is included. + // no kernel is included. if (m_BinImg2KernelIDs.find(Dep) != m_BinImg2KernelIDs.end()) DepKernelIDs = m_BinImg2KernelIDs[Dep]; } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 8534894f78e4e..4bedb2cfcfe25 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -498,7 +498,10 @@ class ProgramManager { std::map, ur_kernel_handle_t>; std::unordered_map m_MaterializedKernels; - size_t m_Bfloat16DeviceLibRefCount[2] = {0, 0}; + // Maps between device image binary and sycl_device_binaries it belongs to. + std::mutex m_ImgStart2DeviceBinsMutex; + std::unordered_map + m_ImgStart2DeviceBins; friend class ::ProgramManagerTest; }; } // namespace detail From c794cb37535d97d2653b55bc3714b0e9a6c6e525 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 19 Mar 2025 16:12:15 +0800 Subject: [PATCH 05/10] fix comment Signed-off-by: jinge90 --- sycl/source/detail/program_manager/program_manager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 202e6e5c4b89d..d3ae248dbff07 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2120,7 +2120,7 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { { // Clean up Bfloat16 device library image, unregister exported symbols - // and remove the device image only when RefCount is 0. + // from current device image. auto Bfloat16DeviceLibProp = Img->getDeviceLibMetadata(); if (Bfloat16DeviceLibProp.isAvailable()) { for (const sycl_device_binary_property &ESProp : From db0281d6ce0257dea3a30ae850d7527111d67421 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 19 Mar 2025 18:24:04 +0800 Subject: [PATCH 06/10] remove restriction Signed-off-by: jinge90 --- .../program_manager/program_manager.cpp | 30 ------------------- .../program_manager/program_manager.hpp | 4 --- 2 files changed, 34 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index d3ae248dbff07..33a33be13ac02 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -708,23 +708,6 @@ ProgramManager::collectDeviceImageDepsForImportedSymbols( if (isSpecialDeviceImage(Img) && !isSpecialDeviceImageShouldBeUsed(Img, Dev)) continue; - { - // m_ExportedSymbolImages is multimap, one symbol may be mapped to - // multiple RTDeviceBianryImage *, this can happen if multiple - // sycl_device_binaries include device image exporting same symbol. - // When resolving imported symbols, we must guarantee that the device - // image to link to solve the imported symbols is from the same - // sycl_device_binaries. Otherwise, we may link a device image from - // another sycl_device_binaries and it may be removed in another thread - // is removeImages is called in another thread. - std::lock_guard ImgStart2DeviceBinsGuard( - m_ImgStart2DeviceBinsMutex); - const unsigned char *MainImgStart = MainImg.getRawData().BinaryStart; - const unsigned char *DepImgStart = Img->getRawData().BinaryStart; - if (m_ImgStart2DeviceBins[MainImgStart] != - m_ImgStart2DeviceBins[DepImgStart]) - continue; - } DeviceImagesToLink.insert(Img); Found = true; for (const sycl_device_binary_property &ISProp : @@ -1906,13 +1889,6 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { // Fill maps for kernel bundles std::lock_guard KernelIDsGuard(m_KernelIDsMutex); - { - std::lock_guard ImgStart2DeviceBinsGuard( - m_ImgStart2DeviceBinsMutex); - m_ImgStart2DeviceBins.insert( - {Img->getRawData().BinaryStart, DeviceBinary}); - } - // For bfloat16 device library image, it doesn't include any kernel, device // global, virtual function, so just skip adding it to any related maps. // We only need to: 1). add exported symbols to m_ExportedSymbolImages. 2). @@ -2112,12 +2088,6 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { // Acquire lock to modify maps for kernel bundles std::lock_guard KernelIDsGuard(m_KernelIDsMutex); - { - std::lock_guard ImgStart2DeviceBinsGuard( - m_ImgStart2DeviceBinsMutex); - m_ImgStart2DeviceBins.erase(Img->getRawData().BinaryStart); - } - { // Clean up Bfloat16 device library image, unregister exported symbols // from current device image. diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 4bedb2cfcfe25..f2fe311308eee 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -498,10 +498,6 @@ class ProgramManager { std::map, ur_kernel_handle_t>; std::unordered_map m_MaterializedKernels; - // Maps between device image binary and sycl_device_binaries it belongs to. - std::mutex m_ImgStart2DeviceBinsMutex; - std::unordered_map - m_ImgStart2DeviceBins; friend class ::ProgramManagerTest; }; } // namespace detail From 1351401977bfc2df6cd6eeaf4361d98f6de85d52 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Fri, 28 Mar 2025 15:09:32 +0800 Subject: [PATCH 07/10] Create DynRTDeviceBinaryImage for bf16 devicelib Signed-off-by: jinge90 --- .../program_manager/program_manager.cpp | 138 ++++++++++++------ .../program_manager/program_manager.hpp | 8 +- sycl/test-e2e/KernelCompiler/sycl.cpp | 8 +- 3 files changed, 106 insertions(+), 48 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 2b1c610d0fa1c..e42a142eb0e65 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -614,22 +614,24 @@ static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, } // Quick check to see whether BinImage is a compiler-generated device image. -static bool isSpecialDeviceImage(RTDeviceBinaryImage *BinImage) { +bool ProgramManager::isSpecialDeviceImage(RTDeviceBinaryImage *BinImage) { // SYCL devicelib image. - if (BinImage->getDeviceLibMetadata().isAvailable()) + if ((m_Bfloat16DeviceLibImages[0].get() == BinImage) || + m_Bfloat16DeviceLibImages[1].get() == BinImage) return true; return false; } -static bool isSpecialDeviceImageShouldBeUsed(RTDeviceBinaryImage *BinImage, - const device &Dev) { +bool ProgramManager::isSpecialDeviceImageShouldBeUsed( + RTDeviceBinaryImage *BinImage, const device &Dev) { // Decide whether a devicelib image should be used. - if (BinImage->getDeviceLibMetadata().isAvailable()) { - const RTDeviceBinaryImage::PropertyRange &DeviceLibMetaProp = - BinImage->getDeviceLibMetadata(); - uint32_t DeviceLibMeta = - DeviceBinaryProperty(*(DeviceLibMetaProp.begin())).asUint32(); + int Bfloat16DeviceLibVersion = -1; + if (m_Bfloat16DeviceLibImages[0].get() == BinImage) + Bfloat16DeviceLibVersion = 0; + if (m_Bfloat16DeviceLibImages[1].get() == BinImage) + Bfloat16DeviceLibVersion = 1; + if (Bfloat16DeviceLibVersion != -1) { // Currently, only bfloat conversion devicelib are supported, so the prop // DeviceLibMeta are only used to represent fallback or native version. // For bfloat16 conversion devicelib, we have fallback and native version. @@ -643,7 +645,8 @@ static bool isSpecialDeviceImageShouldBeUsed(RTDeviceBinaryImage *BinImage, detail::getSyclObjImpl(Dev); std::string NativeBF16ExtName = "cl_intel_bfloat16_conversions"; bool NativeBF16Supported = (DeviceImpl->has_extension(NativeBF16ExtName)); - return NativeBF16Supported == (DeviceLibMeta == DEVICELIB_NATIVE); + return NativeBF16Supported == + (Bfloat16DeviceLibVersion == DEVICELIB_NATIVE); } return false; @@ -1837,17 +1840,53 @@ ProgramManager::kernelImplicitLocalArgPos(const std::string &KernelName) const { return {}; } -static bool shouldSkipEmptyImage(sycl_device_binary RawImg) { - // For bfloat16 device library image, we should keep it although it doesn't - // include any kernel. +static bool isBfloat16DeviceLibImage(sycl_device_binary RawImg, + uint32_t *LibVersion = nullptr) { sycl_device_binary_property_set ImgPS; for (ImgPS = RawImg->PropertySetsBegin; ImgPS != RawImg->PropertySetsEnd; ++ImgPS) { if (ImgPS->Name && - !strcmp(__SYCL_PROPERTY_SET_DEVICELIB_METADATA, ImgPS->Name)) - return false; + !strcmp(__SYCL_PROPERTY_SET_DEVICELIB_METADATA, ImgPS->Name)) { + if (!LibVersion) + return true; + + *LibVersion = 0; + sycl_device_binary_property ImgP; + for (ImgP = ImgPS->PropertiesBegin; ImgP != ImgPS->PropertiesEnd; + ++ImgP) { + if (ImgP->Name && !strcmp("bfloat16", ImgP->Name) && + (ImgP->Type == SYCL_PROPERTY_TYPE_UINT32)) + break; + } + if (ImgP != ImgPS->PropertiesEnd) + *LibVersion = DeviceBinaryProperty(ImgP).asUint32(); + return true; + } + } + + return false; +} + +static sycl_device_binary_property_set +getExportedSymbolPS(sycl_device_binary RawImg) { + sycl_device_binary_property_set ImgPS; + for (ImgPS = RawImg->PropertySetsBegin; ImgPS != RawImg->PropertySetsEnd; + ++ImgPS) { + if (ImgPS->Name && + !strcmp(__SYCL_PROPERTY_SET_SYCL_EXPORTED_SYMBOLS, ImgPS->Name)) + return ImgPS; } + return nullptr; +} + +static bool shouldSkipEmptyImage(sycl_device_binary RawImg) { + // For bfloat16 device library image, we should keep it although it doesn't + // include any kernel. + if (isBfloat16DeviceLibImage(RawImg)) + return false; + + // We may extend the logic here other than bfloat16 device library image. return true; } @@ -1861,6 +1900,8 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { continue; std::unique_ptr Img; + bool IsBfloat16DeviceLib = false; + uint32_t Bfloat16DeviceLibVersion = 0; if (isDeviceImageCompressed(RawImg)) #ifndef SYCL_RT_ZSTD_NOT_AVAIABLE Img = std::make_unique(RawImg); @@ -1870,40 +1911,57 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { "SYCL RT was built without ZSTD support." "Aborting. "); #endif - else - Img = std::make_unique(RawImg); + else { + IsBfloat16DeviceLib = + isBfloat16DeviceLibImage(RawImg, &Bfloat16DeviceLibVersion); + if (!IsBfloat16DeviceLib) + Img = std::make_unique(RawImg); + } static uint32_t SequenceID = 0; - // Fill the kernel argument mask map - const RTDeviceBinaryImage::PropertyRange &KPOIRange = - Img->getKernelParamOptInfo(); - if (KPOIRange.isAvailable()) { - KernelNameToArgMaskMap &ArgMaskMap = - m_EliminatedKernelArgMasks[Img.get()]; - for (const auto &Info : KPOIRange) - ArgMaskMap[Info->Name] = - createKernelArgMask(DeviceBinaryProperty(Info).asByteArray()); + // Fill the kernel argument mask map, no need to do this for bfloat16 + // device library image since it doesn't include any kernel. + if (!IsBfloat16DeviceLib) { + const RTDeviceBinaryImage::PropertyRange &KPOIRange = + Img->getKernelParamOptInfo(); + if (KPOIRange.isAvailable()) { + KernelNameToArgMaskMap &ArgMaskMap = + m_EliminatedKernelArgMasks[Img.get()]; + for (const auto &Info : KPOIRange) + ArgMaskMap[Info->Name] = + createKernelArgMask(DeviceBinaryProperty(Info).asByteArray()); + } } // Fill maps for kernel bundles std::lock_guard KernelIDsGuard(m_KernelIDsMutex); // For bfloat16 device library image, it doesn't include any kernel, device - // global, virtual function, so just skip adding it to any related maps. We - // only need to 1) add exported symbols to m_ExportedSymbolImages, and 2) - // add the device image to m_Bfloat16DeviceLibImages. + // global, virtual function, so just skip adding it to any related maps. + // The bfloat16 device library are provided by compiler and may be used by + // different sycl device images, program manager will own single copy for + // native and fallback version bfloat16 device library, these device + // library images will not be erased unless program manager is destroyed. { - auto Bfloat16DeviceLibProp = Img->getDeviceLibMetadata(); - if (Bfloat16DeviceLibProp.isAvailable()) { - uint32_t LibVersion = DeviceBinaryProperty(*(Bfloat16DeviceLibProp.begin())).asUint32(); - if (m_Bfloat16DeviceLibImages.count(LibVersion) > 0) + if (IsBfloat16DeviceLib) { + if (m_Bfloat16DeviceLibImages.count(Bfloat16DeviceLibVersion) > 0) continue; - for (const sycl_device_binary_property &ESProp : - Img->getExportedSymbols()) { - m_ExportedSymbolImages.insert({ESProp->Name, Img.get()}); + size_t ImgSize = + static_cast(RawImg->BinaryEnd - RawImg->BinaryStart); + std::unique_ptr Data(new char[ImgSize]); + std::memcpy(Data.get(), RawImg->BinaryStart, ImgSize); + auto DynBfloat16DeviceLibImg = + std::make_unique(std::move(Data), ImgSize); + auto ESPropSet = getExportedSymbolPS(RawImg); + sycl_device_binary_property ESProp; + for (ESProp = ESPropSet->PropertiesBegin; + ESProp != ESPropSet->PropertiesEnd; ++ESProp) { + m_ExportedSymbolImages.insert( + {ESProp->Name, DynBfloat16DeviceLibImg.get()}); } - m_Bfloat16DeviceLibImages.insert({LibVersion, std::move(Img)}); + m_Bfloat16DeviceLibImages.insert( + {Bfloat16DeviceLibVersion, std::move(DynBfloat16DeviceLibImg)}); continue; } } @@ -2824,12 +2882,8 @@ static void mergeImageData(const std::vector &Imgs, for (const device_image_plain &Img : Imgs) { const std::shared_ptr &DeviceImageImpl = getSyclObjImpl(Img); - auto BinImgRef = DeviceImageImpl->get_bin_image_ref(); - // For bfloat16 deice library image, no kernels, spec const are included, - // so we just skip merging data. - if (BinImgRef && BinImgRef->getDeviceLibMetadata().isAvailable()) - continue; // Duplicates are not expected here, otherwise urProgramLink should fail + if (DeviceImageImpl->get_kernel_ids_ptr()) KernelIDs.insert(KernelIDs.end(), DeviceImageImpl->get_kernel_ids_ptr()->begin(), DeviceImageImpl->get_kernel_ids_ptr()->end()); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 6630e832403db..9c6101a54a56f 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -376,11 +376,15 @@ class ProgramManager { collectDependentDeviceImagesForVirtualFunctions( const RTDeviceBinaryImage &Img, const device &Dev); + bool isSpecialDeviceImage(RTDeviceBinaryImage *BinImage); + bool isSpecialDeviceImageShouldBeUsed(RTDeviceBinaryImage *BinImage, + const device &Dev); + protected: /// The three maps below are used during kernel resolution. Any kernel is /// identified by its name. using RTDeviceBinaryImageUPtr = std::unique_ptr; - + using DynRTDeviceBinaryImageUPtr = std::unique_ptr; /// Maps names of kernels to their unique kernel IDs. /// TODO: Use std::unordered_set with transparent hash and equality functions /// when C++20 is enabled for the runtime library. @@ -502,7 +506,7 @@ class ProgramManager { // and 1 for native version. These bfloat16 device library images are // provided by compiler long time ago, we expect no further update, so // keeping 1 copy should be OK. - std::unordered_map + std::unordered_map m_Bfloat16DeviceLibImages; friend class ::ProgramManagerTest; diff --git a/sycl/test-e2e/KernelCompiler/sycl.cpp b/sycl/test-e2e/KernelCompiler/sycl.cpp index 5bd6d3d20525b..dc2875fa8c966 100644 --- a/sycl/test-e2e/KernelCompiler/sycl.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl.cpp @@ -533,11 +533,11 @@ int main() { if (!ok) { return -1; } - + // Run test_device_libraries twice to verify bfloat16 device library. return test_build_and_run(q) || test_device_code_split(q) || - test_device_libraries(q) || test_esimd(q) || - test_unsupported_options(q) || test_error(q) || - test_no_visible_ids(q) || test_warning(q); + test_device_libraries(q) || test_device_libraries(q) || + test_device_libraries(q) || test_unsupported_options(q) || + test_error(q) || test_no_visible_ids(q) || test_warning(q); #else static_assert(false, "Kernel Compiler feature test macro undefined"); #endif From 640f679195e92eb83015593af6347443bc25e49d Mon Sep 17 00:00:00 2001 From: jinge90 Date: Fri, 28 Mar 2025 18:43:08 +0800 Subject: [PATCH 08/10] fix clang format Signed-off-by: jinge90 --- sycl/source/detail/program_manager/program_manager.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index e42a142eb0e65..3955b493762fd 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2884,9 +2884,9 @@ static void mergeImageData(const std::vector &Imgs, getSyclObjImpl(Img); // Duplicates are not expected here, otherwise urProgramLink should fail if (DeviceImageImpl->get_kernel_ids_ptr()) - KernelIDs.insert(KernelIDs.end(), - DeviceImageImpl->get_kernel_ids_ptr()->begin(), - DeviceImageImpl->get_kernel_ids_ptr()->end()); + KernelIDs.insert(KernelIDs.end(), + DeviceImageImpl->get_kernel_ids_ptr()->begin(), + DeviceImageImpl->get_kernel_ids_ptr()->end()); // To be able to answer queries about specialziation constants, the new // device image should have the specialization constants from all the linked // images. From 24fa45aa4538ab0d0f33d0957308d7ebf900221b Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 2 Apr 2025 11:39:31 +0800 Subject: [PATCH 09/10] Fix sycl.cpp and use std::array to replace map for bf16 device images Signed-off-by: jinge90 --- .../detail/program_manager/program_manager.cpp | 10 ++++++---- .../detail/program_manager/program_manager.hpp | 12 ++++++------ sycl/test-e2e/KernelCompiler/sycl.cpp | 2 +- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 3955b493762fd..1cb9e362ca44e 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1945,7 +1945,9 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { // library images will not be erased unless program manager is destroyed. { if (IsBfloat16DeviceLib) { - if (m_Bfloat16DeviceLibImages.count(Bfloat16DeviceLibVersion) > 0) + assert((Bfloat16DeviceLibVersion < 2) && + "Invalid Bfloat16 Device Library Index."); + if (m_Bfloat16DeviceLibImages[Bfloat16DeviceLibVersion].get()) continue; size_t ImgSize = static_cast(RawImg->BinaryEnd - RawImg->BinaryStart); @@ -1960,8 +1962,8 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { m_ExportedSymbolImages.insert( {ESProp->Name, DynBfloat16DeviceLibImg.get()}); } - m_Bfloat16DeviceLibImages.insert( - {Bfloat16DeviceLibVersion, std::move(DynBfloat16DeviceLibImg)}); + m_Bfloat16DeviceLibImages[Bfloat16DeviceLibVersion] = + std::move(DynBfloat16DeviceLibImg); continue; } } @@ -2137,7 +2139,7 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { continue; const sycl_offload_entry EntriesB = RawImg->EntriesBegin; const sycl_offload_entry EntriesE = RawImg->EntriesEnd; - if ((EntriesB == EntriesE)) + if (EntriesB == EntriesE) continue; RTDeviceBinaryImage *Img = DevImgIt->second.get(); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 9c6101a54a56f..844ebc9422e2e 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -502,12 +503,11 @@ class ProgramManager { std::map, ur_kernel_handle_t>; std::unordered_map m_MaterializedKernels; - // Holds bfloat16 device library images, the key is 0 for fallback version - // and 1 for native version. These bfloat16 device library images are - // provided by compiler long time ago, we expect no further update, so - // keeping 1 copy should be OK. - std::unordered_map - m_Bfloat16DeviceLibImages; + // Holds bfloat16 device library images, the 1st element is for fallback + // version and 2nd is for native version. These bfloat16 device library + // images are provided by compiler long time ago, we expect no further + // update, so keeping 1 copy should be OK. + std::array m_Bfloat16DeviceLibImages; friend class ::ProgramManagerTest; }; diff --git a/sycl/test-e2e/KernelCompiler/sycl.cpp b/sycl/test-e2e/KernelCompiler/sycl.cpp index dc2875fa8c966..15d2b63cad6bb 100644 --- a/sycl/test-e2e/KernelCompiler/sycl.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl.cpp @@ -535,7 +535,7 @@ int main() { } // Run test_device_libraries twice to verify bfloat16 device library. return test_build_and_run(q) || test_device_code_split(q) || - test_device_libraries(q) || test_device_libraries(q) || + test_device_libraries(q) || test_esimd(q) || test_device_libraries(q) || test_unsupported_options(q) || test_error(q) || test_no_visible_ids(q) || test_warning(q); #else From 472d1be219b5fb1fb7467f5b271db83c71176645 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 2 Apr 2025 12:25:14 +0800 Subject: [PATCH 10/10] change default libversion to 2 Signed-off-by: jinge90 --- sycl/source/detail/program_manager/program_manager.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 1cb9e362ca44e..7e946a1e222e1 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -629,8 +629,9 @@ bool ProgramManager::isSpecialDeviceImageShouldBeUsed( int Bfloat16DeviceLibVersion = -1; if (m_Bfloat16DeviceLibImages[0].get() == BinImage) Bfloat16DeviceLibVersion = 0; - if (m_Bfloat16DeviceLibImages[1].get() == BinImage) + else if (m_Bfloat16DeviceLibImages[1].get() == BinImage) Bfloat16DeviceLibVersion = 1; + if (Bfloat16DeviceLibVersion != -1) { // Currently, only bfloat conversion devicelib are supported, so the prop // DeviceLibMeta are only used to represent fallback or native version. @@ -1850,7 +1851,8 @@ static bool isBfloat16DeviceLibImage(sycl_device_binary RawImg, if (!LibVersion) return true; - *LibVersion = 0; + // Valid version for bfloat16 device library is 0(fallback), 1(native). + *LibVersion = 2; sycl_device_binary_property ImgP; for (ImgP = ImgPS->PropertiesBegin; ImgP != ImgPS->PropertiesEnd; ++ImgP) {