diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 7014949e124a9..ced321b80433d 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1912,10 +1912,13 @@ void ProgramManager::addImage(sycl_device_binary RawImg, if (EntriesB == EntriesE && shouldSkipEmptyImage(RawImg)) return; - std::unique_ptr Img; - bool IsBfloat16DeviceLib = false; uint32_t Bfloat16DeviceLibVersion = 0; - if (isDeviceImageCompressed(RawImg)) + const bool IsBfloat16DeviceLib = + isBfloat16DeviceLibImage(RawImg, &Bfloat16DeviceLibVersion); + const bool IsDeviceImageCompressed = isDeviceImageCompressed(RawImg); + + std::unique_ptr Img; + if (IsDeviceImageCompressed) { #ifndef SYCL_RT_ZSTD_NOT_AVAIABLE Img = std::make_unique(RawImg); #else @@ -1924,11 +1927,8 @@ void ProgramManager::addImage(sycl_device_binary RawImg, "SYCL RT was built without ZSTD support." "Aborting. "); #endif - else { - IsBfloat16DeviceLib = - isBfloat16DeviceLibImage(RawImg, &Bfloat16DeviceLibVersion); - if (!IsBfloat16DeviceLib) - Img = std::make_unique(RawImg); + } else if (!IsBfloat16DeviceLib) { + Img = std::make_unique(RawImg); } // If an output image is requested, set it to the newly allocated image. @@ -1966,21 +1966,29 @@ void ProgramManager::addImage(sycl_device_binary RawImg, "Invalid Bfloat16 Device Library Index."); if (m_Bfloat16DeviceLibImages[Bfloat16DeviceLibVersion].get()) return; - size_t ImgSize = - static_cast(RawImg->BinaryEnd - RawImg->BinaryStart); - std::unique_ptr Data(new char[ImgSize]); - std::memcpy(Data.get(), RawImg->BinaryStart, ImgSize); - auto DynBfloat16DeviceLibImg = - std::make_unique(std::move(Data), ImgSize); + + std::unique_ptr DevImg; + if (IsDeviceImageCompressed) { + // Decompress the image. + CheckAndDecompressImage(Img.get()); + DevImg = std::move(Img); + } else { + size_t ImgSize = + static_cast(RawImg->BinaryEnd - RawImg->BinaryStart); + std::unique_ptr Data(new char[ImgSize]); + std::memcpy(Data.get(), RawImg->BinaryStart, ImgSize); + DevImg = + std::make_unique(std::move(Data), ImgSize); + } + + // Register export symbols for bfloat16 device library image. auto ESPropSet = getExportedSymbolPS(RawImg); - sycl_device_binary_property ESProp; - for (ESProp = ESPropSet->PropertiesBegin; + for (auto ESProp = ESPropSet->PropertiesBegin; ESProp != ESPropSet->PropertiesEnd; ++ESProp) { - m_ExportedSymbolImages.insert( - {ESProp->Name, DynBfloat16DeviceLibImg.get()}); + m_ExportedSymbolImages.insert({ESProp->Name, DevImg.get()}); } - m_Bfloat16DeviceLibImages[Bfloat16DeviceLibVersion] = - std::move(DynBfloat16DeviceLibImg); + m_Bfloat16DeviceLibImages[Bfloat16DeviceLibVersion] = std::move(DevImg); + return; } } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index e0ccabef2860e..7152dc3ebab92 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -541,7 +541,7 @@ class ProgramManager { // version and 2nd is for native version. These bfloat16 device library // images are provided by compiler long time ago, we expect no further // update, so keeping 1 copy should be OK. - std::array m_Bfloat16DeviceLibImages; + std::array m_Bfloat16DeviceLibImages; friend class ::ProgramManagerTest; }; diff --git a/sycl/test-e2e/DeviceLib/bfloat16_conversion_test.cpp b/sycl/test-e2e/DeviceLib/bfloat16_conversion_test.cpp index a5b96ee1c067e..34b5d873d545d 100644 --- a/sycl/test-e2e/DeviceLib/bfloat16_conversion_test.cpp +++ b/sycl/test-e2e/DeviceLib/bfloat16_conversion_test.cpp @@ -8,70 +8,10 @@ // REQUIRES: linux // RUN: %{build} -DBUILD_LIB -fPIC -shared -o %T/lib%basename_t.so - // RUN: %{build} -DBUILD_EXE -L%T -o %t1.out -l%basename_t -Wl,-rpath=%T // RUN: %{run} %t1.out // UNSUPPORTED: target-nvidia || target-amd // UNSUPPORTED-INTENDED: bfloat16 device library is not used on AMD and Nvidia. -#include -#include - -using namespace sycl; - -constexpr access::mode sycl_read = access::mode::read; -constexpr access::mode sycl_write = access::mode::write; - -using BFP = sycl::ext::oneapi::bfloat16; - -#ifdef BUILD_LIB -void foo(queue &deviceQueue) { - BFP bf16_v; - float fp32_v = 16.5f; - { - buffer fp32_buffer{&fp32_v, 1}; - buffer bf16_buffer{&bf16_v, 1}; - deviceQueue - .submit([&](handler &cgh) { - auto fp32_acc = fp32_buffer.template get_access(cgh); - auto bf16_acc = bf16_buffer.template get_access(cgh); - cgh.single_task([=]() { bf16_acc[0] = BFP{fp32_acc[0]}; }); - }) - .wait(); - } - std::cout << "In foo: " << bf16_v << std::endl; -} -#endif - -#ifdef BUILD_EXE -void foo(queue &deviceQueue); -#endif - -int main() { - BFP bf16_array[3]; - float fp32_array[3] = {7.0f, 8.5f, 0.5f}; - - sycl::queue deviceQueue; - { - buffer fp32_buffer{fp32_array, 3}; - buffer bf16_buffer{bf16_array, 3}; - deviceQueue - .submit([&](handler &cgh) { - auto fp32_acc = fp32_buffer.template get_access(cgh); - auto bf16_acc = bf16_buffer.template get_access(cgh); - cgh.single_task([=]() { - bf16_acc[0] = BFP{fp32_acc[0]}; - bf16_acc[1] = BFP{fp32_acc[1]}; - bf16_acc[2] = BFP{fp32_acc[2]}; - }); - }) - .wait(); - } - std::cout << bf16_array[0] << " " << bf16_array[1] << " " << bf16_array[2] - << std::endl; -#ifdef BUILD_EXE - foo(deviceQueue); -#endif - return 0; -} +#include "bfloat16_conversion_test.hpp" diff --git a/sycl/test-e2e/DeviceLib/bfloat16_conversion_test.hpp b/sycl/test-e2e/DeviceLib/bfloat16_conversion_test.hpp new file mode 100644 index 0000000000000..372da62924867 --- /dev/null +++ b/sycl/test-e2e/DeviceLib/bfloat16_conversion_test.hpp @@ -0,0 +1,60 @@ +#include +#include + +using namespace sycl; + +constexpr access::mode sycl_read = access::mode::read; +constexpr access::mode sycl_write = access::mode::write; + +using BFP = sycl::ext::oneapi::bfloat16; + +#ifdef BUILD_LIB +void foo(queue &deviceQueue) { + BFP bf16_v; + float fp32_v = 16.5f; + { + buffer fp32_buffer{&fp32_v, 1}; + buffer bf16_buffer{&bf16_v, 1}; + deviceQueue + .submit([&](handler &cgh) { + auto fp32_acc = fp32_buffer.template get_access(cgh); + auto bf16_acc = bf16_buffer.template get_access(cgh); + cgh.single_task([=]() { bf16_acc[0] = BFP{fp32_acc[0]}; }); + }) + .wait(); + } + std::cout << "In foo: " << bf16_v << std::endl; +} +#endif + +#ifdef BUILD_EXE +void foo(queue &deviceQueue); +#endif + +int main() { + BFP bf16_array[3]; + float fp32_array[3] = {7.0f, 8.5f, 0.5f}; + + sycl::queue deviceQueue; + { + buffer fp32_buffer{fp32_array, 3}; + buffer bf16_buffer{bf16_array, 3}; + deviceQueue + .submit([&](handler &cgh) { + auto fp32_acc = fp32_buffer.template get_access(cgh); + auto bf16_acc = bf16_buffer.template get_access(cgh); + cgh.single_task([=]() { + bf16_acc[0] = BFP{fp32_acc[0]}; + bf16_acc[1] = BFP{fp32_acc[1]}; + bf16_acc[2] = BFP{fp32_acc[2]}; + }); + }) + .wait(); + } + std::cout << bf16_array[0] << " " << bf16_array[1] << " " << bf16_array[2] + << std::endl; +#ifdef BUILD_EXE + foo(deviceQueue); +#endif + return 0; +} diff --git a/sycl/test-e2e/DeviceLib/bfloat16_conversion_test_compress.cpp b/sycl/test-e2e/DeviceLib/bfloat16_conversion_test_compress.cpp new file mode 100644 index 0000000000000..b5a883e8df97b --- /dev/null +++ b/sycl/test-e2e/DeviceLib/bfloat16_conversion_test_compress.cpp @@ -0,0 +1,19 @@ +//==-------------- bf1oat16 devicelib test for SYCL JIT --------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Check bfloat16 devicelib device image compression. + +// REQUIRES: linux, zstd +// RUN: %{build} --offload-compress -DBUILD_LIB -fPIC -shared -o %T/lib%basename_t_compress.so +// RUN: %{build} --offload-compress -DBUILD_EXE -L%T -o %t1.out -l%basename_t_compress -Wl,-rpath=%T +// RUN: %{run} %t1.out + +// UNSUPPORTED: target-nvidia || target-amd +// UNSUPPORTED-INTENDED: bfloat16 device library is not used on AMD and Nvidia. + +#include "bfloat16_conversion_test.hpp"