diff --git a/sycl/test-e2e/DeviceLib/bfloat16_conversion_dlopen_test.cpp b/sycl/test-e2e/DeviceLib/bfloat16_conversion_dlopen_test.cpp new file mode 100644 index 0000000000000..2b5c1a8d1bb74 --- /dev/null +++ b/sycl/test-e2e/DeviceLib/bfloat16_conversion_dlopen_test.cpp @@ -0,0 +1,137 @@ +//==----------- bf1oat16 devicelib dlopen test for SYCL JIT ----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// The case uses dlopen/close to load/unload a sycl shared library which +// depends bfloat16 device library and the main function also includes sycl +// kernels which depend on bfloat16 device library. SYCL program manager will +// own the bfloat16 device library image which is shared by all kernels using +// bfloat16 features, so the program should also work well when the shared +// library is dlclosed and the device images are removed. + +// REQUIRES: linux + +// RUN: %{build} -DBUILD_LIB -fPIC -shared -o %T/lib%basename_t.so + +// RUN: %{build} -DFNAME=%basename_t -ldl -Wl,-rpath=%T -o %t1.out + +// RUN: %{run} %t1.out + +// UNSUPPORTED: target-nvidia || target-amd +// UNSUPPORTED-INTENDED: bfloat16 device library is not used on AMD and Nvidia. + +#include +#include +#include + +#include +#include + +using namespace sycl; + +constexpr access::mode sycl_read = access::mode::read; +constexpr access::mode sycl_write = access::mode::write; + +using BFP = sycl::ext::oneapi::bfloat16; + +#ifdef BUILD_LIB +class FOO_KERN; +void foo() { + queue deviceQueue; + BFP bf16_v; + float fp32_v = 16.5f; + { + buffer fp32_buffer{&fp32_v, 1}; + buffer bf16_buffer{&bf16_v, 1}; + deviceQueue + .submit([&](handler &cgh) { + auto fp32_acc = fp32_buffer.get_access(cgh); + auto bf16_acc = bf16_buffer.get_access(cgh); + cgh.single_task([=]() { bf16_acc[0] = BFP{fp32_acc[0]}; }); + }) + .wait(); + } + std::cout << "In foo: " << bf16_v << std::endl; +} +#else + +class MAINRUN; +void main_run(queue &deviceQueue) { + BFP bf16_v; + float fp32_v = 16.5f; + { + buffer fp32_buffer{&fp32_v, 1}; + buffer bf16_buffer{&bf16_v, 1}; + deviceQueue + .submit([&](handler &cgh) { + auto fp32_acc = fp32_buffer.get_access(cgh); + auto bf16_acc = bf16_buffer.get_access(cgh); + cgh.single_task( + [=]() { bf16_acc[0] = BFP{fp32_acc[0] + 0.5f}; }); + }) + .wait(); + } + std::cout << "In run: " << bf16_v << std::endl; +} + +#define STRINGIFY_HELPER(A) #A +#define STRINGIFY(A) STRINGIFY_HELPER(A) +#define SO_FNAME "lib" STRINGIFY(FNAME) ".so" + +int main() { + BFP bf16_array[3]; + float fp32_array[3] = {7.0f, 8.5f, 0.5f}; + queue deviceQueue; + std::vector all_kernel_ids; + bool dynlib_kernel_available = false; + bool dynlib_kernel_unavailable = true; + main_run(deviceQueue); + + void *handle = dlopen(SO_FNAME, RTLD_LAZY); + void (*func)(); + *(void **)(&func) = dlsym(handle, "_Z3foov"); + func(); + all_kernel_ids = sycl::get_kernel_ids(); + for (auto k : all_kernel_ids) { + if (k.get_name() && std::strstr(k.get_name(), "FOO_KERN")) + dynlib_kernel_available = true; + } + + // Before dlclose, the FOO_KERN from sycl dynamic library must exist. + assert(dynlib_kernel_available); + + dlclose(handle); + + all_kernel_ids = sycl::get_kernel_ids(); + for (auto k : all_kernel_ids) { + if (k.get_name() && std::strstr(k.get_name(), "FOO_KERN")) + dynlib_kernel_unavailable = false; + } + + assert(dynlib_kernel_unavailable); + + { + buffer fp32_buffer{fp32_array, 3}; + buffer bf16_buffer{bf16_array, 3}; + deviceQueue + .submit([&](handler &cgh) { + auto fp32_acc = fp32_buffer.get_access(cgh); + auto bf16_acc = bf16_buffer.get_access(cgh); + cgh.single_task([=]() { + bf16_acc[0] = BFP{fp32_acc[0]}; + bf16_acc[1] = BFP{fp32_acc[1]}; + bf16_acc[2] = BFP{fp32_acc[2]}; + }); + }) + .wait(); + } + std::cout << "In main: " << bf16_array[0] << " " << bf16_array[1] << " " + << bf16_array[2] << std::endl; + + return 0; +} +#endif