From 3ce9b0cd5d796cb03ff158bc29beb7ae6fec51e5 Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Fri, 4 Apr 2025 12:32:48 +0100 Subject: [PATCH] [SYCL][NativeCPU] Process nativecpu_utils with prepare_builtins This ensures that functions have the right linkage. Several functions are marked as used to prevent them from being removed as dead code before the work item loop pass runs. --- libdevice/cmake/modules/SYCLLibdevice.cmake | 26 +++++++++++++++------ libdevice/nativecpu_utils.cpp | 17 +++++++++----- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 39f92b435b48c..84cbb352de8bb 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -315,13 +315,25 @@ if("native_cpu" IN_LIST SYCL_ENABLE_BACKENDS) endif() # Include NativeCPU UR adapter path to enable finding header file with state struct. # libsycl-nativecpu_utils is only needed as BC file by NativeCPU. - # Todo: add versions for other targets (for cross-compilation) - compile_lib(libsycl-nativecpu_utils - FILETYPE bc - SRC nativecpu_utils.cpp - DEPENDENCIES ${itt_obj_deps} - EXTRA_OPTS -I ${NATIVE_CPU_DIR} -fsycl-targets=native_cpu -fsycl-device-only - -fsycl-device-obj=llvmir) + add_custom_command( + OUTPUT ${bc_binary_dir}/nativecpu_utils.bc + COMMAND ${clang_exe} ${compile_opts} ${bc_device_compile_opts} -fsycl-targets=native_cpu + -I ${NATIVE_CPU_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/nativecpu_utils.cpp + -o ${bc_binary_dir}/nativecpu_utils.bc + MAIN_DEPENDENCY nativecpu_utils.cpp + DEPENDS ${sycl-compiler_deps} + VERBATIM) + add_custom_target(nativecpu_utils-bc DEPENDS ${bc_binary_dir}/nativecpu_utils.bc) + process_bc(libsycl-nativecpu_utils.bc + LIB_TGT libsycl-nativecpu_utils + IN_FILE ${bc_binary_dir}/nativecpu_utils.bc + OUT_DIR ${bc_binary_dir}) + add_custom_target(libsycl-nativecpu_utils-bc DEPENDS ${bc_binary_dir}/libsycl-nativecpu_utils.bc) + add_dependencies(libsycldevice-bc libsycl-nativecpu_utils-bc) + install(FILES ${bc_binary_dir}/libsycl-nativecpu_utils.bc + DESTINATION ${install_dest_bc} + COMPONENT libsycldevice) endif() # Add all device libraries for each filetype except for the Intel math function diff --git a/libdevice/nativecpu_utils.cpp b/libdevice/nativecpu_utils.cpp index 01e3b13bcb9c6..51ef68cfada96 100644 --- a/libdevice/nativecpu_utils.cpp +++ b/libdevice/nativecpu_utils.cpp @@ -29,6 +29,11 @@ using __nativecpu_state = native_cpu::state; #define DEVICE_EXTERNAL_C DEVICE_EXTERN_C __attribute__((always_inline)) #define DEVICE_EXTERNAL SYCL_EXTERNAL __attribute__((always_inline)) +// Several functions are used implicitly by WorkItemLoopsPass and +// PrepareSYCLNativeCPUPass and need to be marked as used to prevent them being +// removed early. +#define USED __attribute__((used)) + #define OCL_LOCAL __attribute__((opencl_local)) #define OCL_GLOBAL __attribute__((opencl_global)) #define OCL_PRIVATE __attribute__((opencl_private)) @@ -354,7 +359,7 @@ using MakeGlobalType = typename sycl::detail::DecoratedType< T, sycl::access::address_space::global_space>::type; #define DefStateSetWithType(name, field, type) \ - DEVICE_EXTERNAL_C void __dpcpp_nativecpu_##name( \ + DEVICE_EXTERNAL_C USED void __dpcpp_nativecpu_##name( \ type value, MakeGlobalType<__nativecpu_state> *s) { \ s->field = value; \ } \ @@ -366,7 +371,7 @@ DefStateSetWithType(set_sub_group_id, SubGroup_id, uint32_t); DefStateSetWithType(set_max_sub_group_size, SubGroup_size, uint32_t); #define DefineStateGetWithType(name, field, type) \ - DEVICE_EXTERNAL_C GET_PROPS type __dpcpp_nativecpu_##name( \ + DEVICE_EXTERNAL_C GET_PROPS USED type __dpcpp_nativecpu_##name( \ MakeGlobalType *s) { \ return s->field; \ } \ @@ -382,7 +387,7 @@ DefineStateGet_U32(get_max_sub_group_size, SubGroup_size); DefineStateGet_U32(get_num_sub_groups, NumSubGroups); #define DefineStateGetWithType2(name, field, rtype, ptype) \ - DEVICE_EXTERNAL_C GET_PROPS rtype __dpcpp_nativecpu_##name( \ + DEVICE_EXTERNAL_C GET_PROPS USED rtype __dpcpp_nativecpu_##name( \ ptype dim, MakeGlobalType *s) { \ return s->field[dim]; \ } \ @@ -400,9 +405,9 @@ DefineStateGet_U64(get_num_groups, MNumGroups); DefineStateGet_U64(get_wg_size, MWorkGroup_size); DefineStateGet_U64(get_wg_id, MWorkGroup_id); -DEVICE_EXTERNAL_C -void __dpcpp_nativecpu_set_local_id(uint32_t dim, uint64_t value, - MakeGlobalType<__nativecpu_state> *s) { +DEVICE_EXTERNAL_C USED void +__dpcpp_nativecpu_set_local_id(uint32_t dim, uint64_t value, + MakeGlobalType<__nativecpu_state> *s) { s->MLocal_id[dim] = value; s->MGlobal_id[dim] = s->MWorkGroup_size[dim] * s->MWorkGroup_id[dim] + s->MLocal_id[dim] + s->MGlobalOffset[dim];