@@ -18,6 +18,9 @@ endif ()
1818set (OFFLOAD_UNITTESTS_DIR ${CMAKE_CURRENT_SOURCE_DIR} )
1919
2020function (add_offload_test_device_code test_filename test_name)
21+ cmake_parse_arguments (
22+ "ARGS" "WITH_DEVICE_MATH_LIBS" "" "" ${ARGN} )
23+
2124 set (SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR} /${test_filename} )
2225 set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
2326
@@ -37,13 +40,25 @@ function(add_offload_test_device_code test_filename test_name)
3740 endif ()
3841
3942 if (nvptx_arch AND CUDAToolkit_FOUND)
43+ set (nvptx_compile_flags ${ARGS_UNPARSED_ARGUMENTS} )
44+
45+ if (ARGS_WITH_DEVICE_MATH_LIBS)
46+ file (GLOB libdevice_paths "${CUDAToolkit_LIBRARY_ROOT} /nvvm/libdevice/libdevice.*.bc" )
47+ if (libdevice_paths)
48+ list (GET libdevice_paths 0 libdevice_path)
49+ list (APPEND nvptx_compile_flags "-Xclang" "-mlink-builtin-bitcode" )
50+ list (APPEND nvptx_compile_flags "-Xclang" "${libdevice_path} " )
51+ list (APPEND nvptx_compile_flags "-DCUDA_MATH_FOUND=1" )
52+ endif ()
53+ endif ()
54+
4055 set (output_file "${CMAKE_CURRENT_BINARY_DIR} /${test_name} .nvptx64.bin" )
4156 add_custom_command (
4257 OUTPUT ${output_file}
4358 COMMAND ${CMAKE_CXX_COMPILER}
4459 -I${OFFLOAD_UNITTESTS_DIR}
4560 --target =nvptx64-nvidia-cuda -march=${nvptx_arch}
46- -nogpulib --cuda-path =${cuda_path} -flto ${ARGN }
61+ -nogpulib --cuda-path =${cuda_path} -flto ${nvptx_compile_flags }
4762 ${SRC_PATH} -o ${output_file}
4863 DEPENDS ${SRC_PATH}
4964 )
@@ -62,13 +77,25 @@ function(add_offload_test_device_code test_filename test_name)
6277 endif ()
6378
6479 if (amdgpu_arch)
80+ set (amdgpu_compile_flags ${ARGS_UNPARSED_ARGUMENTS} )
81+
82+ if (ARGS_WITH_DEVICE_MATH_LIBS)
83+ find_package (AMDDeviceLibs QUIET HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
84+ if (AMDDeviceLibs_FOUND)
85+ get_target_property (ocml_path ocml IMPORTED_LOCATION )
86+ list (APPEND amdgpu_compile_flags "-Xclang" "-mlink-builtin-bitcode" )
87+ list (APPEND amdgpu_compile_flags "-Xclang" "${ocml_path} " )
88+ list (APPEND amdgpu_compile_flags "-DHIP_MATH_FOUND=1" )
89+ endif ()
90+ endif ()
91+
6592 set (output_file "${CMAKE_CURRENT_BINARY_DIR} /${test_name} .amdgpu.bin" )
6693 add_custom_command (
6794 OUTPUT ${output_file}
6895 COMMAND ${CMAKE_CXX_COMPILER}
6996 -I${OFFLOAD_UNITTESTS_DIR}
7097 --target =amdgcn-amd-amdhsa -mcpu=${amdgpu_arch}
71- -nogpulib -flto ${ARGN } ${SRC_PATH} -o ${output_file}
98+ -nogpulib -flto ${amdgpu_compile_flags } ${SRC_PATH} -o ${output_file}
7299 DEPENDS ${SRC_PATH}
73100 )
74101 add_custom_target (${test_name} .amdgpu DEPENDS ${output_file} )
0 commit comments