@@ -18,6 +18,9 @@ endif ()
18
18
# Source directory of this CMake file; the device-code compile commands pass it
# to the compiler as an include path (-I).
set(OFFLOAD_UNITTESTS_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
19
19
20
20
function (add_offload_test_device_code test_filename test_name )
21
+ cmake_parse_arguments (
22
+ "ARGS" "WITH_DEVICE_MATH_LIBS" "" "" ${ARGN} )
23
+
21
24
set (SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR} /${test_filename} )
22
25
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY )
23
26
@@ -37,13 +40,25 @@ function(add_offload_test_device_code test_filename test_name)
37
40
endif ()
38
41
39
42
if (nvptx_arch AND CUDAToolkit_FOUND )
43
+ set (nvptx_compile_flags ${ARGS_UNPARSED_ARGUMENTS} )
44
+
45
+ if (ARGS_WITH_DEVICE_MATH_LIBS )
46
+ file (GLOB libdevice_paths "${CUDAToolkit_LIBRARY_ROOT} /nvvm/libdevice/libdevice.*.bc" )
47
+ if (libdevice_paths )
48
+ list (GET libdevice_paths 0 libdevice_path )
49
+ list (APPEND nvptx_compile_flags "-Xclang" "-mlink-builtin-bitcode" )
50
+ list (APPEND nvptx_compile_flags "-Xclang" "${libdevice_path} " )
51
+ list (APPEND nvptx_compile_flags "-DCUDA_MATH_FOUND=1" )
52
+ endif ()
53
+ endif ()
54
+
40
55
set (output_file "${CMAKE_CURRENT_BINARY_DIR} /${test_name} .nvptx64.bin" )
41
56
add_custom_command (
42
57
OUTPUT ${output_file}
43
58
COMMAND ${CMAKE_CXX_COMPILER}
44
59
-I${OFFLOAD_UNITTESTS_DIR}
45
60
--target=nvptx64-nvidia-cuda -march=${nvptx_arch}
46
- -nogpulib --cuda-path=${cuda_path} -flto ${ARGN }
61
+ -nogpulib --cuda-path=${cuda_path} -flto ${nvptx_compile_flags }
47
62
${SRC_PATH} -o ${output_file}
48
63
DEPENDS ${SRC_PATH}
49
64
)
@@ -62,13 +77,25 @@ function(add_offload_test_device_code test_filename test_name)
62
77
endif ()
63
78
64
79
if (amdgpu_arch )
80
+ set (amdgpu_compile_flags ${ARGS_UNPARSED_ARGUMENTS} )
81
+
82
+ if (ARGS_WITH_DEVICE_MATH_LIBS )
83
+ find_package (AMDDeviceLibs QUIET HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm )
84
+ if (AMDDeviceLibs_FOUND )
85
+ get_target_property (ocml_path ocml IMPORTED_LOCATION )
86
+ list (APPEND amdgpu_compile_flags "-Xclang" "-mlink-builtin-bitcode" )
87
+ list (APPEND amdgpu_compile_flags "-Xclang" "${ocml_path} " )
88
+ list (APPEND amdgpu_compile_flags "-DHIP_MATH_FOUND=1" )
89
+ endif ()
90
+ endif ()
91
+
65
92
set (output_file "${CMAKE_CURRENT_BINARY_DIR} /${test_name} .amdgpu.bin" )
66
93
add_custom_command (
67
94
OUTPUT ${output_file}
68
95
COMMAND ${CMAKE_CXX_COMPILER}
69
96
-I${OFFLOAD_UNITTESTS_DIR}
70
97
--target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch}
71
- -nogpulib -flto ${ARGN } ${SRC_PATH} -o ${output_file}
98
+ -nogpulib -flto ${amdgpu_compile_flags } ${SRC_PATH} -o ${output_file}
72
99
DEPENDS ${SRC_PATH}
73
100
)
74
101
add_custom_target (${test_name} .amdgpu DEPENDS ${output_file} )
0 commit comments