@@ -42,43 +42,6 @@ set(devicertl_base_directory ${CMAKE_CURRENT_SOURCE_DIR})
4242set (include_directory ${devicertl_base_directory} /include )
4343set (source_directory ${devicertl_base_directory} /src)
4444
45- set (all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803"
46- "gfx9-generic;gfx900;gfx902;gfx906;gfx908"
47- "gfx90a;gfx90c"
48- "gfx9-4-generic;gfx940;gfx941;gfx942;gfx950"
49- "gfx10-1-generic;gfx1010;gfx1012"
50- "gfx10-3-generic;gfx1030;gfx1031;gfx1032;gfx1033"
51- "gfx1034;gfx1035;gfx1036"
52- "gfx11-generic;gfx1100;gfx1101;gfx1102;gfx1103"
53- "gfx1150;gfx1151;gfx1152;gfx1153"
54- "gfx12-generic" )
55- set (all_nvptx_architectures "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
56- "sm_70;sm_72;sm_75;sm_80;sm_86;sm_87;sm_89;sm_90" )
57- set (all_gpu_architectures
58- "${all_amdgpu_architectures} ;${all_nvptx_architectures} " )
59-
60- set (LIBOMPTARGET_DEVICE_ARCHITECTURES "all" CACHE STRING
61- "List of device architectures to be used to compile the OpenMP DeviceRTL." )
62-
63- if (LIBOMPTARGET_DEVICE_ARCHITECTURES STREQUAL "all" )
64- set (LIBOMPTARGET_DEVICE_ARCHITECTURES ${all_gpu_architectures} )
65- elseif (LIBOMPTARGET_DEVICE_ARCHITECTURES STREQUAL "amdgpu" )
66- set (LIBOMPTARGET_DEVICE_ARCHITECTURES ${all_amdgpu_architectures} )
67- elseif (LIBOMPTARGET_DEVICE_ARCHITECTURES STREQUAL "nvptx" )
68- set (LIBOMPTARGET_DEVICE_ARCHITECTURES ${all_nvptx_architectures} )
69- elseif (LIBOMPTARGET_DEVICE_ARCHITECTURES STREQUAL "auto" OR
70- LIBOMPTARGET_DEVICE_ARCHITECTURES STREQUAL "native" )
71- if (NOT LIBOMPTARGET_NVPTX_ARCH AND NOT LIBOMPTARGET_AMDGPU_ARCH)
72- message (FATAL_ERROR
73- "Could not find 'amdgpu-arch' and 'nvptx-arch' tools required for 'auto'" )
74- elseif (NOT LIBOMPTARGET_FOUND_NVIDIA_GPU AND NOT LIBOMPTARGET_FOUND_AMDGPU_GPU)
75- message (FATAL_ERROR "No AMD or NVIDIA GPU found on the system when using 'auto'" )
76- endif ()
77- set (LIBOMPTARGET_DEVICE_ARCHITECTURES
78- "${LIBOMPTARGET_NVPTX_DETECTED_ARCH_LIST} ;${LIBOMPTARGET_AMDGPU_DETECTED_ARCH_LIST} " )
79- endif ()
80- list (REMOVE_DUPLICATES LIBOMPTARGET_DEVICE_ARCHITECTURES)
81-
8245set (include_files
8346 ${include_directory} /Allocator.h
8447 ${include_directory} /Configuration .h
@@ -146,20 +109,22 @@ set(bc_flags -c -foffload-lto -std=c++17 -fvisibility=hidden
146109
147110# first create an object target
148111add_library (omptarget.devicertl.all_objs OBJECT IMPORTED )
149- function (compileDeviceRTLLibrary target_cpu target_name target_triple)
112+ function (compileDeviceRTLLibrary target_name target_triple)
150113 set (target_bc_flags ${ARGN} )
151114
152115 set (bc_files "" )
153116 foreach (src ${src_files} )
154117 get_filename_component (infile ${src} ABSOLUTE )
155118 get_filename_component (outfile ${src} NAME )
156- set (outfile "${outfile} -${target_cpu } .bc" )
119+ set (outfile "${outfile} -${target_name } .bc" )
157120 set (depfile "${outfile} .d" )
158121
122+ # Passing an empty CPU to -march= suppresses target-specific metadata.
159123 add_custom_command (OUTPUT ${outfile}
160124 COMMAND ${CLANG_TOOL}
161125 ${bc_flags}
162- --offload-arch=${target_cpu}
126+ -fopenmp-targets=${target_triple}
127+ -Xopenmp-target =${target_triple} -march=
163128 ${target_bc_flags}
164129 -MD -MF ${depfile}
165130 ${infile} -o ${outfile}
@@ -182,7 +147,7 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple)
182147 list (APPEND bc_files ${outfile} )
183148 endforeach ()
184149
185- set (bclib_name "libomptarget-${target_name} - ${target_cpu} .bc" )
150+ set (bclib_name "libomptarget-${target_name} .bc" )
186151
187152 # Link to a bitcode library.
188153 add_custom_command (OUTPUT ${CMAKE_CURRENT_BINARY_DIR} /linked_${bclib_name}
@@ -222,7 +187,7 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple)
222187 APPEND )
223188 endif ()
224189
225- set (bclib_target_name "omptarget-${target_name} -${target_cpu} - bc" )
190+ set (bclib_target_name "omptarget-${target_name} -bc" )
226191 add_custom_target (${bclib_target_name} DEPENDS ${CMAKE_CURRENT_BINARY_DIR} /${bclib_name} )
227192
228193 # Copy library to destination.
@@ -244,7 +209,7 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple)
244209 # Package the bitcode into an LLVM offloading binary and embed it in an ELF for the static library
245210 add_custom_command (OUTPUT ${CMAKE_CURRENT_BINARY_DIR} /packaged_${bclib_name}
246211 COMMAND ${PACKAGER_TOOL} -o ${CMAKE_CURRENT_BINARY_DIR} /packaged_${bclib_name}
247- "--image=file=${CMAKE_CURRENT_BINARY_DIR} /${bclib_name} ,${target_feature} ,triple=${target_triple} ,arch=${target_cpu} ,kind=openmp"
212+ "--image=file=${CMAKE_CURRENT_BINARY_DIR} /${bclib_name} ,${target_feature} ,triple=${target_triple} ,arch=generic ,kind=openmp"
248213 DEPENDS ${CMAKE_CURRENT_BINARY_DIR} /${bclib_name}
249214 COMMENT "Packaging LLVM offloading binary ${bclib_name} .out"
250215 )
@@ -254,14 +219,14 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple)
254219 APPEND )
255220 endif ()
256221
257- set (output_name "${CMAKE_CURRENT_BINARY_DIR} /devicertl-${target_name} - ${target_cpu} .o" )
222+ set (output_name "${CMAKE_CURRENT_BINARY_DIR} /devicertl-${target_name} .o" )
258223 add_custom_command (OUTPUT ${output_name}
259224 COMMAND ${CLANG_TOOL} --std=c++17 -c -nostdlib
260225 -Xclang -fembed-offload-object=${CMAKE_CURRENT_BINARY_DIR} /packaged_${bclib_name}
261226 -o ${output_name}
262227 ${source_directory} /Stub.cpp
263228 DEPENDS ${CMAKE_CURRENT_BINARY_DIR} /packaged_${bclib_name} ${source_directory} /Stub.cpp
264- COMMENT "Embedding LLVM offloading binary in devicertl-${target_name} - ${target_cpu} .o"
229+ COMMENT "Embedding LLVM offloading binary in devicertl-${target_name} .o"
265230 VERBATIM
266231 )
267232 if (TARGET clang)
@@ -274,11 +239,11 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple)
274239 set_property (TARGET omptarget.devicertl.all_objs APPEND PROPERTY IMPORTED_OBJECTS ${output_name} )
275240
276241 if (CMAKE_EXPORT_COMPILE_COMMANDS)
277- set (ide_target_name omptarget-ide-${target_name} - ${target_cpu} )
242+ set (ide_target_name omptarget-ide-${target_name} )
278243 add_library (${ide_target_name} STATIC EXCLUDE_FROM_ALL ${src_files} )
279244 target_compile_options (${ide_target_name} PRIVATE
280- -fopenmp --offload-arch =${target_cpu } -fopenmp-cuda-mode
281- -mllvm -openmp-opt-disable
245+ -fopenmp-targets= ${target_triple} -Xopenmp- target =${target_triple } -march=
246+ -fopenmp -fopenmp-cuda-mode - mllvm -openmp-opt-disable
282247 -foffload-lto -fvisibility=hidden --offload-device-only
283248 -nocudalib -nogpulib -nogpuinc -nostdlibinc -Wno-unknown-cuda-version
284249 )
@@ -293,18 +258,11 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple)
293258 endif ()
294259endfunction ()
295260
296- # Generate a Bitcode library for all the gpu architectures the user requested.
297- add_custom_target (omptarget.devicertl.nvptx)
298261add_custom_target (omptarget.devicertl.amdgpu)
299- foreach (gpu_arch ${LIBOMPTARGET_DEVICE_ARCHITECTURES} )
300- if ("${gpu_arch} " IN_LIST all_amdgpu_architectures)
301- compileDeviceRTLLibrary(${gpu_arch} amdgpu amdgcn-amd-amdhsa -Xclang -mcode-object-version =none)
302- elseif ("${gpu_arch} " IN_LIST all_nvptx_architectures)
303- compileDeviceRTLLibrary(${gpu_arch} nvptx nvptx64-nvidia-cuda --cuda-feature=+ptx63)
304- else ()
305- message (FATAL_ERROR "Unknown GPU architecture '${gpu_arch} '" )
306- endif ()
307- endforeach ()
262+ compileDeviceRTLLibrary(amdgpu amdgcn-amd-amdhsa -Xclang -mcode-object-version =none)
263+
264+ add_custom_target (omptarget.devicertl.nvptx)
265+ compileDeviceRTLLibrary(nvptx nvptx64-nvidia-cuda --cuda-feature=+ptx63)
308266
309267# Archive all the object files generated above into a static library
310268add_library (omptarget.devicertl STATIC )
0 commit comments