@@ -42,50 +42,6 @@ set(devicertl_base_directory ${CMAKE_CURRENT_SOURCE_DIR})
4242set (include_directory ${devicertl_base_directory} /include )
4343set (source_directory ${devicertl_base_directory} /src)
4444
45- set (all_capabilities 35 37 50 52 53 60 61 62 70 72 75 80 86 89 87 90)
46- set (all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803"
47- "gfx9-generic;gfx900;gfx902;gfx906;gfx908"
48- "gfx90a;gfx90c"
49- "gfx9-4-generic;gfx940;gfx941;gfx942;gfx950"
50- "gfx10-1-generic;gfx1010;gfx1012"
51- "gfx10-3-generic;gfx1030;gfx1031;gfx1032;gfx1033"
52- "gfx1034;gfx1035;gfx1036"
53- "gfx11-generic;gfx1100;gfx1101;gfx1102;gfx1103"
54- "gfx1150;gfx1151;gfx1152;gfx1153"
55- "gfx12-generic;gfx1200;gfx1201" )
56- set (all_nvptx_architectures "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
57- "sm_70;sm_72;sm_75;sm_80;sm_86;sm_87;sm_89;sm_90" )
58- set (all_gpu_architectures
59- "${all_amdgpu_architectures} ;${all_nvptx_architectures} " )
60-
61- # AMD internal build scripts use LIBOMPTARGET_AMDGCN_GFXLIST
62- if (DEFINED LIBOMPTARGET_AMDGCN_GFXLIST)
63- set (LIBOMPTARGET_DEVICE_ARCHITECTURES ${LIBOMPTARGET_AMDGCN_GFXLIST} CACHE STRING
64- "List of device architectures to be used to compile the OpenMP DeviceRTL." )
65- else ()
66- set (LIBOMPTARGET_DEVICE_ARCHITECTURES "all" CACHE STRING
67- "List of device architectures to be used to compile the OpenMP DeviceRTL." )
68- endif ()
69-
70- if (LIBOMPTARGET_DEVICE_ARCHITECTURES STREQUAL "all" )
71- set (LIBOMPTARGET_DEVICE_ARCHITECTURES ${all_gpu_architectures} )
72- elseif (LIBOMPTARGET_DEVICE_ARCHITECTURES STREQUAL "amdgpu" )
73- set (LIBOMPTARGET_DEVICE_ARCHITECTURES ${all_amdgpu_architectures} )
74- elseif (LIBOMPTARGET_DEVICE_ARCHITECTURES STREQUAL "nvptx" )
75- set (LIBOMPTARGET_DEVICE_ARCHITECTURES ${all_nvptx_architectures} )
76- elseif (LIBOMPTARGET_DEVICE_ARCHITECTURES STREQUAL "auto" OR
77- LIBOMPTARGET_DEVICE_ARCHITECTURES STREQUAL "native" )
78- if (NOT LIBOMPTARGET_NVPTX_ARCH AND NOT LIBOMPTARGET_AMDGPU_ARCH)
79- message (FATAL_ERROR
80- "Could not find 'amdgpu-arch' and 'nvptx-arch' tools required for 'auto'" )
81- elseif (NOT LIBOMPTARGET_FOUND_NVIDIA_GPU AND NOT LIBOMPTARGET_FOUND_AMDGPU_GPU)
82- message (FATAL_ERROR "No AMD or NVIDIA GPU found on the system when using 'auto'" )
83- endif ()
84- set (LIBOMPTARGET_DEVICE_ARCHITECTURES
85- "${LIBOMPTARGET_NVPTX_DETECTED_ARCH_LIST} ;${LIBOMPTARGET_AMDGPU_DETECTED_ARCH_LIST} " )
86- endif ()
87- list (REMOVE_DUPLICATES LIBOMPTARGET_DEVICE_ARCHITECTURES)
88-
8945set (include_files
9046 ${include_directory} /Allocator.h
9147 ${include_directory} /Configuration .h
@@ -170,20 +126,21 @@ endif()
170126
171127# first create an object target
172128add_library (omptarget.devicertl.all_objs OBJECT IMPORTED )
173- function (compileDeviceRTLLibrary target_cpu target_name target_triple)
129+ function (compileDeviceRTLLibrary target_name target_triple)
174130 set (target_bc_flags ${ARGN} )
175131
176132 set (bc_files "" )
177133 foreach (src ${src_files} )
178134 get_filename_component (infile ${src} ABSOLUTE )
179135 get_filename_component (outfile ${src} NAME )
180- set (outfile "${outfile} -${target_cpu } .bc" )
181- set (depfile "${outfile} .d" )
136+ set (outfile "${outfile} -${target_name } .bc" )
137+ set (depfile "${outfile} - ${target_name} .d" )
182138
183139 add_custom_target (${outfile}
184140 COMMAND ${CLANG_TOOL}
185141 ${bc_flags}
186- --offload-arch=${target_cpu}
142+ -fopenmp-targets=${target_triple}
143+ -Xopenmp-target =${target_triple} -march=
187144 ${target_bc_flags}
188145 -MD -MF ${depfile}
189146 ${infile} -o ${outfile}
@@ -194,7 +151,7 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple)
194151
195152 if (${outfile} MATCHES "State.cpp" )
196153 # Run the prep tool on the library to replace internal attribute with linkonce_odr for dm_alloc only.
197- set (outfile_prep "${outfile} -${target_cpu } -prep.bc" )
154+ set (outfile_prep "${outfile} -${target_name } -prep.bc" )
198155 add_custom_target (${outfile_prep}
199156 COMMAND ${PREP_TOOL} -dm ${outfile}
200157 -o ${outfile_prep}
@@ -220,21 +177,21 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple)
220177 if ("${target_name} " STREQUAL "amdgpu" )
221178 if (OPENMP_STANDALONE_BUILD)
222179 list (APPEND extra_bc_files
223- ${CMAKE_BINARY_DIR} /hostexec/libhostexec-${target_cpu } .bc
180+ ${CMAKE_BINARY_DIR} /hostexec/libhostexec-${target_name } .bc
224181 )
225182 else ()
226183 list (APPEND extra_bc_files
227- ${CMAKE_BINARY_DIR} /offload/hostexec/libhostexec-${target_cpu } .bc
184+ ${CMAKE_BINARY_DIR} /offload/hostexec/libhostexec-${target_name } .bc
228185 )
229186 endif ()
230- add_custom_target (libhostexec-${target_cpu }
187+ add_custom_target (libhostexec-${target_name }
231188 DEPENDS ${extra_bc_files}
232189 )
233- add_dependencies (libhostexec-${target_cpu } libhostexec-${target_cpu } .bc)
234- set (extra_depends libhostexec-${target_cpu } )
190+ add_dependencies (libhostexec-${target_name } libhostexec-${target_name } .bc)
191+ set (extra_depends libhostexec-${target_name } )
235192 endif ()
236193
237- set (bclib_name "libomptarget-${target_name} - ${target_cpu} .bc" )
194+ set (bclib_name "libomptarget-${target_name} .bc" )
238195
239196 # Link to a bitcode library.
240197 add_custom_target (linked_${bclib_name}
@@ -293,9 +250,9 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple)
293250 add_dependencies (${bclib_name} prep-libomptarget-bc)
294251 endif ()
295252
296- add_dependencies (omptarget.devicertl.all_objs embedded_${target_name} - ${target_cpu} )
253+ add_dependencies (omptarget.devicertl.all_objs embedded_${target_name} )
297254
298- set (bclib_target_name "omptarget-${target_name} -${target_cpu} - bc" )
255+ set (bclib_target_name "omptarget-${target_name} -bc" )
299256 add_custom_target (${bclib_target_name} ALL )
300257 add_dependencies (${bclib_target_name} ${bclib_name} )
301258
@@ -321,26 +278,26 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple)
321278 # Package the bitcode into an LLVM offloading binary and embed it in an ELF for the static library
322279 add_custom_target (packaged_${bclib_name}
323280 COMMAND ${PACKAGER_TOOL} -o ${CMAKE_CURRENT_BINARY_DIR} /packaged_${bclib_name}
324- "--image=file=${CMAKE_CURRENT_BINARY_DIR} /opt_${bclib_name} ,${target_feature} ,triple=${target_triple} ,arch=${target_cpu} ,kind=openmp"
281+ "--image=file=${CMAKE_CURRENT_BINARY_DIR} /opt_${bclib_name} ,${target_feature} ,triple=${target_triple} ,arch=generic ,kind=openmp"
325282 COMMENT "Packaging LLVM offloading binary ${bclib_name} .out"
326283 )
327284 add_dependencies (packaged_${bclib_name} opt_${bclib_name} )
328285 if ("${PACKAGER_TOOL} " STREQUAL "$<TARGET_FILE:clang-offload-packager>" )
329286 add_dependencies (packaged_${bclib_name} clang-offload-packager)
330287 endif ()
331288
332- set (output_name "${CMAKE_CURRENT_BINARY_DIR} /devicertl-${target_name} - ${target_cpu} .o" )
333- add_custom_target (embedded_${target_name} - ${target_cpu}
289+ set (output_name "${CMAKE_CURRENT_BINARY_DIR} /devicertl-${target_name} .o" )
290+ add_custom_target (embedded_${target_name}
334291 COMMAND ${CLANG_TOOL} --std=c++17 -c -nostdlib
335292 -Xclang -fembed-offload-object=${CMAKE_CURRENT_BINARY_DIR} /packaged_${bclib_name}
336293 -o ${output_name}
337294 ${source_directory} /Stub.cpp
338295 DEPENDS ${source_directory} /Stub.cpp
339296 BYPRODUCTS ${output_name}
340- COMMENT "Embedding LLVM offloading binary in devicertl-${target_name} - ${target_cpu} .o"
297+ COMMENT "Embedding LLVM offloading binary in devicertl-${target_name} .o"
341298 VERBATIM
342299 )
343- add_dependencies (embedded_${target_name} - ${target_cpu} packaged_${bclib_name} )
300+ add_dependencies (embedded_${target_name} packaged_${bclib_name} )
344301 if ("${CLANG_TOOL} " STREQUAL "$<TARGET_FILE:clang>" )
345302 add_dependencies (${output_name} clang)
346303 endif ()
@@ -349,10 +306,11 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple)
349306 set_property (TARGET omptarget.devicertl.all_objs APPEND PROPERTY IMPORTED_OBJECTS ${output_name} )
350307
351308 if (CMAKE_EXPORT_COMPILE_COMMANDS)
352- set (ide_target_name omptarget-ide-${target_name} - ${target_cpu} )
309+ set (ide_target_name omptarget-ide-${target_name} )
353310 add_library (${ide_target_name} STATIC EXCLUDE_FROM_ALL ${src_files} )
354311 target_compile_options (${ide_target_name} PRIVATE
355- -fopenmp --offload-arch=${target_cpu} -fopenmp-cuda-mode
312+ -fopenmp -fopenmp-cuda-mode
313+ -fopenmp-targets=${target_triple} -Xopenmp-target =${target_triple} -march=
356314 -mllvm -openmp-opt-disable
357315 -foffload-lto -fvisibility=hidden --offload-device-only
358316 -nocudalib -nogpulib -nogpuinc -nostdlibinc -Wno-unknown-cuda-version
@@ -371,32 +329,25 @@ endfunction()
371329# Generate one generic bitcode library per GPU target (amdgpu and nvptx).
372330add_custom_target (omptarget.devicertl.nvptx)
373331add_custom_target (omptarget.devicertl.amdgpu)
374- foreach (gpu_arch ${LIBOMPTARGET_DEVICE_ARCHITECTURES} )
375- if ("${gpu_arch} " IN_LIST all_amdgpu_architectures)
376- find_package (AMDDeviceLibs REQUIRED CONFIG
377- HINTS ${CMAKE_INSTALL_PREFIX}
378- ${CMAKE_BINARY_DIR} /../../tools/rocm-device-libs
379- PATHS /opt/rocm
380- )
381-
382- # Link in the ROCm Device Libraries once the other files have been linked.
383- get_target_property (ocml_path ocml IMPORTED_LOCATION )
384- get_target_property (ockl_path ockl IMPORTED_LOCATION )
385-
386- set (amd_options -Xclang -mcode-object-version =none
387- -Xclang -mlink-builtin-bitcode -Xclang ${ocml_path}
388- -Xclang -mlink-builtin-bitcode -Xclang ${ockl_path}
389- -Wno-linker-warnings # Silence the empty host compilation.
390- -Xclang -mcode-object-version =none
391- )
332+ find_package (AMDDeviceLibs REQUIRED CONFIG
333+ HINTS ${CMAKE_INSTALL_PREFIX}
334+ ${CMAKE_BINARY_DIR} /../../tools/rocm-device-libs
335+ PATHS /opt/rocm
336+ )
392337
393- compileDeviceRTLLibrary(${gpu_arch} amdgpu amdgcn-amd-amdhsa ${amd_options} )
394- elseif ("${gpu_arch} " IN_LIST all_nvptx_architectures)
395- compileDeviceRTLLibrary(${gpu_arch} nvptx nvptx64-nvidia-cuda --cuda-feature=+ptx63)
396- else ()
397- message (FATAL_ERROR "Unknown GPU architecture '${gpu_arch} '" )
398- endif ()
399- endforeach ()
338+ # Link in the ROCm Device Libraries once the other files have been linked.
339+ get_target_property (ocml_path ocml IMPORTED_LOCATION )
340+ get_target_property (ockl_path ockl IMPORTED_LOCATION )
341+
342+ set (amd_options -Xclang -mcode-object-version =none
343+ -Xclang -mlink-builtin-bitcode -Xclang ${ocml_path}
344+ -Xclang -mlink-builtin-bitcode -Xclang ${ockl_path}
345+ -Wno-linker-warnings # Silence the empty host compilation.
346+ -Xclang -mcode-object-version =none
347+ )
348+
349+ compileDeviceRTLLibrary(amdgpu amdgcn-amd-amdhsa ${amd_options} )
350+ compileDeviceRTLLibrary(nvptx nvptx64-nvidia-cuda --cuda-feature=+ptx63)
400351
401352# Archive all the object files generated above into a static library
402353add_library (omptarget.devicertl STATIC )
0 commit comments