Skip to content

Commit be6f110

Browse files
authored
[OpenMP] Change build of OpenMP device runtime to be a separate runtime (llvm#136729)
Summary: Currently we build the OpenMP device runtime as part of the `offload/` project. This is problematic because it has several restrictions compared to the normal offloading runtime: it can only be built with an up-to-date clang, and we need to set the target appropriately. Currently we hack around this by creating the compiler invocation manually, but this patch moves it into a separate runtimes build. This follows the same build we use for libc, libc++, compiler-rt, and flang-rt. This also moves it from `offload/` into `openmp/` because it is still the `openmp/` runtime and I feel that is more appropriate. We do want a generic `offload/` library at some point, but adding it later as a separate library will be trivial now that this infrastructure is in place. Most importantly, this will require users to update their build configs, adding at a minimum the following lines. I debated whether or not I should 'auto-upgrade' this, but I just went with a warning.
```
-DLLVM_RUNTIME_TARGETS='default;amdgcn-amd-amdhsa;nvptx64-nvidia-cuda' \
-DRUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES=openmp \
-DRUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES=openmp \
```
This also changed where the `.bc` version of the library lives, but it is still created.
1 parent cb00c2d commit be6f110

35 files changed

+150
-29
lines changed

offload/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,6 @@ add_subdirectory(tools/offload-tblgen)
371371

372372
# Build offloading plugins and device RTLs if they are available.
373373
add_subdirectory(plugins-nextgen)
374-
add_subdirectory(DeviceRTL)
375374
add_subdirectory(tools)
376375
add_subdirectory(docs)
377376

offload/cmake/caches/AMDGPUBot.cmake

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,6 @@ set(LLVM_LIT_ARGS "-v --show-unsupported --timeout 100 --show-xfail -j 16" CACHE
1919

2020
set(CLANG_DEFAULT_LINKER "lld" CACHE STRING "")
2121
# Must be a CACHE entry like the neighbouring settings: without the CACHE
# keyword, set() only creates a script-local list variable and the default
# runtime library is never recorded in the cache.
set(CLANG_DEFAULT_RTLIB "compiler-rt" CACHE STRING "")
22+
23+
set(LLVM_RUNTIME_TARGETS default;amdgcn-amd-amdhsa CACHE STRING "")
24+
set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "openmp" CACHE STRING "")

offload/cmake/caches/Offload.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@ set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ON CACHE BOOL "")
55
set(LLVM_RUNTIME_TARGETS default;amdgcn-amd-amdhsa;nvptx64-nvidia-cuda CACHE STRING "")
66
set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "")
77
set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
8-
set(RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;libcxx;libcxxabi" CACHE STRING "")
9-
set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;libcxx;libcxxabi" CACHE STRING "")
8+
set(RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi" CACHE STRING "")
9+
set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi" CACHE STRING "")

openmp/CMakeLists.txt

Lines changed: 40 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,14 @@ else()
9999
set(CMAKE_CXX_EXTENSIONS NO)
100100
endif()
101101

102+
# When the compiler targets a GPU directly, extend CMAKE_REQUIRED_FLAGS with
# the extra flags needed to make CMake happy. The two target patterns are
# mutually exclusive, so the branch order does not matter.
if("${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^nvptx")
  string(APPEND CMAKE_REQUIRED_FLAGS
         " -flto -c -Wno-unused-command-line-argument")
elseif("${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^amdgcn")
  string(APPEND CMAKE_REQUIRED_FLAGS " -nogpulib")
endif()
109+
102110
# Check and set up common compiler flags.
103111
include(config-ix)
104112
include(HandleOpenMPOptions)
@@ -133,35 +141,41 @@ else()
133141
get_clang_resource_dir(LIBOMP_HEADERS_INSTALL_PATH SUBDIR include)
134142
endif()
135143

136-
# Build host runtime library, after LIBOMPTARGET variables are set since they are needed
137-
# to enable time profiling support in the OpenMP runtime.
138-
add_subdirectory(runtime)
139-
140-
set(ENABLE_OMPT_TOOLS ON)
141-
# Currently tools are not tested well on Windows or MacOS X.
142-
if (APPLE OR WIN32)
143-
set(ENABLE_OMPT_TOOLS OFF)
144-
endif()
144+
# Use the current compiler target to determine the appropriate runtime to build.
145+
if("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^amdgcn|^nvptx" OR
146+
"${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^amdgcn|^nvptx")
147+
add_subdirectory(device)
148+
else()
149+
# Build host runtime library, after LIBOMPTARGET variables are set since they
150+
# are needed to enable time profiling support in the OpenMP runtime.
151+
add_subdirectory(runtime)
152+
153+
set(ENABLE_OMPT_TOOLS ON)
154+
# Currently tools are not tested well on Windows or MacOS X.
155+
if (APPLE OR WIN32)
156+
set(ENABLE_OMPT_TOOLS OFF)
157+
endif()
145158

146-
option(OPENMP_ENABLE_OMPT_TOOLS "Enable building ompt based tools for OpenMP."
147-
${ENABLE_OMPT_TOOLS})
148-
if (OPENMP_ENABLE_OMPT_TOOLS)
149-
add_subdirectory(tools)
150-
endif()
159+
option(OPENMP_ENABLE_OMPT_TOOLS "Enable building ompt based tools for OpenMP."
160+
${ENABLE_OMPT_TOOLS})
161+
if (OPENMP_ENABLE_OMPT_TOOLS)
162+
add_subdirectory(tools)
163+
endif()
151164

152-
# Propagate OMPT support to offload
153-
if(NOT ${OPENMP_STANDALONE_BUILD})
154-
set(LIBOMP_HAVE_OMPT_SUPPORT ${LIBOMP_HAVE_OMPT_SUPPORT} PARENT_SCOPE)
155-
set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${LIBOMP_OMP_TOOLS_INCLUDE_DIR} PARENT_SCOPE)
156-
endif()
165+
# Propagate OMPT support to offload
166+
if(NOT ${OPENMP_STANDALONE_BUILD})
167+
set(LIBOMP_HAVE_OMPT_SUPPORT ${LIBOMP_HAVE_OMPT_SUPPORT} PARENT_SCOPE)
168+
set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${LIBOMP_OMP_TOOLS_INCLUDE_DIR} PARENT_SCOPE)
169+
endif()
157170

158-
option(OPENMP_MSVC_NAME_SCHEME "Build dll with MSVC naming scheme." OFF)
171+
option(OPENMP_MSVC_NAME_SCHEME "Build dll with MSVC naming scheme." OFF)
159172

160-
# Build libompd.so
161-
add_subdirectory(libompd)
173+
# Build libompd.so
174+
add_subdirectory(libompd)
162175

163-
# Build documentation
164-
add_subdirectory(docs)
176+
# Build documentation
177+
add_subdirectory(docs)
165178

166-
# Now that we have seen all testsuites, create the check-openmp target.
167-
construct_check_openmp_target()
179+
# Now that we have seen all testsuites, create the check-openmp target.
180+
construct_check_openmp_target()
181+
endif()

openmp/device/CMakeLists.txt

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# Ensure the compiler is a valid clang when building the GPU target: it must
# identify as Clang and its version must equal the LLVM version being built.
# The check is skipped entirely when LLVM_VERSION_MAJOR is not set.
set(req_ver "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}")
# The compiler version is quoted so that an empty value cannot turn the
# condition into a malformed if() expression.
if(LLVM_VERSION_MAJOR AND NOT (CMAKE_CXX_COMPILER_ID MATCHES "[Cc]lang" AND
   "${CMAKE_CXX_COMPILER_VERSION}" VERSION_EQUAL "${req_ver}"))
  # message() concatenates its arguments verbatim, so each fragment carries
  # exactly one separating space.
  message(FATAL_ERROR "Cannot build GPU device runtime. CMake compiler "
                      "'${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}' "
                      "is not 'Clang ${req_ver}'.")
endif()
9+
10+
# Sources of the device runtime, listed relative to src/ and then expanded to
# absolute paths. The order is kept exactly as it was originally written.
set(src_files
  Allocator.cpp
  Configuration.cpp
  Debug.cpp
  Kernel.cpp
  LibC.cpp
  Mapping.cpp
  Misc.cpp
  Parallelism.cpp
  Profiling.cpp
  Reduction.cpp
  State.cpp
  Synchronization.cpp
  Tasking.cpp
  DeviceUtils.cpp
  Workshare.cpp
)
list(TRANSFORM src_files PREPEND "${CMAKE_CURRENT_SOURCE_DIR}/src/")
27+
28+
# Flags applied to every source file of the device runtime.
list(APPEND compile_options
  -flto
  -fvisibility=hidden
  -nogpulib
  -nostdlibinc
  -fno-rtti
  -fno-exceptions
  -fconvergent-functions
  -Wno-unknown-cuda-version
)
# Pass the target triple explicitly when one is configured.
if(LLVM_DEFAULT_TARGET_TRIPLE)
  list(APPEND compile_options --target=${LLVM_DEFAULT_TARGET_TRIPLE})
endif()

# The SLP vectorizer is turned off while optimizing the runtime itself to
# avoid vectorized accesses to the shared state. Such accesses are generally
# "good", but the optimizer pipeline (esp. the Attributor) does not fully
# support vectorized instructions yet, which costs far more important constant
# propagation. The vectorizer runs again once the runtime has been linked into
# the user program.
list(APPEND compile_options "SHELL: -mllvm -vectorize-slp=false")

# Pick the architecture name used in the output file name, together with the
# architecture-specific flags, from whichever triple variable is set.
if("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^amdgcn" OR
   "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^amdgcn")
  set(target_name "amdgpu")
  list(APPEND compile_options "SHELL:-Xclang -mcode-object-version=none")
elseif("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^nvptx" OR
       "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^nvptx")
  set(target_name "nvptx")
  list(APPEND compile_options --cuda-feature=+ptx63)
endif()
56+
57+
# The runtime is declared as an executable as a trick: the linker's LTO pass
# then combines all sources into one bitcode file, named
# libomptarget-<target_name>.bc in the current binary directory.
add_executable(libompdevice ${src_files})
set_target_properties(libompdevice PROPERTIES
  RUNTIME_OUTPUT_NAME libomptarget-${target_name}.bc
  RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
  LINKER_LANGUAGE CXX
  BUILD_RPATH ""
  INSTALL_RPATH "")

target_compile_options(libompdevice PRIVATE ${compile_options})
target_include_directories(libompdevice PRIVATE
  ${CMAKE_CURRENT_SOURCE_DIR}/include
  ${CMAKE_CURRENT_SOURCE_DIR}/../../libc
  ${CMAKE_CURRENT_SOURCE_DIR}/../../offload/include)

# Use the GPU C library instead when the user built with it enabled.
if(LIBOMPTARGET_GPU_LIBC_SUPPORT)
  target_compile_definitions(libompdevice PRIVATE OMPTARGET_HAS_LIBC)
endif()
target_compile_definitions(libompdevice PRIVATE SHARED_SCRATCHPAD_SIZE=512)

# Link step: the LTO options first, then the target triple when configured.
target_link_options(libompdevice PRIVATE
  "-flto" "-r" "-nostdlib" "-Wl,--lto-emit-llvm")
if(LLVM_DEFAULT_TARGET_TRIPLE)
  target_link_options(libompdevice PRIVATE "--target=${LLVM_DEFAULT_TARGET_TRIPLE}")
endif()

# The permission list contains no execute bits.
install(TARGETS libompdevice
        PERMISSIONS OWNER_WRITE OWNER_READ GROUP_READ WORLD_READ
        DESTINATION ${OPENMP_INSTALL_LIBDIR})
85+
86+
# Expose the bitcode file produced by libompdevice as an imported object
# library so that it can be archived.
add_library(ompdevice.all_objs OBJECT IMPORTED)
set_target_properties(ompdevice.all_objs PROPERTIES
  IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/libomptarget-${target_name}.bc")

# Archive the object file generated above into a static library. The explicit
# dependency orders the build of libompdevice before the archive step.
add_library(ompdevice STATIC)
set_target_properties(ompdevice PROPERTIES
  LINKER_LANGUAGE CXX
  ARCHIVE_OUTPUT_DIRECTORY "${LLVM_LIBRARY_OUTPUT_INTDIR}/${LLVM_DEFAULT_TARGET_TRIPLE}"
)
target_link_libraries(ompdevice PRIVATE ompdevice.all_objs)
add_dependencies(ompdevice libompdevice)
install(TARGETS ompdevice ARCHIVE DESTINATION "${OPENMP_INSTALL_LIBDIR}")
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)