Skip to content

Commit 85974a0

Browse files
authored
[flang-rt] Add experimental support for GPU build (#131826)
Summary: This patch adds initial support for compiling `flang-rt` directly for the GPU. The method used here matches what's already done for `libc` and `libc++` for the GPU and builds off of those projects. Mainly this requires setting up some flags and setting the sources that currently work. This will deposit the resulting library in the appropriate directory. These files are then intended to be linked via `-Xoffload-linker` support in the offloading driver.
```
lib/clang/21/lib/nvptx64-nvidia-cuda/libflang_rt.runtime.a
lib/clang/21/lib/amdgcn-amd-amdhsa/libflang_rt.runtime.a
```
This is obviously missing a lot of functions, mainly the `io` support. Most of what we cannot support is due to using POSIX things that just don't make sense on the GPU. Stuff like `pthreads` or `sema`. Getting unit tests to run on this will also be a challenge. We could run tests the same way we do with `libc`, but the problem there is that the `libc` test suite is freestanding while `gtest` currently doesn't compile on the GPU because it uses a lot of weird stuff. If the unit tests were simply `int main` then it would work. I don't understand the actual runtime code very well, I'd appreciate some guidance on how to actually support Fortran IO from this interface. As I understand it, Fortran IO requires a stack-like operation, which conflicts with the SIMT model GPUs use. Worst case scenario we could burn some LDS to keep a stack, or serialize it somehow since we can always just iterate over all the active lanes. Building this right now looks like this, which depends on the arguments added in #131695.
```
-DRUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES=compiler-rt;libc;libcxx;libcxxabi;flang-rt \
-DRUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES=compiler-rt;libc;libcxx;libcxxabi;flang-rt \
-DRUNTIMES_nvptx64-nvidia-cuda_FLANG_RT_LIBC_PROVIDER=llvm \
-DRUNTIMES_nvptx64-nvidia-cuda_FLANG_RT_LIBCXX_PROVIDER=llvm \
-DRUNTIMES_amdgcn-amd-amdhsa_FLANG_RT_LIBC_PROVIDER=llvm \
-DRUNTIMES_amdgcn-amd-amdhsa_FLANG_RT_LIBCXX_PROVIDER=llvm
```
1 parent ad9909d commit 85974a0

File tree

5 files changed

+108
-29
lines changed

5 files changed

+108
-29
lines changed

flang-rt/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,15 @@ endif()
221221
# System Introspection #
222222
########################
223223

224+
# The GPU targets require a few mandatory arguments to make the standard CMake
225+
# check flags happy.
226+
if ("${LLVM_RUNTIMES_TARGET}" MATCHES "^amdgcn")
227+
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nogpulib")
228+
elseif ("${LLVM_RUNTIMES_TARGET}" MATCHES "^nvptx")
229+
set(CMAKE_REQUIRED_FLAGS
230+
"${CMAKE_REQUIRED_FLAGS} -flto -c -Wno-unused-command-line-argument")
231+
endif()
232+
224233
include(CheckCXXSymbolExists)
225234
include(CheckCXXSourceCompiles)
226235
check_cxx_symbol_exists(strerror_r string.h HAVE_STRERROR_R)

flang-rt/cmake/modules/AddFlangRT.cmake

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,13 @@ function (add_flangrt_library name)
211211
# Minimum required C++ version for Flang-RT, even if CMAKE_CXX_STANDARD is defined to something else.
212212
target_compile_features(${tgtname} PRIVATE cxx_std_17)
213213

214+
# When building the flang runtime if LTO is enabled the archive file
215+
# contains LLVM IR rather than object code. Currently flang is not
216+
# LTO aware so cannot link this file to compiled Fortran code.
217+
if (FLANG_RT_HAS_FNO_LTO_FLAG)
218+
target_compile_options(${tgtname} PRIVATE -fno-lto)
219+
endif ()
220+
214221
# Use compiler-specific options to disable exceptions and RTTI.
215222
if (LLVM_COMPILER_IS_GCC_COMPATIBLE)
216223
target_compile_options(${tgtname} PRIVATE
@@ -226,6 +233,17 @@ function (add_flangrt_library name)
226233
)
227234
endif ()
228235

236+
# Add target specific options if necessary.
237+
if ("${LLVM_RUNTIMES_TARGET}" MATCHES "^amdgcn")
238+
target_compile_options(${tgtname} PRIVATE
239+
$<$<COMPILE_LANGUAGE:CXX>:-nogpulib -flto -fvisibility=hidden>
240+
)
241+
elseif ("${LLVM_RUNTIMES_TARGET}" MATCHES "^nvptx")
242+
target_compile_options(${tgtname} PRIVATE
243+
$<$<COMPILE_LANGUAGE:CXX>:-nogpulib -flto -fvisibility=hidden -Wno-unknown-cuda-version --cuda-feature=+ptx63>
244+
)
245+
endif ()
246+
229247
# Also for CUDA source when compiling with FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT=CUDA
230248
if (CMAKE_CUDA_COMPILER_ID MATCHES "NVIDIA")
231249
# Assuming gcc as host compiler.
@@ -256,13 +274,6 @@ function (add_flangrt_library name)
256274
target_compile_options(${tgtname} PUBLIC -U_LIBCPP_ENABLE_ASSERTIONS)
257275
endif ()
258276

259-
# When building the flang runtime if LTO is enabled the archive file
260-
# contains LLVM IR rather than object code. Currently flang is not
261-
# LTO aware so cannot link this file to compiled Fortran code.
262-
if (FLANG_RT_HAS_FNO_LTO_FLAG)
263-
target_compile_options(${tgtname} PRIVATE -fno-lto)
264-
endif ()
265-
266277
# Flang/Clang (including clang-cl) -compiled programs targeting the MSVC ABI
267278
# should only depend on msvcrt/ucrt. LLVM still emits libgcc/compiler-rt
268279
# functions in some cases like 128-bit integer math (__udivti3, __modti3,

flang-rt/cmake/modules/HandleLibs.cmake

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ elseif (FLANG_RT_LIBCXX_PROVIDER STREQUAL "llvm")
4545
endif ()
4646

4747
if (FLANG_RT_HAS_STDLIB_FLAG)
48-
target_compile_options(flang-rt-libc-headers INTERFACE $<$<COMPILE_LANGUAGE:CXX,C>:-stdlib=libc++>)
48+
target_compile_options(flang-rt-libc-headers INTERFACE
49+
$<$<COMPILE_LANGUAGE:CXX,C>:$<COMPILE_ONLY:-stdlib=libc++>>
50+
)
4951
endif ()
5052
endif ()

flang-rt/lib/runtime/CMakeLists.txt

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ find_package(Backtrace)
1212
set(HAVE_BACKTRACE ${Backtrace_FOUND})
1313
set(BACKTRACE_HEADER ${Backtrace_HEADER})
1414

15-
1615
# List of files that are buildable for all devices.
1716
set(supported_sources
1817
${FLANG_SOURCE_DIR}/lib/Decimal/binary-to-decimal.cpp
@@ -88,6 +87,54 @@ set(host_sources
8887
unit-map.cpp
8988
)
9089

90+
# Sources that can be compiled directly for the GPU.
91+
set(gpu_sources
92+
${FLANG_SOURCE_DIR}/lib/Decimal/binary-to-decimal.cpp
93+
${FLANG_SOURCE_DIR}/lib/Decimal/decimal-to-binary.cpp
94+
ISO_Fortran_binding.cpp
95+
allocator-registry.cpp
96+
allocatable.cpp
97+
array-constructor.cpp
98+
assign.cpp
99+
buffer.cpp
100+
character.cpp
101+
connection.cpp
102+
copy.cpp
103+
derived-api.cpp
104+
derived.cpp
105+
dot-product.cpp
106+
edit-output.cpp
107+
extrema.cpp
108+
findloc.cpp
109+
format.cpp
110+
inquiry.cpp
111+
internal-unit.cpp
112+
io-error.cpp
113+
iostat.cpp
114+
matmul-transpose.cpp
115+
matmul.cpp
116+
memory.cpp
117+
misc-intrinsic.cpp
118+
non-tbp-dio.cpp
119+
numeric.cpp
120+
pointer.cpp
121+
product.cpp
122+
ragged.cpp
123+
stat.cpp
124+
sum.cpp
125+
support.cpp
126+
terminator.cpp
127+
tools.cpp
128+
transformational.cpp
129+
type-code.cpp
130+
type-info.cpp
131+
utf.cpp
132+
complex-powi.cpp
133+
reduce.cpp
134+
reduction.cpp
135+
temporary-stack.cpp
136+
)
137+
91138
file(GLOB_RECURSE public_headers
92139
"${FLANG_RT_SOURCE_DIR}/include/flang_rt/*.h"
93140
"${FLANG_SOURCE_DIR}/include/flang/Common/*.h"
@@ -124,7 +171,11 @@ else ()
124171
set(f128_sources "")
125172
endif ()
126173

127-
set(sources ${supported_sources} ${host_sources} ${f128_sources})
174+
if ("${LLVM_RUNTIMES_TARGET}" MATCHES "^amdgcn|^nvptx")
175+
set(sources ${gpu_sources})
176+
else ()
177+
set(sources ${supported_sources} ${host_sources} ${f128_sources})
178+
endif ()
128179

129180

130181
if (NOT WIN32)

flang/cmake/modules/FlangCommon.cmake

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -24,24 +24,30 @@ if (FLANG_RUNTIME_F128_MATH_LIB)
2424
add_compile_definitions(FLANG_RUNTIME_F128_MATH_LIB="${FLANG_RUNTIME_F128_MATH_LIB}")
2525
endif()
2626

27-
# Check if 128-bit float computations can be done via long double
28-
# Note that '-nostdinc++' might be implied when this code kicks in
29-
# (see 'runtimes/CMakeLists.txt'), so we cannot use 'cfloat' C++ header
30-
# file in the test below.
31-
# Compile it as C.
32-
check_c_source_compiles(
33-
"#include <float.h>
34-
#if LDBL_MANT_DIG != 113
35-
#error LDBL_MANT_DIG != 113
36-
#endif
37-
int main() { return 0; }
38-
"
39-
HAVE_LDBL_MANT_DIG_113)
40-
41-
include(TestBigEndian)
42-
test_big_endian(IS_BIGENDIAN)
43-
if (IS_BIGENDIAN)
44-
add_compile_definitions(FLANG_BIG_ENDIAN=1)
45-
else ()
27+
# The NVPTX target can't emit a binary due to the PTXAS dependency, just
28+
# hard-code this.
29+
if ("${LLVM_RUNTIMES_TARGET}" MATCHES "^nvptx")
4630
add_compile_definitions(FLANG_LITTLE_ENDIAN=1)
31+
else ()
32+
# Check if 128-bit float computations can be done via long double
33+
# Note that '-nostdinc++' might be implied when this code kicks in
34+
# (see 'runtimes/CMakeLists.txt'), so we cannot use 'cfloat' C++ header
35+
# file in the test below.
36+
# Compile it as C.
37+
check_c_source_compiles(
38+
"#include <float.h>
39+
#if LDBL_MANT_DIG != 113
40+
#error LDBL_MANT_DIG != 113
41+
#endif
42+
int main() { return 0; }
43+
"
44+
HAVE_LDBL_MANT_DIG_113)
45+
46+
include(TestBigEndian)
47+
test_big_endian(IS_BIGENDIAN)
48+
if (IS_BIGENDIAN)
49+
add_compile_definitions(FLANG_BIG_ENDIAN=1)
50+
else ()
51+
add_compile_definitions(FLANG_LITTLE_ENDIAN=1)
52+
endif ()
4753
endif ()

0 commit comments

Comments
 (0)