From 3641398393de637b3d172bdae52ab127368745e2 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Tue, 28 Jan 2025 15:52:08 -0800 Subject: [PATCH 1/3] [flang][cuda] Allow to set the stack limit size --- flang/CMakeLists.txt | 13 +++++----- .../flang/Optimizer/Builder/Runtime/Main.h | 3 ++- flang/include/flang/Runtime/CUDA/init.h | 20 +++++++++++++++ flang/lib/Lower/Bridge.cpp | 4 ++- flang/lib/Optimizer/Builder/Runtime/Main.cpp | 15 ++++++++++- flang/runtime/CUDA/CMakeLists.txt | 1 + flang/runtime/CUDA/init.cpp | 25 +++++++++++++++++++ flang/runtime/environment.cpp | 11 ++++++++ flang/runtime/environment.h | 3 +++ 9 files changed, 86 insertions(+), 9 deletions(-) create mode 100644 flang/include/flang/Runtime/CUDA/init.h create mode 100644 flang/runtime/CUDA/init.cpp diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index b619553ef8302..fb7ab4759ad37 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -471,6 +471,13 @@ if (FLANG_INCLUDE_TESTS) add_compile_definitions(FLANG_INCLUDE_TESTS=1) endif() +option(FLANG_CUF_RUNTIME + "Compile CUDA Fortran runtime sources" OFF) +if (FLANG_CUF_RUNTIME) + find_package(CUDAToolkit REQUIRED) + add_compile_definitions(FLANG_CUDA_SUPPORT=1) +endif() + add_subdirectory(include) add_subdirectory(lib) add_subdirectory(cmake/modules) @@ -481,12 +488,6 @@ if (FLANG_BUILD_TOOLS) add_subdirectory(tools) endif() -option(FLANG_CUF_RUNTIME - "Compile CUDA Fortran runtime sources" OFF) -if (FLANG_CUF_RUNTIME) - find_package(CUDAToolkit REQUIRED) -endif() - add_subdirectory(runtime) if (LLVM_INCLUDE_EXAMPLES) diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Main.h b/flang/include/flang/Optimizer/Builder/Runtime/Main.h index e4c5dc914c700..a0586deade42a 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/Main.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/Main.h @@ -24,7 +24,8 @@ class GlobalOp; namespace fir::runtime { void genMain(fir::FirOpBuilder &builder, mlir::Location loc, - const std::vector &defs); + const std::vector &defs, + bool initCuda = false); } #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_MAIN_H diff --git a/flang/include/flang/Runtime/CUDA/init.h b/flang/include/flang/Runtime/CUDA/init.h new file mode 100644 index 0000000000000..24bc683822720 --- /dev/null +++ b/flang/include/flang/Runtime/CUDA/init.h @@ -0,0 +1,20 @@ +//===-- include/flang/Runtime/CUDA/init.h -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_RUNTIME_CUDA_INIT_H_ +#define FORTRAN_RUNTIME_CUDA_INIT_H_ + +#include "common.h" +#include "flang/Runtime/entry-names.h" + +extern "C" { + +void RTDECL(CUFInit)(); +} + +#endif // FORTRAN_RUNTIME_CUDA_INIT_H_ diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index d92dc0cf9abd6..ff80826216e4f 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -459,7 +459,9 @@ class FirConverter : public Fortran::lower::AbstractConverter { if (hasMainProgram) createGlobalOutsideOfFunctionLowering([&]() { fir::runtime::genMain(*builder, toLocation(), - bridge.getEnvironmentDefaults()); + bridge.getEnvironmentDefaults(), + getFoldingContext().languageFeatures().IsEnabled( + Fortran::common::LanguageFeature::CUDA)); }); finalizeOpenACCLowering(); diff --git a/flang/lib/Optimizer/Builder/Runtime/Main.cpp b/flang/lib/Optimizer/Builder/Runtime/Main.cpp index ab3c4ca81314c..5156fd5402077 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Main.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Main.cpp @@ -16,13 +16,17 @@ #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Runtime/main.h" #include "flang/Runtime/stop.h" +#ifdef FLANG_CUDA_SUPPORT +#include "flang/Runtime/CUDA/init.h" +#endif using namespace Fortran::runtime; /// Create a `int main(...)` that calls the Fortran entry point void fir::runtime::genMain( fir::FirOpBuilder &builder, mlir::Location loc, - const std::vector &defs) { + const std::vector &defs, + bool initCuda) { auto *context = builder.getContext(); auto argcTy = builder.getDefaultIntegerType(); auto ptrTy = mlir::LLVM::LLVMPointerType::get(context); @@ -61,6 +65,15 @@ void fir::runtime::genMain( args.push_back(env); builder.create(loc, startFn, args); + +#ifdef FLANG_CUDA_SUPPORT + if (initCuda) { + auto initFn = builder.createFunction( + loc, RTNAME_STRING(CUFInit), mlir::FunctionType::get(context, {}, {})); + builder.create(loc, initFn); + } +#endif + builder.create(loc, qqMainFn); builder.create(loc, stopFn); diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt index 23e01da72eded..bfbae58086c1f 100644 --- a/flang/runtime/CUDA/CMakeLists.txt +++ b/flang/runtime/CUDA/CMakeLists.txt @@ -17,6 +17,7 @@ add_flang_library(${CUFRT_LIBNAME} allocator.cpp allocatable.cpp descriptor.cpp + init.cpp kernel.cpp memmove-function.cpp memory.cpp diff --git a/flang/runtime/CUDA/init.cpp b/flang/runtime/CUDA/init.cpp new file mode 100644 index 0000000000000..2bffce842b952 --- /dev/null +++ b/flang/runtime/CUDA/init.cpp @@ -0,0 +1,25 @@ +//===-- runtime/CUDA/init.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Runtime/CUDA/init.h" +#include "../environment.h" +#include "../terminator.h" +#include "flang/Runtime/CUDA/common.h" + +#include "cuda_runtime.h" + +extern "C" { + +void RTDEF(CUFInit)() { + // Perform ctx initialization based on execution environment if necessary. + if (Fortran::runtime::executionEnvironment.cudaStackLimit) { + CUDA_REPORT_IF_ERROR(cudaDeviceSetLimit(cudaLimitStackSize, + Fortran::runtime::executionEnvironment.cudaStackLimit)); + } +} +} diff --git a/flang/runtime/environment.cpp b/flang/runtime/environment.cpp index 52b1d99ba536e..0f927587fb4f8 100644 --- a/flang/runtime/environment.cpp +++ b/flang/runtime/environment.cpp @@ -143,6 +143,17 @@ void ExecutionEnvironment::Configure(int ac, const char *av[], } } + if (auto *x{std::getenv("CUDA_STACKLIMIT")}) { + char *end; + auto n{std::strtol(x, &end, 10)}; + if (n >= 0 && n < std::numeric_limits::max() && *end == '\0') { + cudaStackLimit = n; + } else { + std::fprintf(stderr, + "Fortran runtime: CUDA_STACKLIMIT=%s is invalid; ignored\n", x); + } + } + // TODO: Set RP/ROUND='PROCESSOR_DEFINED' from environment } diff --git a/flang/runtime/environment.h b/flang/runtime/environment.h index b8b9f10e4e57f..184f0eb8653a6 100644 --- a/flang/runtime/environment.h +++ b/flang/runtime/environment.h @@ -56,6 +56,9 @@ struct ExecutionEnvironment { bool noStopMessage{false}; // NO_STOP_MESSAGE=1 inhibits "Fortran STOP" bool defaultUTF8{false}; // DEFAULT_UTF8 bool checkPointerDeallocation{true}; // FORT_CHECK_POINTER_DEALLOCATION + + // CUDA Fortran related variables + std::size_t cudaStackLimit{0}; // CUDA_STACKLIMIT }; RT_OFFLOAD_VAR_GROUP_BEGIN From b3bcc4ec56cdee0864a962078f797a8e14583204 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Tue, 28 Jan 2025 16:09:19 -0800 Subject: [PATCH 2/3] Update comparison --- flang/runtime/environment.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flang/runtime/environment.cpp b/flang/runtime/environment.cpp index 0f927587fb4f8..ee2e1e94fa5f3 100644 --- a/flang/runtime/environment.cpp +++ b/flang/runtime/environment.cpp @@ -145,8 +145,8 @@ void ExecutionEnvironment::Configure(int ac, const char *av[], if (auto *x{std::getenv("CUDA_STACKLIMIT")}) { char *end; - auto n{std::strtol(x, &end, 10)}; - if (n >= 0 && n < std::numeric_limits::max() && *end == '\0') { + auto n{std::strtoul(x, &end, 10)}; + if (n > 0 && n < std::numeric_limits::max() && *end == '\0') { cudaStackLimit = n; } else { std::fprintf(stderr, From af6368dabbafccd0b59538d4438c06f45362c676 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Tue, 28 Jan 2025 16:58:22 -0800 Subject: [PATCH 3/3] Use ACC_OFFLOAD_STACK_SIZE --- flang/runtime/environment.cpp | 5 +++-- flang/runtime/environment.h | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/flang/runtime/environment.cpp b/flang/runtime/environment.cpp index ee2e1e94fa5f3..678d8745c9fd7 100644 --- a/flang/runtime/environment.cpp +++ b/flang/runtime/environment.cpp @@ -143,14 +143,15 @@ void ExecutionEnvironment::Configure(int ac, const char *av[], } } - if (auto *x{std::getenv("CUDA_STACKLIMIT")}) { + if (auto *x{std::getenv("ACC_OFFLOAD_STACK_SIZE")}) { char *end; auto n{std::strtoul(x, &end, 10)}; if (n > 0 && n < std::numeric_limits::max() && *end == '\0') { cudaStackLimit = n; } else { std::fprintf(stderr, - "Fortran runtime: CUDA_STACKLIMIT=%s is invalid; ignored\n", x); + "Fortran runtime: ACC_OFFLOAD_STACK_SIZE=%s is invalid; ignored\n", + x); } } diff --git a/flang/runtime/environment.h b/flang/runtime/environment.h index 184f0eb8653a6..500aa925a625b 100644 --- a/flang/runtime/environment.h +++ b/flang/runtime/environment.h @@ -57,8 +57,8 @@ struct ExecutionEnvironment { bool defaultUTF8{false}; // DEFAULT_UTF8 bool checkPointerDeallocation{true}; // FORT_CHECK_POINTER_DEALLOCATION - // CUDA Fortran related variables - std::size_t cudaStackLimit{0}; // CUDA_STACKLIMIT + // CUDA related variables + std::size_t cudaStackLimit{0}; // ACC_OFFLOAD_STACK_SIZE }; RT_OFFLOAD_VAR_GROUP_BEGIN