Skip to content

Commit 654b763

Browse files
authored
[flang][cuda] Allow to set the stack limit size (llvm#124859)
This patch adds a call to the `CUFInit` function just after `ProgramStart` when CUDA Fortran is enabled, in order to initialize the CUDA context. This allows us to set up context information such as the stack limit, which can be defined through the environment variable `ACC_OFFLOAD_STACK_SIZE=<value>`.
1 parent 9052b37 commit 654b763

File tree

9 files changed

+87
-9
lines changed

9 files changed

+87
-9
lines changed

flang/CMakeLists.txt

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,13 @@ if (FLANG_INCLUDE_TESTS)
471471
add_compile_definitions(FLANG_INCLUDE_TESTS=1)
472472
endif()
473473

474+
option(FLANG_CUF_RUNTIME
475+
"Compile CUDA Fortran runtime sources" OFF)
476+
if (FLANG_CUF_RUNTIME)
477+
find_package(CUDAToolkit REQUIRED)
478+
add_compile_definitions(FLANG_CUDA_SUPPORT=1)
479+
endif()
480+
474481
add_subdirectory(include)
475482
add_subdirectory(lib)
476483
add_subdirectory(cmake/modules)
@@ -481,12 +488,6 @@ if (FLANG_BUILD_TOOLS)
481488
add_subdirectory(tools)
482489
endif()
483490

484-
option(FLANG_CUF_RUNTIME
485-
"Compile CUDA Fortran runtime sources" OFF)
486-
if (FLANG_CUF_RUNTIME)
487-
find_package(CUDAToolkit REQUIRED)
488-
endif()
489-
490491
add_subdirectory(runtime)
491492

492493
if (LLVM_INCLUDE_EXAMPLES)

flang/include/flang/Optimizer/Builder/Runtime/Main.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ class GlobalOp;
2424
namespace fir::runtime {
2525

2626
void genMain(fir::FirOpBuilder &builder, mlir::Location loc,
27-
const std::vector<Fortran::lower::EnvironmentDefault> &defs);
27+
const std::vector<Fortran::lower::EnvironmentDefault> &defs,
28+
bool initCuda = false);
2829
}
2930

3031
#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_MAIN_H
flang/include/flang/Runtime/CUDA/init.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===-- include/flang/Runtime/CUDA/init.h -----------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef FORTRAN_RUNTIME_CUDA_INIT_H_
10+
#define FORTRAN_RUNTIME_CUDA_INIT_H_
11+
12+
#include "common.h"
13+
#include "flang/Runtime/entry-names.h"
14+
15+
extern "C" {
16+
17+
void RTDECL(CUFInit)();
18+
}
19+
20+
#endif // FORTRAN_RUNTIME_CUDA_INIT_H_

flang/lib/Lower/Bridge.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
459459
if (hasMainProgram)
460460
createGlobalOutsideOfFunctionLowering([&]() {
461461
fir::runtime::genMain(*builder, toLocation(),
462-
bridge.getEnvironmentDefaults());
462+
bridge.getEnvironmentDefaults(),
463+
getFoldingContext().languageFeatures().IsEnabled(
464+
Fortran::common::LanguageFeature::CUDA));
463465
});
464466

465467
finalizeOpenACCLowering();

flang/lib/Optimizer/Builder/Runtime/Main.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,17 @@
1616
#include "flang/Optimizer/Dialect/FIRType.h"
1717
#include "flang/Runtime/main.h"
1818
#include "flang/Runtime/stop.h"
19+
#ifdef FLANG_CUDA_SUPPORT
20+
#include "flang/Runtime/CUDA/init.h"
21+
#endif
1922

2023
using namespace Fortran::runtime;
2124

2225
/// Create a `int main(...)` that calls the Fortran entry point
2326
void fir::runtime::genMain(
2427
fir::FirOpBuilder &builder, mlir::Location loc,
25-
const std::vector<Fortran::lower::EnvironmentDefault> &defs) {
28+
const std::vector<Fortran::lower::EnvironmentDefault> &defs,
29+
bool initCuda) {
2630
auto *context = builder.getContext();
2731
auto argcTy = builder.getDefaultIntegerType();
2832
auto ptrTy = mlir::LLVM::LLVMPointerType::get(context);
@@ -61,6 +65,15 @@ void fir::runtime::genMain(
6165
args.push_back(env);
6266

6367
builder.create<fir::CallOp>(loc, startFn, args);
68+
69+
#ifdef FLANG_CUDA_SUPPORT
70+
if (initCuda) {
71+
auto initFn = builder.createFunction(
72+
loc, RTNAME_STRING(CUFInit), mlir::FunctionType::get(context, {}, {}));
73+
builder.create<fir::CallOp>(loc, initFn);
74+
}
75+
#endif
76+
6477
builder.create<fir::CallOp>(loc, qqMainFn);
6578
builder.create<fir::CallOp>(loc, stopFn);
6679

flang/runtime/CUDA/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ add_flang_library(${CUFRT_LIBNAME}
1717
allocator.cpp
1818
allocatable.cpp
1919
descriptor.cpp
20+
init.cpp
2021
kernel.cpp
2122
memmove-function.cpp
2223
memory.cpp

flang/runtime/CUDA/init.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
//===-- runtime/CUDA/init.cpp ---------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "flang/Runtime/CUDA/init.h"
10+
#include "../environment.h"
11+
#include "../terminator.h"
12+
#include "flang/Runtime/CUDA/common.h"
13+
14+
#include "cuda_runtime.h"
15+
16+
extern "C" {
17+
18+
void RTDEF(CUFInit)() {
19+
// Perform ctx initialization based on execution environment if necessary.
20+
if (Fortran::runtime::executionEnvironment.cudaStackLimit) {
21+
CUDA_REPORT_IF_ERROR(cudaDeviceSetLimit(cudaLimitStackSize,
22+
Fortran::runtime::executionEnvironment.cudaStackLimit));
23+
}
24+
}
25+
}

flang/runtime/environment.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,18 @@ void ExecutionEnvironment::Configure(int ac, const char *av[],
143143
}
144144
}
145145

146+
if (auto *x{std::getenv("ACC_OFFLOAD_STACK_SIZE")}) {
147+
char *end;
148+
auto n{std::strtoul(x, &end, 10)};
149+
if (n > 0 && n < std::numeric_limits<std::size_t>::max() && *end == '\0') {
150+
cudaStackLimit = n;
151+
} else {
152+
std::fprintf(stderr,
153+
"Fortran runtime: ACC_OFFLOAD_STACK_SIZE=%s is invalid; ignored\n",
154+
x);
155+
}
156+
}
157+
146158
// TODO: Set RP/ROUND='PROCESSOR_DEFINED' from environment
147159
}
148160

flang/runtime/environment.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ struct ExecutionEnvironment {
5656
bool noStopMessage{false}; // NO_STOP_MESSAGE=1 inhibits "Fortran STOP"
5757
bool defaultUTF8{false}; // DEFAULT_UTF8
5858
bool checkPointerDeallocation{true}; // FORT_CHECK_POINTER_DEALLOCATION
59+
60+
// CUDA related variables
61+
std::size_t cudaStackLimit{0}; // ACC_OFFLOAD_STACK_SIZE
5962
};
6063

6164
RT_OFFLOAD_VAR_GROUP_BEGIN

0 commit comments

Comments
 (0)