Skip to content

Commit 3641398

Browse files
committed
[flang][cuda] Allow to set the stack limit size
1 parent 947d8eb commit 3641398

File tree

9 files changed

+86
-9
lines changed

9 files changed

+86
-9
lines changed

flang/CMakeLists.txt

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,13 @@ if (FLANG_INCLUDE_TESTS)
471471
add_compile_definitions(FLANG_INCLUDE_TESTS=1)
472472
endif()
473473

474+
# CUDA Fortran runtime support (off by default).
# This block is placed ahead of the add_subdirectory() calls for include/lib
# because add_compile_definitions() only affects targets created in this
# directory and in subdirectories added afterwards; sources under lib/
# test FLANG_CUDA_SUPPORT with #ifdef.
option(FLANG_CUF_RUNTIME
475+
"Compile CUDA Fortran runtime sources" OFF)
476+
if (FLANG_CUF_RUNTIME)
477+
find_package(CUDAToolkit REQUIRED)
478+
add_compile_definitions(FLANG_CUDA_SUPPORT=1)
479+
endif()
480+
474481
add_subdirectory(include)
475482
add_subdirectory(lib)
476483
add_subdirectory(cmake/modules)
@@ -481,12 +488,6 @@ if (FLANG_BUILD_TOOLS)
481488
add_subdirectory(tools)
482489
endif()
483490

484-
option(FLANG_CUF_RUNTIME
485-
"Compile CUDA Fortran runtime sources" OFF)
486-
if (FLANG_CUF_RUNTIME)
487-
find_package(CUDAToolkit REQUIRED)
488-
endif()
489-
490491
add_subdirectory(runtime)
491492

492493
if (LLVM_INCLUDE_EXAMPLES)

flang/include/flang/Optimizer/Builder/Runtime/Main.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ class GlobalOp;
2424
namespace fir::runtime {
2525

2626
/// Generate the program's `main` function, which calls the Fortran entry point.
/// \p defs is the list of environment defaults forwarded by the lowering
/// bridge. When \p initCuda is true and flang was built with
/// FLANG_CUDA_SUPPORT defined, a call to the CUDA Fortran runtime initializer
/// (CUFInit) is also emitted before the call to the Fortran main program.
/// \p initCuda defaults to false, so existing callers are unaffected.
void genMain(fir::FirOpBuilder &builder, mlir::Location loc,
27-
const std::vector<Fortran::lower::EnvironmentDefault> &defs);
27+
const std::vector<Fortran::lower::EnvironmentDefault> &defs,
28+
bool initCuda = false);
2829
}
2930

3031
#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_MAIN_H
flang/include/flang/Runtime/CUDA/init.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===-- include/flang/Runtime/CUDA/init.h -----------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef FORTRAN_RUNTIME_CUDA_INIT_H_
10+
#define FORTRAN_RUNTIME_CUDA_INIT_H_
11+
12+
#include "common.h"
13+
#include "flang/Runtime/entry-names.h"
14+
15+
extern "C" {
16+
17+
// Initializes CUDA Fortran runtime state from the execution environment.
// Currently this applies the device stack size limit when one was configured
// through the CUDA_STACKLIMIT environment variable.
void RTDECL(CUFInit)();
18+
}
19+
20+
#endif // FORTRAN_RUNTIME_CUDA_INIT_H_

flang/lib/Lower/Bridge.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
459459
if (hasMainProgram)
460460
createGlobalOutsideOfFunctionLowering([&]() {
461461
fir::runtime::genMain(*builder, toLocation(),
462-
bridge.getEnvironmentDefaults());
462+
bridge.getEnvironmentDefaults(),
463+
getFoldingContext().languageFeatures().IsEnabled(
464+
Fortran::common::LanguageFeature::CUDA));
463465
});
464466

465467
finalizeOpenACCLowering();

flang/lib/Optimizer/Builder/Runtime/Main.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,17 @@
1616
#include "flang/Optimizer/Dialect/FIRType.h"
1717
#include "flang/Runtime/main.h"
1818
#include "flang/Runtime/stop.h"
19+
#ifdef FLANG_CUDA_SUPPORT
20+
#include "flang/Runtime/CUDA/init.h"
21+
#endif
1922

2023
using namespace Fortran::runtime;
2124

2225
/// Create a `int main(...)` that calls the Fortran entry point
2326
void fir::runtime::genMain(
2427
fir::FirOpBuilder &builder, mlir::Location loc,
25-
const std::vector<Fortran::lower::EnvironmentDefault> &defs) {
28+
const std::vector<Fortran::lower::EnvironmentDefault> &defs,
29+
bool initCuda) {
2630
auto *context = builder.getContext();
2731
auto argcTy = builder.getDefaultIntegerType();
2832
auto ptrTy = mlir::LLVM::LLVMPointerType::get(context);
@@ -61,6 +65,15 @@ void fir::runtime::genMain(
6165
args.push_back(env);
6266

6367
builder.create<fir::CallOp>(loc, startFn, args);
68+
69+
#ifdef FLANG_CUDA_SUPPORT
70+
if (initCuda) {
71+
auto initFn = builder.createFunction(
72+
loc, RTNAME_STRING(CUFInit), mlir::FunctionType::get(context, {}, {}));
73+
builder.create<fir::CallOp>(loc, initFn);
74+
}
75+
#endif
76+
6477
builder.create<fir::CallOp>(loc, qqMainFn);
6578
builder.create<fir::CallOp>(loc, stopFn);
6679

flang/runtime/CUDA/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ add_flang_library(${CUFRT_LIBNAME}
1717
allocator.cpp
1818
allocatable.cpp
1919
descriptor.cpp
20+
init.cpp
2021
kernel.cpp
2122
memmove-function.cpp
2223
memory.cpp

flang/runtime/CUDA/init.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
//===-- runtime/CUDA/init.cpp ---------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "flang/Runtime/CUDA/init.h"
10+
#include "../environment.h"
11+
#include "../terminator.h"
12+
#include "flang/Runtime/CUDA/common.h"
13+
14+
#include "cuda_runtime.h"
15+
16+
extern "C" {
17+
18+
// CUDA Fortran runtime initialization entry point.
// Reads executionEnvironment.cudaStackLimit (set from the CUDA_STACKLIMIT
// environment variable; defaults to 0) and, when it is nonzero, passes it to
// cudaDeviceSetLimit(cudaLimitStackSize, ...). Any CUDA error is handled by
// CUDA_REPORT_IF_ERROR (presumably declared in flang/Runtime/CUDA/common.h;
// its exact behavior is not visible here).
void RTDEF(CUFInit)() {
19+
// Only override the device stack size when a limit was configured; a value
// of zero means no limit was requested and the CUDA default is left as is.
20+
if (Fortran::runtime::executionEnvironment.cudaStackLimit) {
21+
CUDA_REPORT_IF_ERROR(cudaDeviceSetLimit(cudaLimitStackSize,
22+
Fortran::runtime::executionEnvironment.cudaStackLimit));
23+
}
24+
}
25+
}

flang/runtime/environment.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,17 @@ void ExecutionEnvironment::Configure(int ac, const char *av[],
143143
}
144144
}
145145

146+
if (auto *x{std::getenv("CUDA_STACKLIMIT")}) {
147+
char *end;
148+
auto n{std::strtol(x, &end, 10)};
149+
if (n >= 0 && n < std::numeric_limits<int>::max() && *end == '\0') {
150+
cudaStackLimit = n;
151+
} else {
152+
std::fprintf(stderr,
153+
"Fortran runtime: CUDA_STACKLIMIT=%s is invalid; ignored\n", x);
154+
}
155+
}
156+
146157
// TODO: Set RP/ROUND='PROCESSOR_DEFINED' from environment
147158
}
148159

flang/runtime/environment.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ struct ExecutionEnvironment {
5656
bool noStopMessage{false}; // NO_STOP_MESSAGE=1 inhibits "Fortran STOP"
5757
bool defaultUTF8{false}; // DEFAULT_UTF8
5858
bool checkPointerDeallocation{true}; // FORT_CHECK_POINTER_DEALLOCATION
59+
60+
// CUDA Fortran related variables
61+
std::size_t cudaStackLimit{0}; // CUDA_STACKLIMIT
5962
};
6063

6164
RT_OFFLOAD_VAR_GROUP_BEGIN

0 commit comments

Comments
 (0)