diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index b619553ef8302..fb7ab4759ad37 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -471,6 +471,13 @@ if (FLANG_INCLUDE_TESTS) add_compile_definitions(FLANG_INCLUDE_TESTS=1) endif() +option(FLANG_CUF_RUNTIME + "Compile CUDA Fortran runtime sources" OFF) +if (FLANG_CUF_RUNTIME) + find_package(CUDAToolkit REQUIRED) + add_compile_definitions(FLANG_CUDA_SUPPORT=1) +endif() + add_subdirectory(include) add_subdirectory(lib) add_subdirectory(cmake/modules) @@ -481,12 +488,6 @@ if (FLANG_BUILD_TOOLS) add_subdirectory(tools) endif() -option(FLANG_CUF_RUNTIME - "Compile CUDA Fortran runtime sources" OFF) -if (FLANG_CUF_RUNTIME) - find_package(CUDAToolkit REQUIRED) -endif() - add_subdirectory(runtime) if (LLVM_INCLUDE_EXAMPLES) diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Main.h b/flang/include/flang/Optimizer/Builder/Runtime/Main.h index e4c5dc914c700..a0586deade42a 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/Main.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/Main.h @@ -24,7 +24,8 @@ class GlobalOp; namespace fir::runtime { void genMain(fir::FirOpBuilder &builder, mlir::Location loc, - const std::vector &defs); + const std::vector &defs, + bool initCuda = false); } #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_MAIN_H diff --git a/flang/include/flang/Runtime/CUDA/init.h b/flang/include/flang/Runtime/CUDA/init.h new file mode 100644 index 0000000000000..24bc683822720 --- /dev/null +++ b/flang/include/flang/Runtime/CUDA/init.h @@ -0,0 +1,20 @@ +//===-- include/flang/Runtime/CUDA/init.h -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_RUNTIME_CUDA_INIT_H_ +#define FORTRAN_RUNTIME_CUDA_INIT_H_ + +#include "common.h" +#include "flang/Runtime/entry-names.h" + +extern "C" { + +void RTDECL(CUFInit)(); +} + +#endif // FORTRAN_RUNTIME_CUDA_INIT_H_ diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index d92dc0cf9abd6..ff80826216e4f 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -459,7 +459,9 @@ class FirConverter : public Fortran::lower::AbstractConverter { if (hasMainProgram) createGlobalOutsideOfFunctionLowering([&]() { fir::runtime::genMain(*builder, toLocation(), - bridge.getEnvironmentDefaults()); + bridge.getEnvironmentDefaults(), + getFoldingContext().languageFeatures().IsEnabled( + Fortran::common::LanguageFeature::CUDA)); }); finalizeOpenACCLowering(); diff --git a/flang/lib/Optimizer/Builder/Runtime/Main.cpp b/flang/lib/Optimizer/Builder/Runtime/Main.cpp index ab3c4ca81314c..5156fd5402077 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Main.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Main.cpp @@ -16,13 +16,17 @@ #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Runtime/main.h" #include "flang/Runtime/stop.h" +#ifdef FLANG_CUDA_SUPPORT +#include "flang/Runtime/CUDA/init.h" +#endif using namespace Fortran::runtime; /// Create a `int main(...)` that calls the Fortran entry point void fir::runtime::genMain( fir::FirOpBuilder &builder, mlir::Location loc, - const std::vector &defs) { + const std::vector &defs, + bool initCuda) { auto *context = builder.getContext(); auto argcTy = builder.getDefaultIntegerType(); auto ptrTy = mlir::LLVM::LLVMPointerType::get(context); @@ -61,6 +65,15 @@ void fir::runtime::genMain( args.push_back(env); builder.create(loc, startFn, args); + +#ifdef FLANG_CUDA_SUPPORT + if (initCuda) { + auto initFn = builder.createFunction( + loc, 
RTNAME_STRING(CUFInit), mlir::FunctionType::get(context, {}, {})); + builder.create(loc, initFn); + } +#endif + builder.create(loc, qqMainFn); builder.create(loc, stopFn); diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt index 23e01da72eded..bfbae58086c1f 100644 --- a/flang/runtime/CUDA/CMakeLists.txt +++ b/flang/runtime/CUDA/CMakeLists.txt @@ -17,6 +17,7 @@ add_flang_library(${CUFRT_LIBNAME} allocator.cpp allocatable.cpp descriptor.cpp + init.cpp kernel.cpp memmove-function.cpp memory.cpp diff --git a/flang/runtime/CUDA/init.cpp b/flang/runtime/CUDA/init.cpp new file mode 100644 index 0000000000000..2bffce842b952 --- /dev/null +++ b/flang/runtime/CUDA/init.cpp @@ -0,0 +1,25 @@ +//===-- runtime/CUDA/init.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Runtime/CUDA/init.h" +#include "../environment.h" +#include "../terminator.h" +#include "flang/Runtime/CUDA/common.h" + +#include "cuda_runtime.h" + +extern "C" { + +void RTDEF(CUFInit)() { + // Perform ctx initialization based on execution environment if necessary. 
+  if (Fortran::runtime::executionEnvironment.cudaStackLimit) {
+    CUDA_REPORT_IF_ERROR(cudaDeviceSetLimit(cudaLimitStackSize,
+        Fortran::runtime::executionEnvironment.cudaStackLimit));
+  }
+}
+}
diff --git a/flang/runtime/environment.cpp b/flang/runtime/environment.cpp
index 52b1d99ba536e..678d8745c9fd7 100644
--- a/flang/runtime/environment.cpp
+++ b/flang/runtime/environment.cpp
@@ -143,6 +143,28 @@ void ExecutionEnvironment::Configure(int ac, const char *av[],
     }
   }
 
+  // ACC_OFFLOAD_STACK_SIZE supplies the value that CUFInit hands to
+  // cudaDeviceSetLimit(cudaLimitStackSize, ...). Only a plain positive decimal
+  // number is accepted; any other setting is reported and ignored.
+  if (auto *x{std::getenv("ACC_OFFLOAD_STACK_SIZE")}) {
+    // strtoul() skips leading blanks, accepts a sign, and negates a "-N" input
+    // in its unsigned return type instead of failing. Require a leading digit
+    // so that a negative setting cannot turn into a huge stack limit.
+    const bool leadingDigit{*x >= '0' && *x <= '9'};
+    char *end;
+    auto n{std::strtoul(x, &end, 10)};
+    // strtoul() returns the largest value of its return type on overflow, so
+    // that value is excluded as well.
+    if (leadingDigit && n > 0 && n < std::numeric_limits<decltype(n)>::max() &&
+        *end == '\0') {
+      cudaStackLimit = n;
+    } else {
+      std::fprintf(stderr,
+          "Fortran runtime: ACC_OFFLOAD_STACK_SIZE=%s is invalid; ignored\n",
+          x);
+    }
+  }
+
   // TODO: Set RP/ROUND='PROCESSOR_DEFINED' from environment
 }
 
diff --git a/flang/runtime/environment.h b/flang/runtime/environment.h
index b8b9f10e4e57f..500aa925a625b 100644
--- a/flang/runtime/environment.h
+++ b/flang/runtime/environment.h
@@ -56,6 +56,9 @@ struct ExecutionEnvironment {
   bool noStopMessage{false}; // NO_STOP_MESSAGE=1 inhibits "Fortran STOP"
   bool defaultUTF8{false}; // DEFAULT_UTF8
   bool checkPointerDeallocation{true}; // FORT_CHECK_POINTER_DEALLOCATION
+
+  // CUDA related variables
+  std::size_t cudaStackLimit{0}; // ACC_OFFLOAD_STACK_SIZE
 };
 
 RT_OFFLOAD_VAR_GROUP_BEGIN