Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions flang/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,13 @@ if (FLANG_INCLUDE_TESTS)
add_compile_definitions(FLANG_INCLUDE_TESTS=1)
endif()

# Opt-in switch for building the CUDA Fortran (CUF) runtime sources.
# This block sits ahead of the add_subdirectory() calls that follow so that
# the compile definition added here is visible to the directories they add.
option(FLANG_CUF_RUNTIME
"Compile CUDA Fortran runtime sources" OFF)
if (FLANG_CUF_RUNTIME)
# REQUIRED: configuration fails if no CUDA toolkit can be located.
find_package(CUDAToolkit REQUIRED)
# Lets C++ sources test `#ifdef FLANG_CUDA_SUPPORT` to enable CUDA-only code.
add_compile_definitions(FLANG_CUDA_SUPPORT=1)
endif()

add_subdirectory(include)
add_subdirectory(lib)
add_subdirectory(cmake/modules)
Expand All @@ -481,12 +488,6 @@ if (FLANG_BUILD_TOOLS)
add_subdirectory(tools)
endif()

option(FLANG_CUF_RUNTIME
"Compile CUDA Fortran runtime sources" OFF)
if (FLANG_CUF_RUNTIME)
find_package(CUDAToolkit REQUIRED)
endif()

add_subdirectory(runtime)

if (LLVM_INCLUDE_EXAMPLES)
Expand Down
3 changes: 2 additions & 1 deletion flang/include/flang/Optimizer/Builder/Runtime/Main.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ class GlobalOp;
namespace fir::runtime {

void genMain(fir::FirOpBuilder &builder, mlir::Location loc,
const std::vector<Fortran::lower::EnvironmentDefault> &defs);
const std::vector<Fortran::lower::EnvironmentDefault> &defs,
bool initCuda = false);
}

#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_MAIN_H
20 changes: 20 additions & 0 deletions flang/include/flang/Runtime/CUDA/init.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
//===-- include/flang/Runtime/CUDA/init.h -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef FORTRAN_RUNTIME_CUDA_INIT_H_
#define FORTRAN_RUNTIME_CUDA_INIT_H_

#include "common.h"
#include "flang/Runtime/entry-names.h"

extern "C" {

/// Entry point that initializes the CUDA Fortran runtime.
/// Takes no arguments and returns nothing. It is declared with C linkage and
/// named through RTDECL so it can be referenced by its runtime entry name.
void RTDECL(CUFInit)();
}

#endif // FORTRAN_RUNTIME_CUDA_INIT_H_
4 changes: 3 additions & 1 deletion flang/lib/Lower/Bridge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
if (hasMainProgram)
createGlobalOutsideOfFunctionLowering([&]() {
fir::runtime::genMain(*builder, toLocation(),
bridge.getEnvironmentDefaults());
bridge.getEnvironmentDefaults(),
getFoldingContext().languageFeatures().IsEnabled(
Fortran::common::LanguageFeature::CUDA));
});

finalizeOpenACCLowering();
Expand Down
15 changes: 14 additions & 1 deletion flang/lib/Optimizer/Builder/Runtime/Main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,17 @@
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Runtime/main.h"
#include "flang/Runtime/stop.h"
#ifdef FLANG_CUDA_SUPPORT
#include "flang/Runtime/CUDA/init.h"
#endif
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the reason for protecting this include with an ifdef? If this header contains only a runtime signature, it does not seem to require any CUDA support when building flang.

The initCuda dynamic flag seems enough to me to control the feature.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The header is not present if the CUDA Fortran runtime is not compiled.

Copy link
Member

@Meinersbur Meinersbur Jan 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whether the build configuration compiles CUFRuntime or not, init.h is still present in the git checkout.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that's correct


using namespace Fortran::runtime;

/// Create a `int main(...)` that calls the Fortran entry point
void fir::runtime::genMain(
fir::FirOpBuilder &builder, mlir::Location loc,
const std::vector<Fortran::lower::EnvironmentDefault> &defs) {
const std::vector<Fortran::lower::EnvironmentDefault> &defs,
bool initCuda) {
auto *context = builder.getContext();
auto argcTy = builder.getDefaultIntegerType();
auto ptrTy = mlir::LLVM::LLVMPointerType::get(context);
Expand Down Expand Up @@ -61,6 +65,15 @@ void fir::runtime::genMain(
args.push_back(env);

builder.create<fir::CallOp>(loc, startFn, args);

#ifdef FLANG_CUDA_SUPPORT
if (initCuda) {
auto initFn = builder.createFunction(
Copy link
Member

@Meinersbur Meinersbur Jan 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What should happen in the following cases?

  • Flang is compiled with FLANG_CUDA_SUPPORT, compiles a CUDA program, which is then executed in an environment without CUFRuntime.so.
  • Flang is compiled without FLANG_CUDA_SUPPORT, compiles a CUDA program which is then executed in an environment without CUFRuntime.so.
  • Flang is compiled without FLANG_CUDA_SUPPORT, compiles a CUDA program which is then executed in an environment that supports CUDA. ACC_OFFLOAD_STACK_SIZE is just ignored?
  • Flang is compiled with FLANG_CUDA_SUPPORT, compiles a CUDA program, is statically linked to libCUFRuntime.a, which is then executed in an environment that does not support CUDA.

loc, RTNAME_STRING(CUFInit), mlir::FunctionType::get(context, {}, {}));
builder.create<fir::CallOp>(loc, initFn);
}
#endif

builder.create<fir::CallOp>(loc, qqMainFn);
builder.create<fir::CallOp>(loc, stopFn);

Expand Down
1 change: 1 addition & 0 deletions flang/runtime/CUDA/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ add_flang_library(${CUFRT_LIBNAME}
allocator.cpp
allocatable.cpp
descriptor.cpp
init.cpp
kernel.cpp
memmove-function.cpp
memory.cpp
Expand Down
25 changes: 25 additions & 0 deletions flang/runtime/CUDA/init.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//===-- runtime/CUDA/init.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "flang/Runtime/CUDA/init.h"
#include "../environment.h"
#include "../terminator.h"
#include "flang/Runtime/CUDA/common.h"

#include "cuda_runtime.h"

extern "C" {

/// Applies CUDA settings recorded in the Fortran execution environment.
/// Currently the only setting is the device stack size limit: when
/// executionEnvironment.cudaStackLimit is non-zero it is passed to
/// cudaDeviceSetLimit(cudaLimitStackSize, ...), and any CUDA error from that
/// call is handed to CUDA_REPORT_IF_ERROR.
void RTDEF(CUFInit)() {
  // A zero limit means nothing was requested; leave the CUDA default alone.
  if (!Fortran::runtime::executionEnvironment.cudaStackLimit) {
    return;
  }
  CUDA_REPORT_IF_ERROR(cudaDeviceSetLimit(cudaLimitStackSize,
      Fortran::runtime::executionEnvironment.cudaStackLimit));
}
}
12 changes: 12 additions & 0 deletions flang/runtime/environment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,18 @@ void ExecutionEnvironment::Configure(int ac, const char *av[],
}
}

// ACC_OFFLOAD_STACK_SIZE: requested CUDA stack size limit, given as a
// positive decimal integer. A valid value is stored in cudaStackLimit; any
// other value is reported on stderr and ignored.
if (auto *x{std::getenv("ACC_OFFLOAD_STACK_SIZE")}) {
  // std::strtoul accepts a leading '-' and returns the negated value wrapped
  // to unsigned, so "-2" would otherwise be taken as an enormous limit.
  // A valid value consists only of optional whitespace, an optional '+', and
  // digits, so any '-' in the string marks it as invalid.
  bool hasMinus{false};
  for (const char *p{x}; *p != '\0'; ++p) {
    if (*p == '-') {
      hasMinus = true;
      break;
    }
  }
  char *end;
  auto n{std::strtoul(x, &end, 10)};
  // std::strtoul returns the maximum of its own return type on overflow.
  // That type can be narrower than std::size_t (e.g. LLP64 targets), so the
  // overflow sentinel is checked against decltype(n) as well as size_t.
  if (!hasMinus && n > 0 && n != std::numeric_limits<decltype(n)>::max() &&
      n < std::numeric_limits<std::size_t>::max() && *end == '\0') {
    cudaStackLimit = n;
  } else {
    std::fprintf(stderr,
        "Fortran runtime: ACC_OFFLOAD_STACK_SIZE=%s is invalid; ignored\n",
        x);
  }
}

// TODO: Set RP/ROUND='PROCESSOR_DEFINED' from environment
}

Expand Down
3 changes: 3 additions & 0 deletions flang/runtime/environment.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ struct ExecutionEnvironment {
bool noStopMessage{false}; // NO_STOP_MESSAGE=1 inhibits "Fortran STOP"
bool defaultUTF8{false}; // DEFAULT_UTF8
bool checkPointerDeallocation{true}; // FORT_CHECK_POINTER_DEALLOCATION

// CUDA related variables
std::size_t cudaStackLimit{0}; // ACC_OFFLOAD_STACK_SIZE
};

RT_OFFLOAD_VAR_GROUP_BEGIN
Expand Down