diff --git a/flang/include/flang/Runtime/CUDA/pointer.h b/flang/include/flang/Runtime/CUDA/pointer.h new file mode 100644 index 0000000000000..db5242696303f --- /dev/null +++ b/flang/include/flang/Runtime/CUDA/pointer.h @@ -0,0 +1,27 @@ +//===-- include/flang/Runtime/CUDA/pointer.h --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_RUNTIME_CUDA_POINTER_H_ +#define FORTRAN_RUNTIME_CUDA_POINTER_H_ + +#include "flang/Runtime/descriptor-consts.h" +#include "flang/Runtime/entry-names.h" + +namespace Fortran::runtime::cuda { + +extern "C" { + +/// Forward the descriptor to PointerAllocate and return its stat value. +int RTDECL(CUFPointerAllocate)(Descriptor &, int64_t stream = -1, + bool hasStat = false, const Descriptor *errMsg = nullptr, + const char *sourceFile = nullptr, int sourceLine = 0); + +} // extern "C" + +} // namespace Fortran::runtime::cuda +#endif // FORTRAN_RUNTIME_CUDA_POINTER_H_ diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index 8b8c00fa7ecfc..23248f6d12622 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -20,6 +20,7 @@ #include "flang/Runtime/CUDA/common.h" #include "flang/Runtime/CUDA/descriptor.h" #include "flang/Runtime/CUDA/memory.h" +#include "flang/Runtime/CUDA/pointer.h" #include "flang/Runtime/allocatable.h" #include "mlir/Conversion/LLVMCommon/Pattern.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" @@ -161,7 +162,18 @@ struct CUFAllocateOpConversion fir::FirOpBuilder builder(rewriter, mod); mlir::Location loc = op.getLoc(); + bool isPointer = false; + + if (auto declareOp = + mlir::dyn_cast_or_null(op.getBox().getDefiningOp())) + if 
(declareOp.getFortranAttrs() && + bitEnumContainsAny(*declareOp.getFortranAttrs(), + fir::FortranVariableFlagsEnum::pointer)) + isPointer = true; + if (hasDoubleDescriptors(op)) { + if (isPointer) + TODO(loc, "pointer allocation with double descriptors"); // Allocation for module variable are done with custom runtime entry point // so the descriptors can be synchronized. mlir::func::FuncOp func; @@ -176,13 +188,20 @@ struct CUFAllocateOpConversion } mlir::func::FuncOp func; - if (op.getSource()) + if (op.getSource()) { + if (isPointer) + TODO(loc, "pointer allocation with source"); func = fir::runtime::getRuntimeFunc( loc, builder); - else - func = fir::runtime::getRuntimeFunc( - loc, builder); + } else { + func = + isPointer + ? fir::runtime::getRuntimeFunc( + loc, builder) + : fir::runtime::getRuntimeFunc( + loc, builder); + } return convertOpToCall(op, rewriter, func); } diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt index 3a88824826de3..23e01da72eded 100644 --- a/flang/runtime/CUDA/CMakeLists.txt +++ b/flang/runtime/CUDA/CMakeLists.txt @@ -20,6 +20,7 @@ add_flang_library(${CUFRT_LIBNAME} kernel.cpp memmove-function.cpp memory.cpp + pointer.cpp registration.cpp ) diff --git a/flang/runtime/CUDA/pointer.cpp b/flang/runtime/CUDA/pointer.cpp new file mode 100644 index 0000000000000..0c5d3a5a6297d --- /dev/null +++ b/flang/runtime/CUDA/pointer.cpp @@ -0,0 +1,40 @@ +//===-- runtime/CUDA/pointer.cpp ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Runtime/CUDA/pointer.h" +#include "../stat.h" +#include "../terminator.h" +#include "flang/Runtime/pointer.h" + +#include "cuda_runtime.h" + +namespace Fortran::runtime::cuda { + +extern "C" { +RT_EXT_API_GROUP_BEGIN + +int RTDEF(CUFPointerAllocate)(Descriptor &desc, int64_t stream, bool hasStat, + const Descriptor *errMsg, const char *sourceFile, int sourceLine) { + if (desc.HasAddendum()) { + Terminator terminator{sourceFile, sourceLine}; + // TODO: This requires a bit more work to set the correct type descriptor + // address + terminator.Crash( + "not yet implemented: CUDA descriptor allocation with addendum"); + } + // Perform the standard allocation; note that `stream` is not used here. + int stat{ + RTNAME(PointerAllocate)(desc, hasStat, errMsg, sourceFile, sourceLine)}; + return stat; +} + +RT_EXT_API_GROUP_END + +} // extern "C" + +} // namespace Fortran::runtime::cuda diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir index 35c6e2a77a697..2ac9498d35541 100644 --- a/flang/test/Fir/CUDA/cuda-allocate.fir +++ b/flang/test/Fir/CUDA/cuda-allocate.fir @@ -181,4 +181,15 @@ func.func @_QQallocate_stream() { // CHECK: %[[STREAM_LOAD:.*]] = fir.load %[[STREAM]] : !fir.ref // CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %[[STREAM_LOAD]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 + +func.func @_QPp_alloc() { + %0 = cuf.alloc !fir.box>>> {bindc_name = "complex_array", data_attr = #cuf.cuda, uniq_name = "_QFp_allocEcomplex_array"} -> !fir.ref>>>> + %4 = fir.declare %0 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFp_allocEcomplex_array"} : (!fir.ref>>>>) -> !fir.ref>>>> + %9 = cuf.allocate %4 : !fir.ref>>>> {data_attr = #cuf.cuda} -> i32 + return +} + +// CHECK-LABEL: func.func @_QPp_alloc() +// CHECK: fir.call 
@_FortranACUFPointerAllocate + } // end of module