diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td index d34a8af0394a4..d06587c57d44b 100644 --- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td +++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td @@ -162,7 +162,7 @@ def cuf_DataTransferOp : cuf_Op<"data_transfer", []> { let arguments = (ins Arg:$src, Arg:$dst, - Optional:$shape, + Optional:$shape, cuf_DataTransferKindAttr:$transfer_kind); let assemblyFormat = [{ diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp index 0b03e070a0076..b05991a29a321 100644 --- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp +++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp @@ -102,7 +102,7 @@ llvm::LogicalResult cuf::DataTransferOp::verify() { mlir::Type srcTy = getSrc().getType(); mlir::Type dstTy = getDst().getType(); if (getShape()) { - if (!fir::isa_ref_type(srcTy) || !fir::isa_ref_type(dstTy)) + if (!fir::isa_ref_type(srcTy) && !fir::isa_ref_type(dstTy)) return emitOpError() << "shape can only be specified on data transfer with references"; } diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index 2db2973212501..58a3cdc905d36 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -570,13 +570,21 @@ struct CUFDataTransferOpConversion mlir::Type i64Ty = builder.getI64Type(); mlir::Value nbElement; if (op.getShape()) { - auto shapeOp = - mlir::dyn_cast(op.getShape().getDefiningOp()); - nbElement = rewriter.create(loc, i64Ty, - shapeOp.getExtents()[0]); - for (unsigned i = 1; i < shapeOp.getExtents().size(); ++i) { - auto operand = rewriter.create( - loc, i64Ty, shapeOp.getExtents()[i]); + llvm::SmallVector extents; + if (auto shapeOp = + mlir::dyn_cast(op.getShape().getDefiningOp())) { + extents = shapeOp.getExtents(); + } else if (auto shapeShiftOp = mlir::dyn_cast( + op.getShape().getDefiningOp())) { + for (auto i : llvm::enumerate(shapeShiftOp.getPairs())) + if (i.index() & 1) + extents.push_back(i.value()); + } + + nbElement = rewriter.create(loc, i64Ty, extents[0]); + for (unsigned i = 1; i < extents.size(); ++i) { + auto operand = + rewriter.create(loc, i64Ty, extents[i]); nbElement = rewriter.create(loc, nbElement, operand); } diff --git a/flang/test/Fir/CUDA/cuda-data-transfer.fir b/flang/test/Fir/CUDA/cuda-data-transfer.fir index 9a425cbc59624..491d417271ce7 100644 --- a/flang/test/Fir/CUDA/cuda-data-transfer.fir +++ b/flang/test/Fir/CUDA/cuda-data-transfer.fir @@ -327,4 +327,39 @@ func.func @_QPtest_array_type() { // CHECK: %[[BYTES:.*]] = arith.muli %c10{{.*}}, %c12{{.*}} : i64 // CHECK: fir.call @_FortranACUFDataTransferPtrPtr(%{{.*}}, %{{.*}}, %[[BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr, !fir.llvm_ptr, i64, i32, !fir.ref, i32) -> none +func.func @_QPshape_shift() { + %c0_i32 = arith.constant 0 : i32 + %c11 = arith.constant 11 : index + %c10 = arith.constant 10 : index + %0 = cuf.alloc !fir.array<10xi32> {bindc_name = "cdev", data_attr = #cuf.cuda, uniq_name = "_QFshape_shiftEcdev"} -> !fir.ref> + %1 = fir.shape_shift %c11, %c10 : (index, index) -> !fir.shapeshift<1> + %2:2 = hlfir.declare %0(%1) {data_attr = #cuf.cuda, uniq_name = "_QFshape_shiftEcdev"} : (!fir.ref>, !fir.shapeshift<1>) -> (!fir.box>, !fir.ref>) + cuf.data_transfer %c0_i32 to %2#1, %1 : !fir.shapeshift<1> {transfer_kind = #cuf.cuda_transfer} : i32, !fir.ref> + cuf.free %2#1 : !fir.ref> {data_attr = #cuf.cuda} + return +} + +// CHECK-LABEL: func.func @_QPshape_shift() +// CHECK: fir.call @_FortranACUFDataTransferDescDescNoRealloc + +func.func @_QPshape_shift2() { + %c11 = arith.constant 11 : index + %c10 = arith.constant 10 : index + %0 = fir.alloca !fir.array<10xi32> {bindc_name = "ahost", uniq_name = "_QFshape_shift2Eahost"} + %1 = fir.shape %c10 : (index) -> !fir.shape<1> + %2:2 = hlfir.declare %0(%1) {uniq_name = "_QFshape_shift2Eahost"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) + %3 = cuf.alloc !fir.array<10xi32> {bindc_name = "cdev", data_attr = #cuf.cuda, uniq_name = "_QFshape_shift2Ecdev"} -> !fir.ref> + %4 = fir.shape_shift %c11, %c10 : (index, index) -> !fir.shapeshift<1> + %5:2 = hlfir.declare %3(%4) {data_attr = #cuf.cuda, uniq_name = "_QFshape_shift2Ecdev"} : (!fir.ref>, !fir.shapeshift<1>) -> (!fir.box>, !fir.ref>) + cuf.data_transfer %2#0 to %5#1, %4 : !fir.shapeshift<1> {transfer_kind = #cuf.cuda_transfer} : !fir.ref>, !fir.ref> + cuf.free %5#1 : !fir.ref> {data_attr = #cuf.cuda} + return +} + +// CHECK-LABEL: func.func @_QPshape_shift2() +// CHECK: %[[C10:.*]] = fir.convert %c10{{.*}} : (index) -> i64 +// CHECK: %[[BYTES:.*]] = arith.muli %[[C10]], %c4{{.*}} : i64 +// CHECK: fir.call @_FortranACUFDataTransferPtrPtr(%{{.*}}, %{{.*}}, %[[BYTES]], %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr, !fir.llvm_ptr, i64, i32, !fir.ref, i32) -> none + + } // end of module diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf index 2f76b5e78800a..3b6cd67d9a8fa 100644 --- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf +++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf @@ -7,6 +7,8 @@ module mod1 integer :: i end type + integer, device, dimension(11:20) :: cdev + contains function dev1(a) integer, device :: a(:) @@ -16,6 +18,7 @@ contains end subroutine sub1() + use mod1 integer, device :: m integer, device :: adev(10) integer :: i, ahost(10), bhost(10) @@ -34,6 +37,8 @@ subroutine sub1() adev = 10 + cdev = 0 + end ! CHECK-LABEL: func.func @_QPsub1() @@ -70,6 +75,8 @@ end ! CHECK: cuf.data_transfer %c10{{.*}} to %[[ADEV]]#0 {transfer_kind = #cuf.cuda_transfer} : i32, !fir.ref> +! CHECK: cuf.data_transfer %c0{{.*}} to %{{.*}}#1, %{{.*}} : !fir.shapeshift<1> {transfer_kind = #cuf.cuda_transfer} : i32, !fir.ref> + subroutine sub2() integer, device :: m integer, device :: adev(10), bdev(10) @@ -124,7 +131,7 @@ end ! CHECK: %[[TMP:.*]] = fir.alloca !fir.type<_QMmod1Tt1{i:i32}> {bindc_name = ".tmp"} ! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub3Eahost"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[BHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub3Ebhost"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) -! CHECK: %[[T:.*]]:2 = hlfir.declare %7 {data_attr = #cuf.cuda, uniq_name = "_QFsub3Et"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[T:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, uniq_name = "_QFsub3Et"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %[[TMP_DECL:.*]]:2 = hlfir.declare %0 {uniq_name = ".tmp"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: cuf.data_transfer %[[T]]#1 to %[[TMP_DECL]]#0 {transfer_kind = #cuf.cuda_transfer} : !fir.ref>, !fir.ref>