Review notes (free text ahead of the first "diff --git" header; it belongs to no hunk):
- CUFOpConversion.cpp: when the transfer source is produced by a constant op,
  the hunk creates a temporary, stores the constant into it and passes the
  temporary to the runtime call arguments instead of the constant itself.
  The check uses the mlir::Value overload of matchPattern, which yields false
  for a value without a defining op (e.g. a block argument) instead of handing
  a null Operation pointer to the matcher.
- cuda-data-transfer.fir: adds @_QPsrc_cst, which transfers an f32 constant
  inside a fir.do_loop and checks for the alloca, the store of the constant,
  the convert of the alloca and the _FortranACUFDataTransferPtrPtr call.
- NOTE(review): angle-bracketed text (C++ template arguments, FIR type
  parameters and attribute values) looks stripped from this copy, e.g.
  "builder.create(" and "!fir.ref>>>" -- restore from the upstream patch
  before applying.

diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 17699dadc7511..f1ebd08967b9a 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -628,6 +628,12 @@ struct CUFDataTransferOpConversion
 
       mlir::Value dst = getDeviceAddress(rewriter, op.getDstMutable(), symtab);
       mlir::Value src = getDeviceAddress(rewriter, op.getSrcMutable(), symtab);
+      // Materialize a constant src: store it to a temporary and pass that.
+      if (matchPattern(src, mlir::m_Constant())) {
+        mlir::Value temp = builder.createTemporary(loc, srcTy);
+        builder.create(loc, src, temp);
+        src = temp;
+      }
       llvm::SmallVector args{
           fir::runtime::createArguments(builder, loc, fTy, dst, src, bytes,
                                         modeValue, sourceFile, sourceLine)};
diff --git a/flang/test/Fir/CUDA/cuda-data-transfer.fir b/flang/test/Fir/CUDA/cuda-data-transfer.fir
index 5f10dc0562d17..0f9ca6e640a80 100644
--- a/flang/test/Fir/CUDA/cuda-data-transfer.fir
+++ b/flang/test/Fir/CUDA/cuda-data-transfer.fir
@@ -513,4 +513,44 @@ func.func @_QPcallkernel(%arg0: !fir.box>> {fir.bind
 // CHECK: %[[BOX_NONE:.*]] = fir.convert %[[ALLOCA]] : (!fir.ref>>>) -> !fir.ref>
 // CHECK: fir.call @_FortranACUFDataTransferDescDesc(%{{.*}}, %[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.ref>, i32, !fir.ref, i32) -> none
 
+func.func @_QPsrc_cst() {
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = cuf.alloc !fir.box>> {bindc_name = "d4", data_attr = #cuf.cuda, uniq_name = "_QFsub4Ed4"} -> !fir.ref>>>
+  %5:2 = hlfir.declare %1 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub4Ed4"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>)
+  %6 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsub4Ei"}
+  %7:2 = hlfir.declare %6 {uniq_name = "_QFsub4Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref)
+  %c1 = arith.constant 1 : index
+  %c10_i32 = arith.constant 10 : i32
+  %c0_i32 = arith.constant 0 : i32
+  %9 = fir.convert %5#1 : (!fir.ref>>>) -> !fir.ref>
+  %c6_i32 = arith.constant 6 : i32
+  %14 = fir.convert %c6_i32 : (i32) -> index
+  %c10_i32_0 = arith.constant 10 : i32
+  %15 = fir.convert %c10_i32_0 : (i32) -> index
+  %c1_1 = arith.constant 1 : index
+  %16 = fir.convert %14 : (index) -> i32
+  %17:2 = fir.do_loop %arg1 = %14 to %15 step %c1_1 iter_args(%arg2 = %16) -> (index, i32) {
+    fir.store %arg2 to %7#1 : !fir.ref
+    %cst = arith.constant -4.000000e+00 : f32
+    %22 = fir.load %5#0 : !fir.ref>>>
+    %23 = fir.load %7#0 : !fir.ref
+    %24 = fir.convert %23 : (i32) -> i64
+    %25 = hlfir.designate %22 (%24) : (!fir.box>>, i64) -> !fir.ref
+    cuf.data_transfer %cst to %25 {transfer_kind = #cuf.cuda_transfer} : f32, !fir.ref
+    %26 = arith.addi %arg1, %c1_1 : index
+    %27 = fir.convert %c1_1 : (index) -> i32
+    %28 = fir.load %7#1 : !fir.ref
+    %29 = arith.addi %28, %27 : i32
+    fir.result %26, %29 : index, i32
+  }
+  return
+}
+
+// CHECK-LABEL: func.func @_QPsrc_cst()
+// CHECK: %[[ALLOCA:.*]] = fir.alloca f32
+// CHECK: %[[CST:.*]] = arith.constant -4.000000e+00 : f32
+// CHECK: fir.store %[[CST]] to %[[ALLOCA]] : !fir.ref
+// CHECK: %[[CONV:.*]] = fir.convert %[[ALLOCA]] : (!fir.ref) -> !fir.llvm_ptr
+// CHECK: fir.call @_FortranACUFDataTransferPtrPtr(%{{.*}}, %[[CONV]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr, !fir.llvm_ptr, i64, i32, !fir.ref, i32) -> none
+
 } // end of module