-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[flang][cuda] Support shape shift in data transfer op. #115929
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
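@llvm/pr-subscribers-flang-fir-hlfir Author: Valentin Clement (バレンタイン クレメン) (clementval) Changes: When an array is declared with a non-default lower bound, the declare op `getShape` will return a `ShapeShiftOp`. This result is used in the data transfer operation to compute the number of bytes to transfer. Update the op to support `ShapeShiftOp`. Full diff: https://github.com/llvm/llvm-project/pull/115929.diff 5 Files Affected: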
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
index d34a8af0394a44..d06587c57d44b6 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -162,7 +162,7 @@ def cuf_DataTransferOp : cuf_Op<"data_transfer", []> {
let arguments = (ins Arg<AnyType, "", [MemRead]>:$src,
Arg<AnyRefOrBoxType, "", [MemWrite]>:$dst,
- Optional<fir_ShapeType>:$shape,
+ Optional<AnyShapeOrShiftType>:$shape,
cuf_DataTransferKindAttr:$transfer_kind);
let assemblyFormat = [{
diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
index 0b03e070a0076e..b05991a29a3213 100644
--- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
+++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
@@ -102,7 +102,7 @@ llvm::LogicalResult cuf::DataTransferOp::verify() {
mlir::Type srcTy = getSrc().getType();
mlir::Type dstTy = getDst().getType();
if (getShape()) {
- if (!fir::isa_ref_type(srcTy) || !fir::isa_ref_type(dstTy))
+ if (!fir::isa_ref_type(srcTy) && !fir::isa_ref_type(dstTy))
return emitOpError()
<< "shape can only be specified on data transfer with references";
}
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 7ecb3b1a7bf27d..494e462aa38ad2 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -564,13 +564,21 @@ struct CUFDataTransferOpConversion
mlir::Type i64Ty = builder.getI64Type();
mlir::Value nbElement;
if (op.getShape()) {
- auto shapeOp =
- mlir::dyn_cast<fir::ShapeOp>(op.getShape().getDefiningOp());
- nbElement = rewriter.create<fir::ConvertOp>(loc, i64Ty,
- shapeOp.getExtents()[0]);
- for (unsigned i = 1; i < shapeOp.getExtents().size(); ++i) {
- auto operand = rewriter.create<fir::ConvertOp>(
- loc, i64Ty, shapeOp.getExtents()[i]);
+ llvm::SmallVector<mlir::Value> extents;
+ if (auto shapeOp =
+ mlir::dyn_cast<fir::ShapeOp>(op.getShape().getDefiningOp())) {
+ extents = shapeOp.getExtents();
+ } else if (auto shapeShiftOp = mlir::dyn_cast<fir::ShapeShiftOp>(
+ op.getShape().getDefiningOp())) {
+ for (auto i : llvm::enumerate(shapeShiftOp.getPairs()))
+ if (i.index() & 1)
+ extents.push_back(i.value());
+ }
+
+ nbElement = rewriter.create<fir::ConvertOp>(loc, i64Ty, extents[0]);
+ for (unsigned i = 1; i < extents.size(); ++i) {
+ auto operand =
+ rewriter.create<fir::ConvertOp>(loc, i64Ty, extents[i]);
nbElement =
rewriter.create<mlir::arith::MulIOp>(loc, nbElement, operand);
}
diff --git a/flang/test/Fir/CUDA/cuda-data-transfer.fir b/flang/test/Fir/CUDA/cuda-data-transfer.fir
index 1a31c4c6d17a4f..982590217fcb31 100644
--- a/flang/test/Fir/CUDA/cuda-data-transfer.fir
+++ b/flang/test/Fir/CUDA/cuda-data-transfer.fir
@@ -308,4 +308,39 @@ func.func @_QPtest_type() {
// CHECK-LABEL: func.func @_QPtest_type()
// CHECK: fir.call @_FortranACUFDataTransferPtrPtr(%{{.*}}, %{{.*}}, %c12{{.*}}, %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr<i8>, !fir.llvm_ptr<i8>, i64, i32, !fir.ref<i8>, i32) -> none
+func.func @_QPshape_shift() {
+ %c0_i32 = arith.constant 0 : i32
+ %c11 = arith.constant 11 : index
+ %c10 = arith.constant 10 : index
+ %0 = cuf.alloc !fir.array<10xi32> {bindc_name = "cdev", data_attr = #cuf.cuda<device>, uniq_name = "_QFshape_shiftEcdev"} -> !fir.ref<!fir.array<10xi32>>
+ %1 = fir.shape_shift %c11, %c10 : (index, index) -> !fir.shapeshift<1>
+ %2:2 = hlfir.declare %0(%1) {data_attr = #cuf.cuda<device>, uniq_name = "_QFshape_shiftEcdev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+ cuf.data_transfer %c0_i32 to %2#1, %1 : !fir.shapeshift<1> {transfer_kind = #cuf.cuda_transfer<host_device>} : i32, !fir.ref<!fir.array<10xi32>>
+ cuf.free %2#1 : !fir.ref<!fir.array<10xi32>> {data_attr = #cuf.cuda<device>}
+ return
+}
+
+// CHECK-LABEL: func.func @_QPshape_shift()
+// CHECK: fir.call @_FortranACUFDataTransferDescDescNoRealloc
+
+func.func @_QPshape_shift2() {
+ %c11 = arith.constant 11 : index
+ %c10 = arith.constant 10 : index
+ %0 = fir.alloca !fir.array<10xi32> {bindc_name = "ahost", uniq_name = "_QFshape_shift2Eahost"}
+ %1 = fir.shape %c10 : (index) -> !fir.shape<1>
+ %2:2 = hlfir.declare %0(%1) {uniq_name = "_QFshape_shift2Eahost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+ %3 = cuf.alloc !fir.array<10xi32> {bindc_name = "cdev", data_attr = #cuf.cuda<device>, uniq_name = "_QFshape_shift2Ecdev"} -> !fir.ref<!fir.array<10xi32>>
+ %4 = fir.shape_shift %c11, %c10 : (index, index) -> !fir.shapeshift<1>
+ %5:2 = hlfir.declare %3(%4) {data_attr = #cuf.cuda<device>, uniq_name = "_QFshape_shift2Ecdev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+ cuf.data_transfer %2#0 to %5#1, %4 : !fir.shapeshift<1> {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>
+ cuf.free %5#1 : !fir.ref<!fir.array<10xi32>> {data_attr = #cuf.cuda<device>}
+ return
+}
+
+// CHECK-LABEL: func.func @_QPshape_shift2()
+// CHECK: %[[C10:.*]] = fir.convert %c10{{.*}} : (index) -> i64
+// CHECK: %[[BYTES:.*]] = arith.muli %[[C10]], %c4{{.*}} : i64
+// CHECK: fir.call @_FortranACUFDataTransferPtrPtr(%{{.*}}, %{{.*}}, %[[BYTES]], %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr<i8>, !fir.llvm_ptr<i8>, i64, i32, !fir.ref<i8>, i32) -> none
+
+
} // end of module
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index 2f76b5e78800ad..3b6cd67d9a8fa5 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -7,6 +7,8 @@ module mod1
integer :: i
end type
+ integer, device, dimension(11:20) :: cdev
+
contains
function dev1(a)
integer, device :: a(:)
@@ -16,6 +18,7 @@ contains
end
subroutine sub1()
+ use mod1
integer, device :: m
integer, device :: adev(10)
integer :: i, ahost(10), bhost(10)
@@ -34,6 +37,8 @@ subroutine sub1()
adev = 10
+ cdev = 0
+
end
! CHECK-LABEL: func.func @_QPsub1()
@@ -70,6 +75,8 @@ end
! CHECK: cuf.data_transfer %c10{{.*}} to %[[ADEV]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : i32, !fir.ref<!fir.array<10xi32>>
+! CHECK: cuf.data_transfer %c0{{.*}} to %{{.*}}#1, %{{.*}} : !fir.shapeshift<1> {transfer_kind = #cuf.cuda_transfer<host_device>} : i32, !fir.ref<!fir.array<10xi32>>
+
subroutine sub2()
integer, device :: m
integer, device :: adev(10), bdev(10)
@@ -124,7 +131,7 @@ end
! CHECK: %[[TMP:.*]] = fir.alloca !fir.type<_QMmod1Tt1{i:i32}> {bindc_name = ".tmp"}
! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub3Eahost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
! CHECK: %[[BHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub3Ebhost"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
-! CHECK: %[[T:.*]]:2 = hlfir.declare %7 {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub3Et"} : (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>) -> (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>)
+! CHECK: %[[T:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub3Et"} : (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>) -> (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>)
! CHECK: %[[TMP_DECL:.*]]:2 = hlfir.declare %0 {uniq_name = ".tmp"} : (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>) -> (!fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>)
! CHECK: cuf.data_transfer %[[T]]#1 to %[[TMP_DECL]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>, !fir.ref<!fir.type<_QMmod1Tt1{i:i32}>>
|
2480604 to
0ec366c
Compare
0ec366c to
804115a
Compare
When an array is declared with a non-default lower bound, the declare op
`getShape` will return a `ShapeShiftOp`. This result is used in the data transfer operation to compute the number of bytes to transfer. Update the op to support `ShapeShiftOp`.