From a509073944186f25f4dab18b474619cc3609da49 Mon Sep 17 00:00:00 2001 From: Kazuaki Matsumura Date: Tue, 17 Jun 2025 17:39:06 -0700 Subject: [PATCH 1/2] [flang][acc] Generate acc.copyout for the reduction clause on compute constructs --- flang/lib/Lower/OpenACC.cpp | 8 +++++++- .../acc-reduction-unwrap-defaultbounds.f90 | 20 ++++++++++++++++++- flang/test/Lower/OpenACC/acc-reduction.f90 | 20 ++++++++++++++++++- 3 files changed, 45 insertions(+), 3 deletions(-) diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 69e9c53baa740..63a9d1d5616a9 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -2676,7 +2676,8 @@ static Op createComputeOp( llvm::SmallVector waitOperands, attachEntryOperands, copyEntryOperands, copyinEntryOperands, copyoutEntryOperands, createEntryOperands, nocreateEntryOperands, presentEntryOperands, - dataClauseOperands, numGangs, numWorkers, vectorLength, async; + reductionEntryOperands, dataClauseOperands, numGangs, numWorkers, + vectorLength, async; llvm::SmallVector numGangsDeviceTypes, numWorkersDeviceTypes, vectorLengthDeviceTypes, asyncDeviceTypes, asyncOnlyDeviceTypes, waitOperandsDeviceTypes, waitOnlyDeviceTypes; @@ -2912,9 +2913,12 @@ static Op createComputeOp( // combined construct implies a copy clause so issue an implicit copy // instead. 
if (!combinedConstructs) { + auto crtDataStart = reductionOperands.size(); genReductions(reductionClause->v, converter, semanticsContext, stmtCtx, reductionOperands, reductionRecipes, async, asyncDeviceTypes, asyncOnlyDeviceTypes); + reductionEntryOperands.append(reductionOperands.begin() + crtDataStart, + reductionOperands.end()); } else { auto crtDataStart = dataClauseOperands.size(); genDataOperandOperations( @@ -3038,6 +3042,8 @@ static Op createComputeOp( builder, nocreateEntryOperands, /*structured=*/true); genDataExitOperations( builder, presentEntryOperands, /*structured=*/true); + genDataExitOperations( + builder, reductionEntryOperands, /*structured=*/true); builder.restoreInsertionPoint(insPt); return computeOp; diff --git a/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90 b/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90 index 5bb751678ed53..bb76122aaffac 100644 --- a/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90 +++ b/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90 @@ -1001,6 +1001,7 @@ subroutine acc_reduction_iand() ! CHECK-LABEL: func.func @_QPacc_reduction_iand() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref) -> !fir.ref {name = "i"} ! CHECK: acc.parallel reduction(@reduction_iand_ref_i32 -> %[[RED]] : !fir.ref) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref) to varPtr(%{{.*}} : !fir.ref) {dataClause = #acc, name = "i"} subroutine acc_reduction_ior() integer :: i @@ -1011,6 +1012,7 @@ subroutine acc_reduction_ior() ! CHECK-LABEL: func.func @_QPacc_reduction_ior() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref) -> !fir.ref {name = "i"} ! CHECK: acc.parallel reduction(@reduction_ior_ref_i32 -> %[[RED]] : !fir.ref) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref) to varPtr(%{{.*}} : !fir.ref) {dataClause = #acc, name = "i"} subroutine acc_reduction_ieor() integer :: i @@ -1021,6 +1023,7 @@ subroutine acc_reduction_ieor() ! 
CHECK-LABEL: func.func @_QPacc_reduction_ieor() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref) -> !fir.ref {name = "i"} ! CHECK: acc.parallel reduction(@reduction_xor_ref_i32 -> %[[RED]] : !fir.ref) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref) to varPtr(%{{.*}} : !fir.ref) {dataClause = #acc, name = "i"} subroutine acc_reduction_and() logical :: l @@ -1033,6 +1036,7 @@ subroutine acc_reduction_and() ! CHECK: %[[DECLL:.*]]:2 = hlfir.declare %[[L]] ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECLL]]#0 : !fir.ref>) -> !fir.ref> {name = "l"} ! CHECK: acc.parallel reduction(@reduction_land_ref_l32 -> %[[RED]] : !fir.ref>) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>) to varPtr(%{{.*}} : !fir.ref>) {dataClause = #acc, name = "l"} subroutine acc_reduction_or() logical :: l @@ -1043,6 +1047,7 @@ subroutine acc_reduction_or() ! CHECK-LABEL: func.func @_QPacc_reduction_or() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "l"} ! CHECK: acc.parallel reduction(@reduction_lor_ref_l32 -> %[[RED]] : !fir.ref>) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>) to varPtr(%{{.*}} : !fir.ref>) {dataClause = #acc, name = "l"} subroutine acc_reduction_eqv() logical :: l @@ -1053,6 +1058,7 @@ subroutine acc_reduction_eqv() ! CHECK-LABEL: func.func @_QPacc_reduction_eqv() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "l"} ! CHECK: acc.parallel reduction(@reduction_eqv_ref_l32 -> %[[RED]] : !fir.ref>) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>) to varPtr(%{{.*}} : !fir.ref>) {dataClause = #acc, name = "l"} subroutine acc_reduction_neqv() logical :: l @@ -1063,6 +1069,7 @@ subroutine acc_reduction_neqv() ! CHECK-LABEL: func.func @_QPacc_reduction_neqv() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "l"} ! CHECK: acc.parallel reduction(@reduction_neqv_ref_l32 -> %[[RED]] : !fir.ref>) +! 
CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>) to varPtr(%{{.*}} : !fir.ref>) {dataClause = #acc, name = "l"} subroutine acc_reduction_add_cmplx() complex :: c @@ -1073,6 +1080,7 @@ subroutine acc_reduction_add_cmplx() ! CHECK-LABEL: func.func @_QPacc_reduction_add_cmplx() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "c"} ! CHECK: acc.parallel reduction(@reduction_add_ref_z32 -> %[[RED]] : !fir.ref>) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>) to varPtr(%{{.*}} : !fir.ref>) {dataClause = #acc, name = "c"} subroutine acc_reduction_mul_cmplx() complex :: c @@ -1083,6 +1091,7 @@ subroutine acc_reduction_mul_cmplx() ! CHECK-LABEL: func.func @_QPacc_reduction_mul_cmplx() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "c"} ! CHECK: acc.parallel reduction(@reduction_mul_ref_z32 -> %[[RED]] : !fir.ref>) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>) to varPtr(%{{.*}} : !fir.ref>) {dataClause = #acc, name = "c"} subroutine acc_reduction_add_alloc() integer, allocatable :: i @@ -1098,6 +1107,7 @@ subroutine acc_reduction_add_alloc() ! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box>) -> !fir.heap ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.heap) -> !fir.heap {name = "i"} ! CHECK: acc.parallel reduction(@reduction_add_heap_i32 -> %[[RED]] : !fir.heap) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.heap) to varPtr(%[[BOX_ADDR]] : !fir.heap) {dataClause = #acc, name = "i"} subroutine acc_reduction_add_pointer(i) integer, pointer :: i @@ -1112,6 +1122,7 @@ subroutine acc_reduction_add_pointer(i) ! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box>) -> !fir.ptr ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.ptr) -> !fir.ptr {name = "i"} ! CHECK: acc.parallel reduction(@reduction_add_ptr_i32 -> %[[RED]] : !fir.ptr) +! 
CHECK: acc.copyout accPtr(%[[RED]] : !fir.ptr) to varPtr(%[[BOX_ADDR]] : !fir.ptr) {dataClause = #acc, name = "i"} subroutine acc_reduction_add_static_slice(a) integer :: a(100) @@ -1129,6 +1140,7 @@ subroutine acc_reduction_add_static_slice(a) ! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[UB]] : index) extent(%[[C100]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index) ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECLARG0]]#0 : !fir.ref>) bounds(%[[BOUND]]) -> !fir.ref> {name = "a(11:20)"} ! CHECK: acc.parallel reduction(@reduction_add_section_lb10.ub19_ref_100xi32 -> %[[RED]] : !fir.ref>) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>) bounds(%[[BOUND]]) to varPtr(%[[DECLARG0]]#0 : !fir.ref>) {dataClause = #acc, name = "a(11:20)"} subroutine acc_reduction_add_dynamic_extent_add(a) integer :: a(:) @@ -1141,6 +1153,7 @@ subroutine acc_reduction_add_dynamic_extent_add(a) ! CHECK: %[[DECLARG0:.*]]:2 = hlfir.declare %[[ARG0]] ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref>) bounds(%{{.*}}) -> !fir.ref> {name = "a"} ! CHECK: acc.parallel reduction(@reduction_add_box_Uxi32 -> %[[RED:.*]] : !fir.ref>) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>) bounds(%{{.*}}) to varPtr(%{{.*}} : !fir.ref>) {dataClause = #acc, name = "a"} subroutine acc_reduction_add_assumed_shape_max(a) real :: a(:) @@ -1153,6 +1166,7 @@ subroutine acc_reduction_add_assumed_shape_max(a) ! CHECK: %[[DECLARG0:.*]]:2 = hlfir.declare %[[ARG0]] ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref>) bounds(%{{.*}}) -> !fir.ref> {name = "a"} ! CHECK: acc.parallel reduction(@reduction_max_box_Uxf32 -> %[[RED]] : !fir.ref>) { +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>) bounds(%{{.*}}) to varPtr(%{{.*}} : !fir.ref>) {dataClause = #acc, name = "a"} subroutine acc_reduction_add_dynamic_extent_add_with_section(a) integer :: a(:) @@ -1167,6 +1181,7 @@ subroutine acc_reduction_add_dynamic_extent_add_with_section(a) ! 
CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[DECL]]#0 : (!fir.box>) -> !fir.ref> ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.ref>) bounds(%[[BOUND]]) -> !fir.ref> {name = "a(2:4)"} ! CHECK: acc.parallel reduction(@reduction_add_section_lb1.ub3_box_Uxi32 -> %[[RED]] : !fir.ref>) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>) bounds(%[[BOUND]]) to varPtr(%[[BOX_ADDR]] : !fir.ref>) {dataClause = #acc, name = "a(2:4)"} subroutine acc_reduction_add_allocatable(a) real, allocatable :: a(:) @@ -1180,8 +1195,9 @@ subroutine acc_reduction_add_allocatable(a) ! CHECK: %[[BOX:.*]] = fir.load %[[DECL]]#0 : !fir.ref>>> ! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%c0{{.*}} : index) upperbound(%{{.*}} : index) extent(%{{.*}}#1 : index) stride(%{{.*}}#2 : index) startIdx(%{{.*}}#0 : index) {strideInBytes = true} ! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX]] : (!fir.box>>) -> !fir.heap> -! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.heap>) bounds(%{{[0-9]+}}) -> !fir.heap> {name = "a"} +! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.heap>) bounds(%[[BOUND]]) -> !fir.heap> {name = "a"} ! CHECK: acc.parallel reduction(@reduction_max_box_heap_Uxf32 -> %[[RED]] : !fir.heap>) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.heap>) bounds(%[[BOUND]]) to varPtr(%[[BOX_ADDR]] : !fir.heap>) {dataClause = #acc, name = "a"} subroutine acc_reduction_add_pointer_array(a) real, pointer :: a(:) @@ -1197,6 +1213,7 @@ subroutine acc_reduction_add_pointer_array(a) ! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX]] : (!fir.box>>) -> !fir.ptr> ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.ptr>) bounds(%[[BOUND]]) -> !fir.ptr> {name = "a"} ! CHECK: acc.parallel reduction(@reduction_max_box_ptr_Uxf32 -> %[[RED]] : !fir.ptr>) +! 
CHECK: acc.copyout accPtr(%[[RED]] : !fir.ptr>) bounds(%[[BOUND]]) to varPtr(%[[BOX_ADDR]] : !fir.ptr>) {dataClause = #acc, name = "a"} subroutine acc_reduction_max_dynamic_extent_max(a, n) integer :: n @@ -1211,3 +1228,4 @@ subroutine acc_reduction_max_dynamic_extent_max(a, n) ! CHECK: %[[ADDR:.*]] = fir.box_addr %[[DECL_A]]#0 : (!fir.box>) -> !fir.ref> ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[ADDR]] : !fir.ref>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref> {name = "a"} ! CHECK: acc.parallel reduction(@reduction_max_box_UxUxf32 -> %[[RED]] : !fir.ref>) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>) bounds(%{{.*}}) to varPtr(%{{.*}} : !fir.ref>) {dataClause = #acc, name = "a"} diff --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90 index 20b5ad28f78a1..22a52739171b1 100644 --- a/flang/test/Lower/OpenACC/acc-reduction.f90 +++ b/flang/test/Lower/OpenACC/acc-reduction.f90 @@ -1042,6 +1042,7 @@ subroutine acc_reduction_iand() ! CHECK-LABEL: func.func @_QPacc_reduction_iand() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref) -> !fir.ref {name = "i"} ! CHECK: acc.parallel reduction(@reduction_iand_ref_i32 -> %[[RED]] : !fir.ref) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref) to varPtr(%{{.*}} : !fir.ref) {dataClause = #acc, name = "i"} subroutine acc_reduction_ior() integer :: i @@ -1052,6 +1053,7 @@ subroutine acc_reduction_ior() ! CHECK-LABEL: func.func @_QPacc_reduction_ior() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref) -> !fir.ref {name = "i"} ! CHECK: acc.parallel reduction(@reduction_ior_ref_i32 -> %[[RED]] : !fir.ref) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref) to varPtr(%{{.*}} : !fir.ref) {dataClause = #acc, name = "i"} subroutine acc_reduction_ieor() integer :: i @@ -1062,6 +1064,7 @@ subroutine acc_reduction_ieor() ! CHECK-LABEL: func.func @_QPacc_reduction_ieor() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref) -> !fir.ref {name = "i"} ! 
CHECK: acc.parallel reduction(@reduction_xor_ref_i32 -> %[[RED]] : !fir.ref) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref) to varPtr(%{{.*}} : !fir.ref) {dataClause = #acc, name = "i"} subroutine acc_reduction_and() logical :: l @@ -1074,6 +1077,7 @@ subroutine acc_reduction_and() ! CHECK: %[[DECLL:.*]]:2 = hlfir.declare %[[L]] ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECLL]]#0 : !fir.ref>) -> !fir.ref> {name = "l"} ! CHECK: acc.parallel reduction(@reduction_land_ref_l32 -> %[[RED]] : !fir.ref>) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>) to varPtr(%{{.*}} : !fir.ref>) {dataClause = #acc, name = "l"} subroutine acc_reduction_or() logical :: l @@ -1084,6 +1088,7 @@ subroutine acc_reduction_or() ! CHECK-LABEL: func.func @_QPacc_reduction_or() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "l"} ! CHECK: acc.parallel reduction(@reduction_lor_ref_l32 -> %[[RED]] : !fir.ref>) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>) to varPtr(%{{.*}} : !fir.ref>) {dataClause = #acc, name = "l"} subroutine acc_reduction_eqv() logical :: l @@ -1094,6 +1099,7 @@ subroutine acc_reduction_eqv() ! CHECK-LABEL: func.func @_QPacc_reduction_eqv() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "l"} ! CHECK: acc.parallel reduction(@reduction_eqv_ref_l32 -> %[[RED]] : !fir.ref>) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>) to varPtr(%{{.*}} : !fir.ref>) {dataClause = #acc, name = "l"} subroutine acc_reduction_neqv() logical :: l @@ -1104,6 +1110,7 @@ subroutine acc_reduction_neqv() ! CHECK-LABEL: func.func @_QPacc_reduction_neqv() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "l"} ! CHECK: acc.parallel reduction(@reduction_neqv_ref_l32 -> %[[RED]] : !fir.ref>) +! 
CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>) to varPtr(%{{.*}} : !fir.ref>) {dataClause = #acc, name = "l"} subroutine acc_reduction_add_cmplx() complex :: c @@ -1114,6 +1121,7 @@ subroutine acc_reduction_add_cmplx() ! CHECK-LABEL: func.func @_QPacc_reduction_add_cmplx() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "c"} ! CHECK: acc.parallel reduction(@reduction_add_ref_z32 -> %[[RED]] : !fir.ref>) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>) to varPtr(%{{.*}} : !fir.ref>) {dataClause = #acc, name = "c"} subroutine acc_reduction_mul_cmplx() complex :: c @@ -1124,6 +1132,7 @@ subroutine acc_reduction_mul_cmplx() ! CHECK-LABEL: func.func @_QPacc_reduction_mul_cmplx() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "c"} ! CHECK: acc.parallel reduction(@reduction_mul_ref_z32 -> %[[RED]] : !fir.ref>) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>) to varPtr(%{{.*}} : !fir.ref>) {dataClause = #acc, name = "c"} subroutine acc_reduction_add_alloc() integer, allocatable :: i @@ -1137,6 +1146,7 @@ subroutine acc_reduction_add_alloc() ! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOCA]] ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECL]]#0 : !fir.ref>>) -> !fir.ref>> {name = "i"} ! CHECK: acc.parallel reduction(@reduction_add_ref_box_heap_i32 -> %[[RED]] : !fir.ref>>) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>>) to varPtr(%[[DECL]]#0 : !fir.ref>>) {dataClause = #acc, name = "i"} subroutine acc_reduction_add_pointer(i) integer, pointer :: i @@ -1149,6 +1159,7 @@ subroutine acc_reduction_add_pointer(i) ! CHECK: %[[DECLARG0:.*]]:2 = hlfir.declare %[[ARG0]] ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECLARG0]]#0 : !fir.ref>>) -> !fir.ref>> {name = "i"} ! CHECK: acc.parallel reduction(@reduction_add_ref_box_ptr_i32 -> %[[RED]] : !fir.ref>>) +! 
CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>>) to varPtr(%[[DECLARG0]]#0 : !fir.ref>>) {dataClause = #acc, name = "i"} subroutine acc_reduction_add_static_slice(a) integer :: a(100) @@ -1166,6 +1177,7 @@ subroutine acc_reduction_add_static_slice(a) ! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[UB]] : index) extent(%[[C100]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index) ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECLARG0]]#0 : !fir.ref>) bounds(%[[BOUND]]) -> !fir.ref> {name = "a(11:20)"} ! CHECK: acc.parallel reduction(@reduction_add_section_lb10.ub19_ref_100xi32 -> %[[RED]] : !fir.ref>) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>) bounds(%[[BOUND]]) to varPtr(%[[DECLARG0]]#0 : !fir.ref>) {dataClause = #acc, name = "a(11:20)"} subroutine acc_reduction_add_dynamic_extent_add(a) integer :: a(:) @@ -1178,6 +1190,7 @@ subroutine acc_reduction_add_dynamic_extent_add(a) ! CHECK: %[[DECLARG0:.*]]:2 = hlfir.declare %[[ARG0]] ! CHECK: %[[RED:.*]] = acc.reduction var(%{{.*}} : !fir.box>) -> !fir.box> {name = "a"} ! CHECK: acc.parallel reduction(@reduction_add_box_Uxi32 -> %[[RED:.*]] : !fir.box>) +! CHECK: acc.copyout accVar(%[[RED]] : !fir.box>) to var(%{{.*}} : !fir.box>) {dataClause = #acc, name = "a"} subroutine acc_reduction_add_assumed_shape_max(a) real :: a(:) @@ -1190,6 +1203,7 @@ subroutine acc_reduction_add_assumed_shape_max(a) ! CHECK: %[[DECLARG0:.*]]:2 = hlfir.declare %[[ARG0]] ! CHECK: %[[RED:.*]] = acc.reduction var(%{{.*}} : !fir.box>) -> !fir.box> {name = "a"} ! CHECK: acc.parallel reduction(@reduction_max_box_Uxf32 -> %[[RED]] : !fir.box>) { +! CHECK: acc.copyout accVar(%[[RED]] : !fir.box>) to var(%{{.*}} : !fir.box>) {dataClause = #acc, name = "a"} subroutine acc_reduction_add_dynamic_extent_add_with_section(a) integer :: a(:) @@ -1202,7 +1216,8 @@ subroutine acc_reduction_add_dynamic_extent_add_with_section(a) ! 
CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {uniq_name = "_QFacc_reduction_add_dynamic_extent_add_with_sectionEa"} : (!fir.box>, !fir.dscope) -> (!fir.box>, !fir.box>) ! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%c1{{.*}} : index) upperbound(%c3{{.*}} : index) extent(%{{.*}}#1 : index) stride(%{{.*}}#2 : index) startIdx(%{{.*}} : index) {strideInBytes = true} ! CHECK: %[[RED:.*]] = acc.reduction var(%[[DECL]]#0 : !fir.box>) bounds(%[[BOUND]]) -> !fir.box> {name = "a(2:4)"} -! CHECK: acc.parallel reduction(@reduction_add_section_lb1.ub3_box_Uxi32 -> %[[RED]] : !fir.box>) +! CHECK: acc.parallel reduction(@reduction_add_section_lb1.ub3_box_Uxi32 -> %[[RED]] : !fir.box>) +! CHECK: acc.copyout accVar(%[[RED]] : !fir.box>) bounds(%[[BOUND]]) to var(%[[DECL]]#0 : !fir.box>) {dataClause = #acc, name = "a(2:4)"} subroutine acc_reduction_add_allocatable(a) real, allocatable :: a(:) @@ -1215,6 +1230,7 @@ subroutine acc_reduction_add_allocatable(a) ! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFacc_reduction_add_allocatableEa"} : (!fir.ref>>>, !fir.dscope) -> (!fir.ref>>>, !fir.ref>>>) ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECL]]#0 : !fir.ref>>>) -> !fir.ref>>> {name = "a"} ! CHECK: acc.parallel reduction(@reduction_max_ref_box_heap_Uxf32 -> %[[RED]] : !fir.ref>>>) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>>>) to varPtr(%[[DECL]]#0 : !fir.ref>>>) {dataClause = #acc, name = "a"} subroutine acc_reduction_add_pointer_array(a) real, pointer :: a(:) @@ -1227,6 +1243,7 @@ subroutine acc_reduction_add_pointer_array(a) ! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFacc_reduction_add_pointer_arrayEa"} : (!fir.ref>>>, !fir.dscope) -> (!fir.ref>>>, !fir.ref>>>) ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECL]]#0 : !fir.ref>>>) -> !fir.ref>>> {name = "a"} ! 
CHECK: acc.parallel reduction(@reduction_max_ref_box_ptr_Uxf32 -> %[[RED]] : !fir.ref>>>) +! CHECK: acc.copyout accPtr(%[[RED]] : !fir.ref>>>) to varPtr(%[[DECL]]#0 : !fir.ref>>>) {dataClause = #acc, name = "a"} subroutine acc_reduction_max_dynamic_extent_max(a, n) integer :: n @@ -1240,3 +1257,4 @@ subroutine acc_reduction_max_dynamic_extent_max(a, n) ! CHECK: %[[DECL_A:.*]]:2 = hlfir.declare %[[ARG0]](%{{.*}}) dummy_scope %{{[0-9]+}} {uniq_name = "_QFacc_reduction_max_dynamic_extent_maxEa"} : (!fir.ref>, !fir.shape<2>, !fir.dscope) -> (!fir.box>, !fir.ref>) ! CHECK: %[[RED:.*]] = acc.reduction var(%[[DECL_A]]#0 : !fir.box>) -> !fir.box> {name = "a"} ! CHECK: acc.parallel reduction(@reduction_max_box_UxUxf32 -> %[[RED]] : !fir.box>) +! CHECK: acc.copyout accVar(%[[RED]] : !fir.box>) to var(%[[DECL_A]]#0 : !fir.box>) {dataClause = #acc, name = "a"} From acbc6cdc22d4742cd3748dd0137168ba3124c0cc Mon Sep 17 00:00:00 2001 From: Kazuaki Matsumura Date: Wed, 18 Jun 2025 00:25:00 -0700 Subject: [PATCH 2/2] [flang][acc] Reorder the generation of data exit --- flang/lib/Lower/OpenACC.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 63a9d1d5616a9..3ad726b068bb4 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -3028,6 +3028,8 @@ static Op createComputeOp( builder.setInsertionPointAfter(computeOp); // Create the exit operations after the region. + genDataExitOperations( + builder, reductionEntryOperands, /*structured=*/true); genDataExitOperations( builder, copyEntryOperands, /*structured=*/true); genDataExitOperations( @@ -3042,8 +3044,6 @@ static Op createComputeOp( builder, nocreateEntryOperands, /*structured=*/true); genDataExitOperations( builder, presentEntryOperands, /*structured=*/true); - genDataExitOperations( - builder, reductionEntryOperands, /*structured=*/true); builder.restoreInsertionPoint(insPt); return computeOp;