Skip to content

Commit 556d3f7

Browse files
authored
[flang][do concurrent] For loop bounds, use host_eval on host and map_entries on device (llvm#1800)
2 parents e323313 + 01d9fb6 commit 556d3f7

File tree

8 files changed, with 124 additions and 22 deletions.

flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -6,19 +6,19 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#include "flang/Optimizer/Builder/BoxValue.h"
10+
#include "flang/Optimizer/Builder/DirectivesCommon.h"
911
#include "flang/Optimizer/Builder/FIRBuilder.h"
12+
#include "flang/Optimizer/Builder/HLFIRTools.h"
1013
#include "flang/Optimizer/Builder/Todo.h"
1114
#include "flang/Optimizer/Dialect/FIRDialect.h"
1215
#include "flang/Optimizer/Dialect/FIROps.h"
1316
#include "flang/Optimizer/Dialect/FIRType.h"
1417
#include "flang/Optimizer/Dialect/Support/FIRContext.h"
15-
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
1618
#include "flang/Optimizer/HLFIR/HLFIROps.h"
1719
#include "flang/Optimizer/OpenMP/Passes.h"
1820
#include "mlir/Analysis/SliceAnalysis.h"
19-
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
2021
#include "mlir/Dialect/Func/IR/FuncOps.h"
21-
#include "mlir/Dialect/Math/IR/Math.h"
2222
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
2323
#include "mlir/IR/Diagnostics.h"
2424
#include "mlir/IR/IRMapping.h"
@@ -243,6 +243,10 @@ void collectLoopLiveIns(fir::DoLoopOp doLoop,
243243
llvm::SmallDenseSet<mlir::Value> seenValues;
244244
llvm::SmallDenseSet<mlir::Operation *> seenOps;
245245

246+
liveIns.push_back(doLoop.getLowerBound());
247+
liveIns.push_back(doLoop.getUpperBound());
248+
liveIns.push_back(doLoop.getStep());
249+
246250
mlir::visitUsedValuesDefinedAbove(
247251
doLoop.getRegion(), [&](mlir::OpOperand *operand) {
248252
if (!seenValues.insert(operand->get()).second)
@@ -548,7 +552,7 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
548552
"These will be serialzied.");
549553

550554
llvm::SmallVector<mlir::Value> loopNestLiveIns;
551-
looputils::collectLoopLiveIns(loopNest.back().first, loopNestLiveIns);
555+
looputils::collectLoopLiveIns(loopNest.front().first, loopNestLiveIns);
552556
assert(!loopNestLiveIns.empty());
553557

554558
llvm::SetVector<mlir::Value> locals;
@@ -569,19 +573,15 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
569573
mlir::IRMapping mapper;
570574

571575
if (mapToDevice) {
572-
// TODO: Currently the loop bounds for the outer loop are duplicated.
576+
mlir::ModuleOp module = doLoop->getParentOfType<mlir::ModuleOp>();
577+
bool isTargetDevice =
578+
llvm::cast<mlir::omp::OffloadModuleInterface>(*module)
579+
.getIsTargetDevice();
580+
573581
mlir::omp::TargetOperands targetClauseOps;
574582
genLoopNestClauseOps(doLoop.getLoc(), rewriter, loopNest, mapper,
575-
loopNestClauseOps, &targetClauseOps);
576-
577-
// Prevent mapping host-evaluated variables.
578-
loopNestLiveIns.erase(llvm::remove_if(loopNestLiveIns,
579-
[&](mlir::Value liveIn) {
580-
return llvm::is_contained(
581-
targetClauseOps.hostEvalVars,
582-
liveIn);
583-
}),
584-
loopNestLiveIns.end());
583+
loopNestClauseOps,
584+
isTargetDevice ? nullptr : &targetClauseOps);
585585

586586
LiveInShapeInfoMap liveInShapeInfoMap;
587587
// The outermost loop will contain all the live-in values in all nested

flang/test/Transforms/DoConcurrent/basic_device.f90

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,11 @@ program do_concurrent_basic
4040

4141
! CHECK: omp.target
4242
! CHECK-SAME: host_eval(%[[HOST_LB]] -> %[[LB:[[:alnum:]]+]], %[[HOST_UB]] -> %[[UB:[[:alnum:]]+]], %[[HOST_STEP]] -> %[[STEP:[[:alnum:]]+]] : index, index, index)
43-
! CHECK-SAME: map_entries(%[[I_MAP_INFO]] -> %[[I_ARG:[[:alnum:]]+]],
43+
! CHECK-SAME: map_entries(
44+
! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
45+
! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
46+
! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
47+
! CHECK-SAME: %[[I_MAP_INFO]] -> %[[I_ARG:[[:alnum:]]+]],
4448
! CHECK-SAME: %[[A_MAP_INFO]] -> %[[A_ARG:.[[:alnum:]]+]]
4549

4650
! CHECK: %[[A_DEV_DECL:.*]]:2 = hlfir.declare %[[A_ARG]]
(Newly added file; its path was not captured in this extract.)

Lines changed: 61 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,61 @@
1+
! Tests `host_eval` clause code-gen and loop nest bounds on host vs. device.
2+
3+
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \
4+
! RUN: -fdo-concurrent-to-openmp=device %s -o - \
5+
! RUN: | FileCheck %s --check-prefix=HOST -vv
6+
7+
! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-hlfir -fopenmp \
8+
! RUN: -fopenmp-is-target-device -fdo-concurrent-to-openmp=device %s -o - \
9+
! RUN: | FileCheck %s --check-prefix=DEVICE
10+
11+
program do_concurrent_host_eval
12+
implicit none
13+
integer :: i, j
14+
15+
do concurrent (i=1:10, j=1:20)
16+
end do
17+
end program do_concurrent_host_eval
18+
19+
! HOST: omp.target host_eval(
20+
! HOST-SAME: %{{[^[:space:]]+}} -> %[[I_LB:[^,]+]],
21+
! HOST-SAME: %{{[^[:space:]]+}} -> %[[I_UB:[^,]+]],
22+
! HOST-SAME: %{{[^[:space:]]+}} -> %[[I_ST:[^,]+]],
23+
! HOST-SAME: %{{[^[:space:]]+}} -> %[[J_LB:[^,]+]],
24+
! HOST-SAME: %{{[^[:space:]]+}} -> %[[J_UB:[^,]+]],
25+
! HOST-SAME: %{{[^[:space:]]+}} -> %[[J_ST:[^,]+]] : {{.*}}) map_entries
26+
27+
! HOST: omp.loop_nest ({{.*}}, {{.*}}) : index = (%[[I_LB]], %[[J_LB]]) to
28+
! HOST-SAME: (%[[I_UB]], %[[J_UB]]) inclusive step
29+
! HOST-SAME: (%[[I_ST]], %[[J_ST]])
30+
31+
! DEVICE: omp.target map_entries(
32+
! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[I_LB_MAP:[^,]+]],
33+
! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[I_UB_MAP:[^,]+]],
34+
! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[I_ST_MAP:[^,]+]],
35+
! DEVICE-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
36+
! DEVICE-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
37+
! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[J_LB_MAP:[^,]+]],
38+
! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[J_UB_MAP:[^,]+]],
39+
! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[J_ST_MAP:[^,]+]] : {{.*}})
40+
41+
! DEVICE: %[[I_LB_DECL:.*]]:2 = hlfir.declare %[[I_LB_MAP]]
42+
! DEVICE: %[[I_LB:.*]] = fir.load %[[I_LB_DECL]]#1 : !fir.ref<index>
43+
44+
! DEVICE: %[[I_UB_DECL:.*]]:2 = hlfir.declare %[[I_UB_MAP]]
45+
! DEVICE: %[[I_UB:.*]] = fir.load %[[I_UB_DECL]]#1 : !fir.ref<index>
46+
47+
! DEVICE: %[[I_ST_DECL:.*]]:2 = hlfir.declare %[[I_ST_MAP]]
48+
! DEVICE: %[[I_ST:.*]] = fir.load %[[I_ST_DECL]]#1 : !fir.ref<index>
49+
50+
! DEVICE: %[[J_LB_DECL:.*]]:2 = hlfir.declare %[[J_LB_MAP]]
51+
! DEVICE: %[[J_LB:.*]] = fir.load %[[J_LB_DECL]]#1 : !fir.ref<index>
52+
53+
! DEVICE: %[[J_UB_DECL:.*]]:2 = hlfir.declare %[[J_UB_MAP]]
54+
! DEVICE: %[[J_UB:.*]] = fir.load %[[J_UB_DECL]]#1 : !fir.ref<index>
55+
56+
! DEVICE: %[[J_ST_DECL:.*]]:2 = hlfir.declare %[[J_ST_MAP]]
57+
! DEVICE: %[[J_ST:.*]] = fir.load %[[J_ST_DECL]]#1 : !fir.ref<index>
58+
59+
! DEVICE: omp.loop_nest ({{.*}}, {{.*}}) : index = (%[[I_LB]], %[[J_LB]]) to
60+
! DEVICE-SAME: (%[[I_UB]], %[[J_UB]]) inclusive step
61+
! DEVICE-SAME: (%[[I_ST]], %[[J_ST]])

flang/test/Transforms/DoConcurrent/map_shape_info.f90

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,14 @@ end program do_concurrent_shape
2323
! CHECK: omp.map.info
2424
! CHECK: omp.map.info
2525

26+
! CHECK: omp.map.info
27+
! CHECK: omp.map.info
28+
! CHECK: omp.map.info
29+
30+
! CHECK: omp.map.info
31+
! CHECK: omp.map.info
32+
! CHECK: omp.map.info
33+
2634
! CHECK: %[[DIM0_EXT_MAP:.*]] = omp.map.info
2735
! CHECK-SAME: var_ptr(%[[DIM0_EXT]] : !fir.ref<index>, index)
2836
! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc)
@@ -37,6 +45,12 @@ end program do_concurrent_shape
3745
! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
3846
! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
3947
! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
48+
! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
49+
! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
50+
! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
51+
! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
52+
! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
53+
! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
4054
! CHECK-SAME: %[[DIM0_EXT_MAP]] -> %[[DIM0_EXT_ARG:[^,]+]],
4155
! CHECK-SAME: %[[DIM1_EXT_MAP]] -> %[[DIM1_EXT_ARG:[^,]+]] : {{.*}})
4256

@@ -59,6 +73,10 @@ end subroutine do_concurrent_shape_shift
5973
! CHECK: fir.store %{{c2.*}} to %[[DIM0_STRT:.*]] : !fir.ref<index>
6074
! CHECK: fir.store %{{c9.*}} to %[[DIM0_EXT:.*]] : !fir.ref<index>
6175

76+
! CHECK: omp.map.info
77+
! CHECK: omp.map.info
78+
! CHECK: omp.map.info
79+
6280
! CHECK: omp.map.info
6381
! CHECK: omp.map.info
6482

@@ -75,6 +93,9 @@ end subroutine do_concurrent_shape_shift
7593
! CHECK: omp.target host_eval({{.*}}) map_entries(
7694
! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
7795
! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
96+
! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
97+
! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
98+
! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}},
7899
! CHECK-SAME: %[[DIM0_STRT_MAP]] -> %[[DIM0_STRT_ARG:[^,]+]],
79100
! CHECK-SAME: %[[DIM0_EXT_MAP]] -> %[[DIM0_EXT_ARG:[^,]+]] : {{.*}})
80101

flang/test/Transforms/DoConcurrent/non_reference_to_device.f90

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,12 +19,16 @@ subroutine test_non_refernece
1919
end associate
2020
end subroutine test_non_refernece
2121

22+
! CHECK: omp.map.info var_ptr(%{{.*}} : !fir.ref<index>, index)
23+
! CHECK: omp.map.info var_ptr(%{{.*}} : !fir.ref<index>, index)
24+
! CHECK: omp.map.info var_ptr(%{{.*}} : !fir.ref<index>, index)
25+
2226
! CHECK: %[[DIM_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<index>, index)
2327
! CHECK-SAME: map_clauses(implicit, exit_release_or_enter_alloc)
2428
! CHECK-SAME: capture(ByCopy) -> !fir.ref<index> {name = ""}
2529

2630

2731
! CHECK: omp.target host_eval({{.*}} : index, index, index)
28-
! CHECK-SAME: map_entries(%{{.*}} -> %{{.*}}, %[[DIM_MAP]] -> %{{.*}} :
32+
! CHECK-SAME: map_entries({{.*}}, %[[DIM_MAP]] -> %{{.*}} :
2933
! CHECK-SAME: !fir.ref<i32>, !fir.ref<index>)
3034

flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,11 @@ program main
2929
! HOST: %[[ORIG_J_ALLOC:.*]] = fir.alloca i32 {bindc_name = "j"}
3030
! HOST: %[[ORIG_J_DECL:.*]]:2 = hlfir.declare %[[ORIG_J_ALLOC]]
3131

32-
! DEVICE: omp.target {{.*}}map_entries(%{{[^[:space:]]+}} -> %[[I_ARG:[^,]+]],
32+
! DEVICE: omp.target {{.*}}map_entries(
33+
! DEVICE-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
34+
! DEVICE-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
35+
! DEVICE-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
36+
! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[I_ARG:[^,]+]],
3337
! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[X_ARG:[^,]+]],
3438
! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[J_ARG:[^,]+]],
3539
! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[K_ARG:[^,]+]],

flang/test/Transforms/DoConcurrent/runtime_sized_array.f90

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -25,9 +25,13 @@ subroutine foo(n)
2525
! CHECK-DAG: %[[N_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : {{.*}}) {{.*}} {name = "_QFfooEa.extent.dim0"}
2626

2727
! CHECK: omp.target
28-
! CHECK-SAME: map_entries(%[[I_MAP]] -> %[[I_ARG:arg[0-9]*]],
29-
! CHECK-SAME: %[[A_MAP]] -> %[[A_ARG:arg[0-9]*]],
30-
! CHECK-SAME: %[[N_MAP]] -> %[[N_ARG:arg[0-9]*]] : {{.*}})
28+
! CHECK-SAME: map_entries(
29+
! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
30+
! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
31+
! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
32+
! CHECK-SAME: %[[I_MAP]] -> %[[I_ARG:arg[0-9]*]],
33+
! CHECK-SAME: %[[A_MAP]] -> %[[A_ARG:arg[0-9]*]],
34+
! CHECK-SAME: %[[N_MAP]] -> %[[N_ARG:arg[0-9]*]] : {{.*}})
3135
! CHECK-SAME: {{.*}} {
3236

3337
! CHECK-DAG: %{{.*}} = hlfir.declare %[[I_ARG]]

flang/test/Transforms/DoConcurrent/skip_all_nested_loops.f90

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,11 @@ program main
2929
! HOST: %[[ORIG_J_ALLOC:.*]] = fir.alloca i32 {bindc_name = "j", {{.*}}}
3030
! HOST: %[[ORIG_J_DECL:.*]]:2 = hlfir.declare %[[ORIG_J_ALLOC]]
3131

32-
! DEVICE: omp.target {{.*}}map_entries(%{{[^[:space:]]+}} -> %[[I_ARG:[^,]+]],
32+
! DEVICE: omp.target {{.*}}map_entries(
33+
! DEVICE-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
34+
! DEVICE-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
35+
! DEVICE-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}},
36+
! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[I_ARG:[^,]+]],
3337
! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[J_ARG:[^,]+]],
3438
! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[K_ARG:[^,]+]],
3539
! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[A_ARG:[^,]+]],

0 commit comments

Comments
 (0)