Skip to content

Commit 73272d6

Browse files
authored
[Flang][OpenMP] Appropriately emit present/load/store in all cases in MapInfoFinalization (#150311)
Currently, we return early whenever we've already generated an allocation for intermediate descriptor variables (required in certain cases when we can't directly access the base address of a passes in descriptor function argument due to HLFIR/FIR restrictions). This unfortunately, skips over the presence check and load/store required to set the intermediate descriptor allocations values/data. This is fine in most cases, but if a function happens to have a series of branches with seperate target regions capturing the same input argument, we'd emit the present/load/store into the first branch with the first target inside of it, the secondary (or any preceding) branches would not have the present/load/store, this would lead to the subsequent mapped values in that branch being empty and then leading to a memory access violation on device. The fix for the moment is to emit a present/load/store at the relevant location of every target utilising the input argument, this likely will also lead to fixing possible issues with the input argument being manipulated inbetween target regions (primarily resizing, the data should remain the same as we're just copying an address around, in theory at least). There's possible optimizations/simplifications to emit less load/stores such as by raising the load/store out of the branches when we can, but I'm inclined to leave this sort of optimization to lower level passes such as an LLVM pass (which very possibly already covers it).
1 parent adb2421 commit 73272d6

File tree

3 files changed

+122
-22
lines changed

3 files changed

+122
-22
lines changed

flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp

Lines changed: 31 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -137,32 +137,41 @@ class MapInfoFinalizationPass
137137
!fir::factory::isOptionalArgument(descriptor.getDefiningOp()))
138138
return descriptor;
139139

140-
mlir::Value &slot = localBoxAllocas[descriptor.getDefiningOp()];
141-
if (slot) {
142-
return slot;
140+
mlir::Value &alloca = localBoxAllocas[descriptor.getDefiningOp()];
141+
mlir::Location loc = boxMap->getLoc();
142+
143+
if (!alloca) {
144+
// The fir::BoxOffsetOp only works with !fir.ref<!fir.box<...>> types, as
145+
// allowing it to access non-reference box operations can cause some
146+
// problematic SSA IR. However, in the case of assumed shape's the type
147+
// is not a !fir.ref, in these cases to retrieve the appropriate
148+
// !fir.ref<!fir.box<...>> to access the data we need to map we must
149+
// perform an alloca and then store to it and retrieve the data from the
150+
// new alloca.
151+
mlir::OpBuilder::InsertPoint insPt = builder.saveInsertionPoint();
152+
mlir::Block *allocaBlock = builder.getAllocaBlock();
153+
assert(allocaBlock && "No alloca block found for this top level op");
154+
builder.setInsertionPointToStart(allocaBlock);
155+
156+
mlir::Type allocaType = descriptor.getType();
157+
if (fir::isBoxAddress(allocaType))
158+
allocaType = fir::unwrapRefType(allocaType);
159+
alloca = fir::AllocaOp::create(builder, loc, allocaType);
160+
builder.restoreInsertionPoint(insPt);
143161
}
144162

145-
// The fir::BoxOffsetOp only works with !fir.ref<!fir.box<...>> types, as
146-
// allowing it to access non-reference box operations can cause some
147-
// problematic SSA IR. However, in the case of assumed shape's the type
148-
// is not a !fir.ref, in these cases to retrieve the appropriate
149-
// !fir.ref<!fir.box<...>> to access the data we need to map we must
150-
// perform an alloca and then store to it and retrieve the data from the new
151-
// alloca.
152-
mlir::OpBuilder::InsertPoint insPt = builder.saveInsertionPoint();
153-
mlir::Block *allocaBlock = builder.getAllocaBlock();
154-
mlir::Location loc = boxMap->getLoc();
155-
assert(allocaBlock && "No alloca block found for this top level op");
156-
builder.setInsertionPointToStart(allocaBlock);
157-
158-
mlir::Type allocaType = descriptor.getType();
159-
if (fir::isBoxAddress(allocaType))
160-
allocaType = fir::unwrapRefType(allocaType);
161-
auto alloca = fir::AllocaOp::create(builder, loc, allocaType);
162-
builder.restoreInsertionPoint(insPt);
163163
// We should only emit a store if the passed in data is present, it is
164164
// possible a user passes in no argument to an optional parameter, in which
165165
// case we cannot store or we'll segfault on the emitted memcpy.
166+
// TODO: We currently emit a present -> load/store every time we use a
167+
// mapped value that requires a local allocation, this isn't the most
168+
// efficient, although, it is more correct in a lot of situations. One
169+
// such situation is emitting a this series of instructions in separate
170+
// segments of a branch (e.g. two target regions in separate else/if branch
171+
// mapping the same function argument), however, it would be nice to be able
172+
// to optimize these situations e.g. raising the load/store out of the
173+
// branch if possible. But perhaps this is best left to lower level
174+
// optimisation passes.
166175
auto isPresent =
167176
fir::IsPresentOp::create(builder, loc, builder.getI1Type(), descriptor);
168177
builder.genIfOp(loc, {}, isPresent, false)
@@ -171,7 +180,7 @@ class MapInfoFinalizationPass
171180
fir::StoreOp::create(builder, loc, descriptor, alloca);
172181
})
173182
.end();
174-
return slot = alloca;
183+
return alloca;
175184
}
176185

177186
/// Function that generates a FIR operation accessing the descriptor's
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
2+
3+
module mod
4+
contains
5+
subroutine foo(dt, switch)
6+
implicit none
7+
real(4), dimension(:), intent(inout) :: dt
8+
logical, intent(in) :: switch
9+
integer :: dim, idx
10+
11+
if (switch) then
12+
!$omp target teams distribute parallel do
13+
do idx = 1, 100
14+
dt(idx) = 20
15+
end do
16+
else
17+
!$omp target teams distribute parallel do
18+
do idx = 1, 100
19+
dt(idx) = 30
20+
end do
21+
end if
22+
end subroutine foo
23+
end module
24+
25+
! CHECK-LABEL: func.func @{{.*}}(
26+
! CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "dt"},
27+
! CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box<!fir.array<?xf32>>
28+
! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope {{.*}}
29+
! CHECK: fir.if %{{.*}} {
30+
! CHECK: %[[VAL_2:.*]] = fir.is_present %[[VAL_1]]#1 : (!fir.box<!fir.array<?xf32>>) -> i1
31+
! CHECK: fir.if %[[VAL_2]] {
32+
! CHECK: fir.store %[[VAL_1]]#1 to %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
33+
! CHECK: }
34+
! CHECK: %[[VAL_3:.*]] = fir.box_offset %[[VAL_0]] base_addr : (!fir.ref<!fir.box<!fir.array<?xf32>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>
35+
! CHECK: %[[VAL_4:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf32>>>, f32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[VAL_3]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>> {name = ""}
36+
! CHECK: %[[VAL_5:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf32>>>, !fir.box<!fir.array<?xf32>>) map_clauses(implicit, to) capture(ByRef) members(%[[VAL_4]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>) -> !fir.ref<!fir.array<?xf32>> {name = "dt"}
37+
! CHECK: omp.target host_eval({{.*}}) map_entries({{.*}}%[[VAL_5]] -> {{.*}}, %[[VAL_4]] -> {{.*}} : {{.*}}) {
38+
! CHECK: } else {
39+
! CHECK: %[[VAL_6:.*]] = fir.is_present %[[VAL_1]]#1 : (!fir.box<!fir.array<?xf32>>) -> i1
40+
! CHECK: fir.if %[[VAL_6]] {
41+
! CHECK: fir.store %[[VAL_1]]#1 to %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
42+
! CHECK: }
43+
! CHECK: %[[VAL_7:.*]] = fir.box_offset %[[VAL_0]] base_addr : (!fir.ref<!fir.box<!fir.array<?xf32>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>
44+
! CHECK: %[[VAL_8:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf32>>>, f32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[VAL_7]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>> {name = ""}
45+
! CHECK: %[[VAL_9:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf32>>>, !fir.box<!fir.array<?xf32>>) map_clauses(implicit, to) capture(ByRef) members(%[[VAL_8]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>) -> !fir.ref<!fir.array<?xf32>> {name = "dt"}
46+
! CHECK: omp.target host_eval({{.*}}) map_entries({{.*}}, %[[VAL_9]] ->{{.*}}, %[[VAL_8]] -> {{.*}} : {{.*}}) {
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
! OpenMP offloading test that checks we do not cause a segfault when mapping
2+
! optional function arguments (present or otherwise). No results requiring
3+
! checking other than that the program compiles and runs to completion with no
4+
! error. This particular variation checks that we're correctly emitting the
5+
! load/store in both branches mapping the input array.
6+
! REQUIRES: flang, amdgpu
7+
8+
! RUN: %libomptarget-compile-fortran-run-and-check-generic
9+
module reproducer_mod
10+
contains
11+
subroutine branching_target_call(dt, switch)
12+
implicit none
13+
real(4), dimension(:), intent(inout) :: dt
14+
logical, intent(in) :: switch
15+
integer :: dim, idx
16+
17+
dim = size(dt)
18+
if (switch) then
19+
!$omp target teams distribute parallel do
20+
do idx = 1, dim
21+
dt(idx) = 20
22+
end do
23+
else
24+
!$omp target teams distribute parallel do
25+
do idx = 1, dim
26+
dt(idx) = 30
27+
end do
28+
end if
29+
end subroutine branching_target_call
30+
end module reproducer_mod
31+
32+
program reproducer
33+
use reproducer_mod
34+
implicit none
35+
real(4), dimension(:), allocatable :: dt
36+
integer :: n = 21312
37+
integer :: i
38+
39+
allocate (dt(n))
40+
call branching_target_call(dt, .FALSE.)
41+
call branching_target_call(dt, .TRUE.)
42+
print *, "PASSED"
43+
end program reproducer
44+
45+
! CHECK: PASSED

0 commit comments

Comments
 (0)