Skip to content

Commit b451b1f

Browse files
authored
[Flang][OpenMP] Increase restrictions on descriptor deferral (llvm#3381)
2 parents 77805e3 + 2192c99 commit b451b1f

File tree

7 files changed

+68
-31
lines changed

7 files changed

+68
-31
lines changed

flang/include/flang/Optimizer/OpenMP/Passes.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def MapInfoFinalizationPass
2323
let dependentDialects = ["mlir::omp::OpenMPDialect"];
2424

2525
let options = [Option<"deferDescMapping", "opt-defer-desc-mapping",
26-
"bool", /*default=*/"false",
26+
"bool", /*default=*/"true",
2727
"Activates or deactivates deferred descriptor mapping, "
2828
"which delays mapping of top-level descriptors to target "
2929
"regions and target data regions">];

flang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -494,7 +494,7 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
494494

495495
opts.DeferDescriptorMapping =
496496
args.hasFlag(clang::driver::options::OPT_fdefer_desc_map,
497-
clang::driver::options::OPT_fno_defer_desc_map, false);
497+
clang::driver::options::OPT_fno_defer_desc_map, true);
498498

499499
if (const llvm::opt::Arg *arg =
500500
args.getLastArg(clang::driver::options::OPT_complex_range_EQ)) {

flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp

Lines changed: 54 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,10 @@ class MapInfoFinalizationPass
8282
/// | |
8383
std::map<mlir::Operation *, mlir::Value> localBoxAllocas;
8484

85+
// List of deferrable descriptors to process at the end of
86+
// the pass.
87+
llvm::SmallVector<mlir::Operation *> deferrableDesc;
88+
8589
/// getMemberUserList gathers all users of a particular MapInfoOp that are
8690
/// other MapInfoOp's and places them into the mapMemberUsers list, which
8791
/// records the map that the current argument MapInfoOp "op" is part of
@@ -126,20 +130,51 @@ class MapInfoFinalizationPass
126130
});
127131
}
128132

133+
// Check if the declaration operation we have refers to a dummy
134+
// function argument.
135+
bool isDummyArgument(mlir::Operation *op) {
136+
if (auto declareOp = mlir::dyn_cast<hlfir::DeclareOp>(op))
137+
if (auto dummyScope = declareOp.getDummyScope())
138+
return true;
139+
return false;
140+
}
141+
142+
// Relevant for OpenMP < 5.2, where attach semantics and rules don't exist.
143+
// As descriptors were an unspoken implementation detail in these versions
144+
// there's certain cases where the user (and the compiler implementation)
145+
// can create data mapping errors by having temporary descriptors stuck
146+
// in memory. To avoid this we can defer the descriptor mapping in these
147+
// cases until target or target data regions, when we can be sure they
148+
// have a clear limited scope on device.
149+
bool canDeferDescriptorMapping(mlir::Value descriptor) {
150+
if (!fir::isAllocatableType(descriptor.getType()) ||
151+
!fir::isPointerType(descriptor.getType())) {
152+
if (isDummyArgument(descriptor.getDefiningOp()) &&
153+
(fir::isAssumedType(descriptor.getType()) ||
154+
fir::isAssumedShape(descriptor.getType()))) {
155+
return true;
156+
}
157+
}
158+
return false;
159+
}
160+
129161
/// When provided a MapInfoOp containing a descriptor type that
130162
/// we must expand into multiple maps this function will extract
131163
/// the value from it and return it, in certain cases we must
132164
/// generate a new allocation to store into so that the
133165
/// fir::BoxOffsetOp we utilise to access the descriptor datas
134166
/// base address can be utilised.
135167
mlir::Value getDescriptorFromBoxMap(mlir::omp::MapInfoOp boxMap,
136-
fir::FirOpBuilder &builder) {
168+
fir::FirOpBuilder &builder,
169+
bool &canDescBeDeferred) {
137170
mlir::Value descriptor = boxMap.getVarPtr();
138171
if (!fir::isTypeWithDescriptor(boxMap.getVarType()))
139172
if (auto addrOp = mlir::dyn_cast_if_present<fir::BoxAddrOp>(
140173
boxMap.getVarPtr().getDefiningOp()))
141174
descriptor = addrOp.getVal();
142175

176+
canDescBeDeferred = canDeferDescriptorMapping(descriptor);
177+
143178
if (!mlir::isa<fir::BaseBoxType>(descriptor.getType()) &&
144179
!fir::factory::isOptionalArgument(descriptor.getDefiningOp()))
145180
return descriptor;
@@ -291,7 +326,7 @@ class MapInfoFinalizationPass
291326
if (llvm::isa_and_nonnull<mlir::omp::TargetExitDataOp,
292327
mlir::omp::TargetUpdateOp>(target)) {
293328
mapFlags flags = mapFlags(mapTypeFlag);
294-
if (!IsHasDeviceAddr)
329+
if (!IsHasDeviceAddr)
295330
flags |= mapFlags::OMP_MAP_DESCRIPTOR;
296331
return llvm::to_underlying(flags);
297332
}
@@ -415,7 +450,9 @@ class MapInfoFinalizationPass
415450

416451
// TODO: map the addendum segment of the descriptor, similarly to the
417452
// base address/data pointer member.
418-
mlir::Value descriptor = getDescriptorFromBoxMap(op, builder);
453+
bool descCanBeDeferred = false;
454+
mlir::Value descriptor =
455+
getDescriptorFromBoxMap(op, builder, descCanBeDeferred);
419456

420457
mlir::ArrayAttr newMembersAttr;
421458
mlir::SmallVector<mlir::Value> newMembers;
@@ -491,6 +528,10 @@ class MapInfoFinalizationPass
491528
/*partial_map=*/builder.getBoolAttr(false));
492529
op.replaceAllUsesWith(newDescParentMapOp.getResult());
493530
op->erase();
531+
532+
if (descCanBeDeferred)
533+
deferrableDesc.push_back(newDescParentMapOp);
534+
494535
return newDescParentMapOp;
495536
}
496537

@@ -784,6 +825,7 @@ class MapInfoFinalizationPass
784825
// clear all local allocations we made for any boxes in any prior
785826
// iterations from previous function scopes.
786827
localBoxAllocas.clear();
828+
deferrableDesc.clear();
787829

788830
// First, walk `omp.map.info` ops to see if any of them have varPtrs
789831
// with an underlying type of fir.char<k, ?>, i.e a character
@@ -817,7 +859,7 @@ class MapInfoFinalizationPass
817859
op.getBoundsMutable().append(boundsOps);
818860
});
819861

820-
// Next, walk `omp.map.info` ops to see if any record members should be
862+
// Next, walk `omp.map.info` ops to see if any record members should be
821863
// implicitly mapped.
822864
// TODO/FIXME/UPDATE: I believe we need to add implicit capture of
823865
// allocatable members of arbitrary depths for this before we can
@@ -1059,18 +1101,14 @@ class MapInfoFinalizationPass
10591101
// data and the runtime should correctly associate the data with the
10601102
// descriptor and bind together and allow clean mapping and execution.
10611103
if (deferDescMapping) {
1062-
func->walk([&](mlir::omp::MapInfoOp op) {
1063-
if (fir::isTypeWithDescriptor(op.getVarType()) ||
1064-
mlir::isa_and_present<fir::BoxAddrOp>(
1065-
op.getVarPtr().getDefiningOp())) {
1066-
mlir::Operation *targetUser = getFirstTargetUser(op);
1067-
assert(targetUser &&
1068-
"expected user of map operation was not found");
1069-
builder.setInsertionPoint(op);
1070-
removeTopLevelDescriptor(op, builder, targetUser);
1071-
addImplictDescriptorMapToTargetDataOp(op, builder, targetUser);
1072-
}
1073-
});
1104+
for (auto *op : deferrableDesc) {
1105+
auto mapOp = llvm::dyn_cast<mlir::omp::MapInfoOp>(op);
1106+
mlir::Operation *targetUser = getFirstTargetUser(mapOp);
1107+
assert(targetUser && "expected user of map operation was not found");
1108+
builder.setInsertionPoint(mapOp);
1109+
removeTopLevelDescriptor(mapOp, builder, targetUser);
1110+
addImplictDescriptorMapToTargetDataOp(mapOp, builder, targetUser);
1111+
}
10741112
}
10751113

10761114
// Wait until after we have generated all of our maps to add them onto

flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
! The "use_device_addr" was added to the "target data" directive in OpenMP 5.0.
2-
! RUN: %flang_fc1 -fdefer-desc-map -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s
3-
! RUN: bbc -fdefer-desc-map -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s
2+
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s
3+
! RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s
44
! This tests primary goal is to check the promotion of non-CPTR arguments from
55
! use_device_ptr to use_device_addr works, without breaking any functionality.
66

77
!CHECK: func.func @{{.*}}only_use_device_ptr()
8-
9-
!CHECK: omp.target_data map_entries(%{{.*}}, %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) use_device_addr(%{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) use_device_ptr(%{{.*}} -> %{{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) {
8+
!CHECK: omp.target_data use_device_addr(%{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) use_device_ptr(%{{.*}} -> %{{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) {
109
subroutine only_use_device_ptr
1110
use iso_c_binding
1211
integer, pointer, dimension(:) :: array
@@ -18,7 +17,7 @@ subroutine only_use_device_ptr
1817
end subroutine
1918

2019
!CHECK: func.func @{{.*}}mix_use_device_ptr_and_addr()
21-
!CHECK: omp.target_data map_entries(%{{.*}}, %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) use_device_addr(%{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>) use_device_ptr({{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) {
20+
!CHECK: omp.target_data use_device_addr(%{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>) use_device_ptr({{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) {
2221
subroutine mix_use_device_ptr_and_addr
2322
use iso_c_binding
2423
integer, pointer, dimension(:) :: array
@@ -30,7 +29,7 @@ subroutine mix_use_device_ptr_and_addr
3029
end subroutine
3130

3231
!CHECK: func.func @{{.*}}only_use_device_addr()
33-
!CHECK: omp.target_data map_entries(%{{.*}}, %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) use_device_addr(%{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) {
32+
!CHECK: omp.target_data use_device_addr(%{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) {
3433
subroutine only_use_device_addr
3534
use iso_c_binding
3635
integer, pointer, dimension(:) :: array
@@ -42,7 +41,7 @@ subroutine only_use_device_addr
4241
end subroutine
4342

4443
!CHECK: func.func @{{.*}}mix_use_device_ptr_and_addr_and_map()
45-
!CHECK: omp.target_data map_entries(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) use_device_addr(%{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>) use_device_ptr(%{{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) {
44+
!CHECK: omp.target_data map_entries(%{{.*}}, %{{.*}} : !fir.ref<i32>, !fir.ref<i32>) use_device_addr(%{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}}, %{{.*}} -> %{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>) use_device_ptr(%{{.*}} : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>) {
4645
subroutine mix_use_device_ptr_and_addr_and_map
4746
use iso_c_binding
4847
integer :: i, j

flang/test/Lower/volatile-openmp.f90

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
! RUN: bbc -fdefer-desc-map --strict-fir-volatile-verifier -fopenmp %s -o - | FileCheck %s
1+
! RUN: bbc --strict-fir-volatile-verifier -fopenmp %s -o - | FileCheck %s
22
type t
33
integer, pointer :: array(:)
44
end type
@@ -46,8 +46,8 @@
4646
! CHECK: %[[VAL_34:.*]] = arith.subi %[[VAL_33]]#1, %[[VAL_1]] : index
4747
! CHECK: %[[VAL_35:.*]] = omp.map.bounds lower_bound(%[[VAL_0]] : index) upper_bound(%[[VAL_34]] : index) extent(%[[VAL_33]]#1 : index) stride(%[[VAL_33]]#2 : index) start_idx(%[[VAL_32]]#0 : index) {stride_in_bytes = true}
4848
! CHECK: %[[VAL_36:.*]] = fir.box_offset %[[VAL_10]]#1 base_addr : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>, volatile>, volatile>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
49-
! CHECK: %[[VAL_37:.*]] = fir.load %[[VAL_36]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
50-
! CHECK: %[[VAL_38:.*]] = omp.map.info var_ptr(%[[VAL_37]] : !fir.ref<!fir.array<?xi32>>, i32) map_clauses(descriptor_base_addr, to) capture(ByRef) bounds(%[[VAL_35]]) -> !fir.ref<!fir.array<?xi32>> {name = "array1"}
51-
! CHECK: omp.target_enter_data map_entries(%[[VAL_38]] : !fir.ref<!fir.array<?xi32>>)
49+
! CHECK: %[[VAL_37:.*]] = omp.map.info var_ptr(%[[VAL_10]]#1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>, volatile>, volatile>, i32) map_clauses(descriptor_base_addr, to) capture(ByRef) var_ptr_ptr(%[[VAL_36]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%[[VAL_35]]) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""}
50+
! CHECK: %[[VAL_38:.*]] = omp.map.info var_ptr(%[[VAL_10]]#1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>, volatile>, volatile>, !fir.box<!fir.ptr<!fir.array<?xi32>>, volatile>) map_clauses(always, descriptor, to) capture(ByRef) members(%[[VAL_37]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>, volatile>, volatile> {name = "array1"}
51+
! CHECK: omp.target_enter_data map_entries(%[[VAL_38]], %[[VAL_37]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>, volatile>, volatile>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>)
5252
! CHECK: return
5353
! CHECK: }

flang/test/Transforms/omp-map-info-finalization.fir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: fir-opt --split-input-file --omp-map-info-finalization %s | FileCheck %s
2-
// XFAIL: *
2+
33
func.func @test_descriptor_expansion_pass(%arg0: !fir.box<!fir.array<?xi32>>) {
44
%0 = fir.alloca !fir.box<!fir.heap<i32>>
55
%1 = fir.zero_bits !fir.heap<i32>

flang/tools/bbc/bbc.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ static llvm::cl::opt<bool>
146146
deferDescMap("fdefer-desc-map",
147147
llvm::cl::desc("disable or enable OpenMP deference of mapping "
148148
"for top-level descriptors"),
149-
llvm::cl::init(false));
149+
llvm::cl::init(true));
150150

151151
static llvm::cl::opt<std::string> enableDoConcurrentToOpenMPConversion(
152152
"fdo-concurrent-to-openmp",

0 commit comments

Comments
 (0)