Skip to content

Commit 5a5e8ba

Browse files
authored
[MLIR][OpenMP] Add a new AutomapToTargetData conversion pass in FIR (#151989)
Add a new `AutomapToTargetData` pass. It gathers the declare target enter variables that have the `AUTOMAP` modifier and adds `omp.target_enter_data`/`omp.target_exit_data` mapping directives for the `fir.allocmem` and `fir.freemem` operations on the `AUTOMAP`-enabled variables. Automap Ref: OpenMP 6.0 section 7.9.7.
1 parent a9f9c7d commit 5a5e8ba

File tree

9 files changed

+283
-39
lines changed

9 files changed

+283
-39
lines changed

flang/include/flang/Optimizer/OpenMP/Passes.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,4 +112,15 @@ def GenericLoopConversionPass
112112
];
113113
}
114114

115+
// Pass definition for AUTOMAP handling. It is anchored on `::mlir::ModuleOp`
// and is selected with the `omp-automap-to-target-data` command-line argument.
// The OpenMP dialect is listed as a dependent dialect because the pass creates
// OpenMP operations.
def AutomapToTargetDataPass
116+
: Pass<"omp-automap-to-target-data", "::mlir::ModuleOp"> {
117+
let summary = "Insert OpenMP target data operations for AUTOMAP variables";
118+
let description = [{
119+
Inserts `omp.target_enter_data` and `omp.target_exit_data` operations to
120+
map variables marked with the `AUTOMAP` modifier when their allocation
121+
or deallocation is detected in the FIR.
122+
}];
123+
let dependentDialects = ["mlir::omp::OpenMPDialect"];
124+
}
125+
115126
#endif //FORTRAN_OPTIMIZER_OPENMP_PASSES

flang/include/flang/Support/OpenMP-utils.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
#ifndef FORTRAN_SUPPORT_OPENMP_UTILS_H_
1010
#define FORTRAN_SUPPORT_OPENMP_UTILS_H_
1111

12+
#include "flang/Optimizer/Builder/FIRBuilder.h"
13+
#include "flang/Optimizer/Dialect/FIRType.h"
1214
#include "flang/Semantics/symbol.h"
1315

1416
#include "mlir/IR/Builders.h"
@@ -72,6 +74,14 @@ struct EntryBlockArgs {
7274
/// \param [in] region - Empty region in which to create the entry block.
7375
mlir::Block *genEntryBlock(
7476
mlir::OpBuilder &builder, const EntryBlockArgs &args, mlir::Region &region);
77+
78+
/// Returns true if the variable has a dynamic size and therefore requires
79+
/// bounds operations to describe its extents.
///
/// \param [in] var - Value to inspect; expected to have an OpenMP
///                   pointer-like type.
80+
bool needsBoundsOps(mlir::Value var);
81+
82+
/// Generate MapBoundsOp operations for the variable if required.
///
/// \param [in]  builder   - Builder used to create the bounds operations.
/// \param [in]  var       - Variable whose bounds are described.
/// \param [out] boundsOps - Vector to which the generated bounds values are
///                          appended.
83+
void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value var,
84+
llvm::SmallVectorImpl<mlir::Value> &boundsOps);
7585
} // namespace Fortran::common::openmp
7686

7787
#endif // FORTRAN_SUPPORT_OPENMP_UTILS_H_
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
//===- AutomapToTargetData.cpp -------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "flang/Optimizer/Builder/FIRBuilder.h"
10+
#include "flang/Optimizer/Builder/HLFIRTools.h"
11+
#include "flang/Optimizer/Dialect/FIROps.h"
12+
#include "flang/Optimizer/Dialect/FIRType.h"
13+
#include "flang/Optimizer/Dialect/Support/KindMapping.h"
14+
#include "flang/Optimizer/HLFIR/HLFIROps.h"
15+
#include "flang/Support/OpenMP-utils.h"
16+
17+
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
18+
#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
19+
#include "mlir/IR/BuiltinAttributes.h"
20+
#include "mlir/IR/Operation.h"
21+
#include "mlir/Pass/Pass.h"
22+
23+
#include "llvm/Frontend/OpenMP/OMPConstants.h"
24+
25+
namespace flangomp {
26+
#define GEN_PASS_DEF_AUTOMAPTOTARGETDATAPASS
27+
#include "flang/Optimizer/OpenMP/Passes.h.inc"
28+
} // namespace flangomp
29+
30+
using namespace mlir;
31+
using namespace Fortran::common::openmp;
32+
33+
namespace {
34+
class AutomapToTargetDataPass
35+
: public flangomp::impl::AutomapToTargetDataPassBase<
36+
AutomapToTargetDataPass> {
37+
void findRelatedAllocmemFreemem(fir::AddrOfOp addressOfOp,
38+
llvm::DenseSet<fir::StoreOp> &allocmems,
39+
llvm::DenseSet<fir::LoadOp> &freemems) {
40+
assert(addressOfOp->hasOneUse() && "op must have single use");
41+
42+
auto declaredRef =
43+
cast<hlfir::DeclareOp>(*addressOfOp->getUsers().begin())->getResult(0);
44+
45+
for (Operation *refUser : declaredRef.getUsers()) {
46+
if (auto storeOp = dyn_cast<fir::StoreOp>(refUser))
47+
if (auto emboxOp = storeOp.getValue().getDefiningOp<fir::EmboxOp>())
48+
if (auto allocmemOp =
49+
emboxOp.getOperand(0).getDefiningOp<fir::AllocMemOp>())
50+
allocmems.insert(storeOp);
51+
52+
if (auto loadOp = dyn_cast<fir::LoadOp>(refUser))
53+
for (Operation *loadUser : loadOp.getResult().getUsers())
54+
if (auto boxAddrOp = dyn_cast<fir::BoxAddrOp>(loadUser))
55+
for (Operation *boxAddrUser : boxAddrOp.getResult().getUsers())
56+
if (auto freememOp = dyn_cast<fir::FreeMemOp>(boxAddrUser))
57+
freemems.insert(loadOp);
58+
}
59+
}
60+
61+
void runOnOperation() override {
62+
ModuleOp module = getOperation()->getParentOfType<ModuleOp>();
63+
if (!module)
64+
module = dyn_cast<ModuleOp>(getOperation());
65+
if (!module)
66+
return;
67+
68+
// Build FIR builder for helper utilities.
69+
fir::KindMapping kindMap = fir::getKindMapping(module);
70+
fir::FirOpBuilder builder{module, std::move(kindMap)};
71+
72+
// Collect global variables with AUTOMAP flag.
73+
llvm::DenseSet<fir::GlobalOp> automapGlobals;
74+
module.walk([&](fir::GlobalOp globalOp) {
75+
if (auto iface =
76+
dyn_cast<omp::DeclareTargetInterface>(globalOp.getOperation()))
77+
if (iface.isDeclareTarget() && iface.getDeclareTargetAutomap() &&
78+
iface.getDeclareTargetDeviceType() !=
79+
omp::DeclareTargetDeviceType::host)
80+
automapGlobals.insert(globalOp);
81+
});
82+
83+
auto addMapInfo = [&](auto globalOp, auto memOp) {
84+
builder.setInsertionPointAfter(memOp);
85+
SmallVector<Value> bounds;
86+
if (needsBoundsOps(memOp.getMemref()))
87+
genBoundsOps(builder, memOp.getMemref(), bounds);
88+
89+
omp::TargetEnterExitUpdateDataOperands clauses;
90+
mlir::omp::MapInfoOp mapInfo = mlir::omp::MapInfoOp::create(
91+
builder, memOp.getLoc(), memOp.getMemref().getType(),
92+
memOp.getMemref(),
93+
TypeAttr::get(fir::unwrapRefType(memOp.getMemref().getType())),
94+
builder.getIntegerAttr(
95+
builder.getIntegerType(64, false),
96+
static_cast<unsigned>(
97+
isa<fir::StoreOp>(memOp)
98+
? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO
99+
: llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE)),
100+
builder.getAttr<omp::VariableCaptureKindAttr>(
101+
omp::VariableCaptureKind::ByCopy),
102+
/*var_ptr_ptr=*/mlir::Value{},
103+
/*members=*/SmallVector<Value>{},
104+
/*members_index=*/ArrayAttr{}, bounds,
105+
/*mapperId=*/mlir::FlatSymbolRefAttr(), globalOp.getSymNameAttr(),
106+
builder.getBoolAttr(false));
107+
clauses.mapVars.push_back(mapInfo);
108+
isa<fir::StoreOp>(memOp)
109+
? builder.create<omp::TargetEnterDataOp>(memOp.getLoc(), clauses)
110+
: builder.create<omp::TargetExitDataOp>(memOp.getLoc(), clauses);
111+
};
112+
113+
for (fir::GlobalOp globalOp : automapGlobals) {
114+
if (auto uses = globalOp.getSymbolUses(module.getOperation())) {
115+
llvm::DenseSet<fir::StoreOp> allocmemStores;
116+
llvm::DenseSet<fir::LoadOp> freememLoads;
117+
for (auto &x : *uses)
118+
if (auto addrOp = dyn_cast<fir::AddrOfOp>(x.getUser()))
119+
findRelatedAllocmemFreemem(addrOp, allocmemStores, freememLoads);
120+
121+
for (auto storeOp : allocmemStores)
122+
addMapInfo(globalOp, storeOp);
123+
124+
for (auto loadOp : freememLoads)
125+
addMapInfo(globalOp, loadOp);
126+
}
127+
}
128+
}
129+
};
130+
} // namespace

flang/lib/Optimizer/OpenMP/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
22

33
add_flang_library(FlangOpenMPTransforms
4+
AutomapToTargetData.cpp
45
DoConcurrentConversion.cpp
56
FunctionFiltering.cpp
67
GenericLoopConversion.cpp

flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp

Lines changed: 2 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "flang/Optimizer/Dialect/Support/KindMapping.h"
3030
#include "flang/Optimizer/HLFIR/HLFIROps.h"
3131
#include "flang/Optimizer/OpenMP/Passes.h"
32+
#include "flang/Support/OpenMP-utils.h"
3233

3334
#include "mlir/Dialect/Func/IR/FuncOps.h"
3435
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
@@ -47,6 +48,7 @@ namespace flangomp {
4748
} // namespace flangomp
4849

4950
using namespace mlir;
51+
using namespace Fortran::common::openmp;
5052

5153
namespace {
5254
class MapsForPrivatizedSymbolsPass
@@ -193,38 +195,5 @@ class MapsForPrivatizedSymbolsPass
193195
}
194196
}
195197
}
196-
// As the name suggests, this function examines var to determine if
197-
// it has dynamic size. If true, this pass'll have to extract these
198-
// bounds from descriptor of var and add the bounds to the resultant
199-
// MapInfoOp.
200-
bool needsBoundsOps(mlir::Value var) {
201-
assert(mlir::isa<omp::PointerLikeType>(var.getType()) &&
202-
"needsBoundsOps can deal only with pointer types");
203-
mlir::Type t = fir::unwrapRefType(var.getType());
204-
// t could be a box, so look inside the box
205-
auto innerType = fir::dyn_cast_ptrOrBoxEleTy(t);
206-
if (innerType)
207-
return fir::hasDynamicSize(innerType);
208-
return fir::hasDynamicSize(t);
209-
}
210-
211-
void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value var,
212-
llvm::SmallVector<mlir::Value> &boundsOps) {
213-
mlir::Location loc = var.getLoc();
214-
fir::factory::AddrAndBoundsInfo info =
215-
fir::factory::getDataOperandBaseAddr(builder, var,
216-
/*isOptional=*/false, loc);
217-
fir::ExtendedValue extendedValue =
218-
hlfir::translateToExtendedValue(loc, builder, hlfir::Entity{info.addr},
219-
/*continguousHint=*/true)
220-
.first;
221-
llvm::SmallVector<mlir::Value> boundsOpsVec =
222-
fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
223-
mlir::omp::MapBoundsType>(
224-
builder, info, extendedValue,
225-
/*dataExvIsAssumedSize=*/false, loc);
226-
for (auto bounds : boundsOpsVec)
227-
boundsOps.push_back(bounds);
228-
}
229198
};
230199
} // namespace

flang/lib/Optimizer/Passes/Pipelines.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -316,13 +316,13 @@ void createOpenMPFIRPassPipeline(mlir::PassManager &pm,
316316
pm.addPass(flangomp::createDoConcurrentConversionPass(
317317
opts.doConcurrentMappingKind == DoConcurrentMappingKind::DCMK_Device));
318318

319-
// The MapsForPrivatizedSymbols pass needs to run before
320-
// MapInfoFinalizationPass because the former creates new
321-
// MapInfoOp instances, typically for descriptors.
322-
// MapInfoFinalizationPass adds MapInfoOp instances for the descriptors
323-
// underlying data which is necessary to access the data on the offload
324-
// target device.
319+
// The MapsForPrivatizedSymbols and AutomapToTargetData passes need to run
320+
// before MapInfoFinalizationPass because they create new MapInfoOp
321+
// instances, typically for descriptors. MapInfoFinalizationPass adds
322+
// MapInfoOp instances for the descriptors underlying data which is necessary
323+
// to access the data on the offload target device.
325324
pm.addPass(flangomp::createMapsForPrivatizedSymbolsPass());
325+
pm.addPass(flangomp::createAutomapToTargetDataPass());
326326
pm.addPass(flangomp::createMapInfoFinalizationPass());
327327
pm.addPass(flangomp::createMarkDeclareTargetPass());
328328
pm.addPass(flangomp::createGenericLoopConversionPass());

flang/lib/Support/OpenMP-utils.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "flang/Support/OpenMP-utils.h"
10+
#include "flang/Optimizer/Builder/DirectivesCommon.h"
11+
#include "flang/Optimizer/Builder/HLFIRTools.h"
1012

13+
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
1114
#include "mlir/IR/OpDefinition.h"
1215

1316
namespace Fortran::common::openmp {
@@ -47,4 +50,30 @@ mlir::Block *genEntryBlock(mlir::OpBuilder &builder, const EntryBlockArgs &args,
4750

4851
return builder.createBlock(&region, {}, types, locs);
4952
}
53+
54+
/// Tells whether \p var needs explicit bounds operations, i.e. whether the
/// type it refers to has a dynamic size. \p var must have an OpenMP
/// pointer-like type.
bool needsBoundsOps(mlir::Value var) {
  const mlir::Type varType = var.getType();
  assert(mlir::isa<mlir::omp::PointerLikeType>(varType) &&
         "only pointer like types expected");

  const mlir::Type pointee = fir::unwrapRefType(varType);
  // The pointee may itself be a box or pointer; in that case the size of the
  // element type it wraps is what matters.
  const mlir::Type element = fir::dyn_cast_ptrOrBoxEleTy(pointee);
  return fir::hasDynamicSize(element ? element : pointee);
}
62+
63+
/// Creates the implicit `omp.map.bounds` operations describing \p var at the
/// builder's current insertion point and appends their results to
/// \p boundsOps. Existing entries of \p boundsOps are kept.
void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value var,
                  llvm::SmallVectorImpl<mlir::Value> &boundsOps) {
  const mlir::Location loc = var.getLoc();

  // Resolve the base address information of the variable.
  fir::factory::AddrAndBoundsInfo baseInfo =
      fir::factory::getDataOperandBaseAddr(builder, var,
                                           /*isOptional=*/false, loc);

  // Only the extended value is needed; the second pair member is ignored.
  fir::ExtendedValue entityExv =
      hlfir::translateToExtendedValue(loc, builder,
                                      hlfir::Entity{baseInfo.addr},
                                      /*contiguousHint=*/true)
          .first;

  llvm::SmallVector<mlir::Value> generated =
      fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
                                         mlir::omp::MapBoundsType>(
          builder, baseInfo, entityExv, /*dataExvIsAssumedSize=*/false, loc);
  boundsOps.append(generated.begin(), generated.end());
}
5079
} // namespace Fortran::common::openmp
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
// RUN: fir-opt --omp-automap-to-target-data %s | FileCheck %s
2+
// Test OMP AutomapToTargetData pass.
3+
4+
module {
5+
fir.global
6+
@_QMtestEarr{omp.declare_target = #omp.declaretarget<device_type = (any),
7+
capture_clause = (enter), automap = true>} target
8+
: !fir.box<!fir.heap<!fir.array<?xi32>>>
9+
10+
func.func @automap() {
11+
%c0 = arith.constant 0 : index
12+
%c10 = arith.constant 10 : i32
13+
%addr = fir.address_of(@_QMtestEarr) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
14+
%decl:2 = hlfir.declare %addr {fortran_attrs = #fir.var_attrs<allocatable, target>, uniq_name = "_QMtestEarr"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
15+
%idx = fir.convert %c10 : (i32) -> index
16+
%cond = arith.cmpi sgt, %idx, %c0 : index
17+
%n = arith.select %cond, %idx, %c0 : index
18+
%mem = fir.allocmem !fir.array<?xi32>, %n {fir.must_be_heap = true}
19+
%shape = fir.shape %n : (index) -> !fir.shape<1>
20+
%box = fir.embox %mem(%shape) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
21+
fir.store %box to %decl#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
22+
%ld = fir.load %decl#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
23+
%base = fir.box_addr %ld : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
24+
fir.freemem %base : !fir.heap<!fir.array<?xi32>>
25+
%undef = fir.zero_bits !fir.heap<!fir.array<?xi32>>
26+
%sh0 = fir.shape %c0 : (index) -> !fir.shape<1>
27+
%empty = fir.embox %undef(%sh0) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
28+
fir.store %empty to %decl#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
29+
return
30+
}
31+
}
32+
33+
// CHECK: fir.global @[[AUTOMAP:.*]] {{{.*}} automap = true
34+
// CHECK-LABEL: func.func @automap()
35+
// CHECK: %[[AUTOMAP_ADDR:.*]] = fir.address_of(@[[AUTOMAP]])
36+
// CHECK: %[[AUTOMAP_DECL:.*]]:2 = hlfir.declare %[[AUTOMAP_ADDR]]
37+
// CHECK: %[[ALLOC_MEM:.*]] = fir.allocmem
38+
// CHECK-NEXT: fir.shape
39+
// CHECK-NEXT: %[[ARR_BOXED:.*]] = fir.embox %[[ALLOC_MEM]]
40+
// CHECK-NEXT: fir.store %[[ARR_BOXED]]
41+
// CHECK-NEXT: %[[ARR_BOXED_LOADED:.*]] = fir.load %[[AUTOMAP_DECL]]#0
42+
// CHECK-NEXT: %[[ARR_HEAP_PTR:.*]] = fir.box_addr %[[ARR_BOXED_LOADED]]
43+
// CHECK-NEXT: %[[DIM0:.*]] = arith.constant 0 : index
44+
// CHECK-NEXT: %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[ARR_BOXED_LOADED]], %[[DIM0]]
45+
// CHECK-NEXT: %[[ONE:.*]] = arith.constant 1 : index
46+
// CHECK-NEXT: %[[ZERO:.*]] = arith.constant 0 : index
47+
// CHECK-NEXT: %[[BOX_DIMS2:.*]]:3 = fir.box_dims %[[ARR_BOXED_LOADED]], %[[ZERO]]
48+
// CHECK-NEXT: %[[LOWER_BOUND:.*]] = arith.constant 0 : index
49+
// CHECK-NEXT: %[[UPPER_BOUND:.*]] = arith.subi %[[BOX_DIMS2]]#1, %[[ONE]] : index
50+
// CHECK-NEXT: omp.map.bounds lower_bound(%[[LOWER_BOUND]] : index) upper_bound(%[[UPPER_BOUND]] : index) extent(%[[BOX_DIMS2]]#1 : index) stride(%[[BOX_DIMS2]]#2 : index) start_idx(%[[BOX_DIMS]]#0 : index) {stride_in_bytes = true}
51+
// CHECK-NEXT: arith.muli %[[BOX_DIMS2]]#2, %[[BOX_DIMS2]]#1 : index
52+
// CHECK-NEXT: %[[MAP_INFO:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 {{.*}} map_clauses(to) capture(ByCopy)
53+
// CHECK-NEXT: omp.target_enter_data map_entries(%[[MAP_INFO]]
54+
// CHECK: %[[LOAD:.*]] = fir.load %[[AUTOMAP_DECL]]#0
55+
// CHECK: %[[EXIT_MAP:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 {{.*}} map_clauses(delete) capture(ByCopy)
56+
// CHECK-NEXT: omp.target_exit_data map_entries(%[[EXIT_MAP]]
57+
// CHECK-NEXT: %[[BOXADDR:.*]] = fir.box_addr %[[LOAD]]
58+
// CHECK-NEXT: fir.freemem %[[BOXADDR]]
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
!Offloading test for AUTOMAP modifier in declare target enter
2+
! REQUIRES: flang, amdgpu
3+
4+
program automap_program
5+
use iso_c_binding, only: c_loc
6+
use omp_lib, only: omp_get_default_device, omp_target_is_present
7+
integer, parameter :: N = 10
8+
integer :: i
9+
integer, allocatable, target :: automap_array(:)
10+
!$omp declare target enter(automap:automap_array)
11+
12+
! false since the storage is not present even though the descriptor is present
13+
write (*, *) omp_target_is_present(c_loc(automap_array), omp_get_default_device())
14+
! CHECK: 0
15+
16+
allocate (automap_array(N))
17+
! true since the storage should be allocated and reference count incremented by the allocate
18+
write (*, *) omp_target_is_present(c_loc(automap_array), omp_get_default_device())
19+
! CHECK: 1
20+
21+
! since storage is present this should not be a runtime error
22+
!$omp target teams loop
23+
do i = 1, N
24+
automap_array(i) = i
25+
end do
26+
27+
!$omp target update from(automap_array)
28+
write (*, *) automap_array
29+
! CHECK: 1 2 3 4 5 6 7 8 9 10
30+
31+
deallocate (automap_array)
32+
33+
! automap_array should have its storage unmapped on device here
34+
write (*, *) omp_target_is_present(c_loc(automap_array), omp_get_default_device())
35+
! CHECK: 0
36+
end program

0 commit comments

Comments
 (0)