Skip to content

Commit 938091d

Browse files
[Flang][mlir] - Translation of delayed privatization for deferred target-tasks
This patch adds support for translation of the private clause on deferred target tasks - that is, `omp.target` operations with the `nowait` clause. An offloading call for a deferred target task is not blocking - the offloading host task continues its execution after issuing the offloading call. Therefore, the key problem we need to solve is to ensure that the data needed to initialize private variables in the target task persists even after the host task has completed. We do this in a new pass called PrepareForOMPOffloadPrivatizationPass. For a privatized variable that needs its host counterpart for initialization (such as the shape of the data from the descriptor when an allocatable is privatized, or the value of the data when an allocatable is firstprivatized), the pass does the following: (1) it allocates memory on the heap; (2) it initializes this memory by copying the contents of the host variable to the newly allocated location on the heap; (3) it updates all the `omp.map.info` operations that pointed to the host variable so that they point to the copy located on the heap. The pass uses a rewrite pattern applied using the greedy pattern rewrite driver, which in turn does some constant folding and DCE. Because of this, a number of lit tests had to be updated. In GEPs, constants get folded into indices and truncated to i32 types. In some tests, sequences of insertvalue and extractvalue instructions cancel each other out, so these tests needed to be updated too.
1 parent 0a8acd2 commit 938091d

File tree

32 files changed

+705
-108
lines changed

32 files changed

+705
-108
lines changed

flang/include/flang/Optimizer/Passes/Pipelines.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
2323
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
2424
#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
25+
#include "mlir/Dialect/LLVMIR/Transforms/OpenMPOffloadPrivatizationPrepare.h"
2526
#include "mlir/Pass/PassManager.h"
2627
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
2728
#include "mlir/Transforms/Passes.h"

flang/lib/Optimizer/CodeGen/CodeGen.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
5353
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
5454
#include "mlir/Dialect/LLVMIR/Transforms/AddComdats.h"
55+
#include "mlir/Dialect/LLVMIR/Transforms/OpenMPOffloadPrivatizationPrepare.h"
5556
#include "mlir/Dialect/OpenACC/OpenACC.h"
5657
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
5758
#include "mlir/IR/BuiltinTypes.h"

flang/lib/Optimizer/Passes/Pipelines.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,12 @@ void createMLIRToLLVMPassPipeline(mlir::PassManager &pm,
411411

412412
// Add codegen pass pipeline.
413413
fir::createDefaultFIRCodeGenPassPipeline(pm, config, inputFilename);
414+
415+
// Run a pass to prepare for translation of delayed privatization in the
416+
// context of deferred target tasks.
417+
addNestedPassConditionally<mlir::LLVM::LLVMFuncOp>(pm, disableFirToLlvmIr,[&]() {
418+
return mlir::LLVM::createPrepareForOMPOffloadPrivatizationPass();
419+
});
414420
}
415421

416422
} // namespace fir

flang/test/Driver/tco-emit-final-mlir.fir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
// CHECK: llvm.return
1414
// CHECK-NOT: func.func
1515

16-
func.func @_QPfoo() {
16+
func.func @_QPfoo() -> !fir.ref<i32> {
1717
%1 = fir.alloca i32
18-
return
18+
return %1 : !fir.ref<i32>
1919
}

flang/test/Driver/tco-test-gen.fir

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,10 @@ func.func @_QPtest(%arg0: !fir.ref<i32> {fir.bindc_name = "num"}, %arg1: !fir.re
4242
// CHECK-SAME: %[[ARG2:.*]]: !llvm.ptr {fir.bindc_name = "ub", llvm.nocapture},
4343
// CHECK-SAME: %[[ARG3:.*]]: !llvm.ptr {fir.bindc_name = "step", llvm.nocapture}) {
4444

45+
// CMPLX: %[[VAL_3:.*]] = llvm.mlir.constant(0 : index) : i64
46+
// CMPLX: %[[VAL_2:.*]] = llvm.mlir.constant(1 : index) : i64
4547
// CMPLX: %[[VAL_0:.*]] = llvm.mlir.constant(1 : i64) : i64
4648
// CMPLX: %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
47-
// CMPLX: %[[VAL_2:.*]] = llvm.mlir.constant(1 : index) : i64
48-
// CMPLX: %[[VAL_3:.*]] = llvm.mlir.constant(0 : index) : i64
49-
// CMPLX: %[[VAL_4:.*]] = llvm.mlir.constant(1 : i64) : i64
5049

5150
// SIMPLE: %[[VAL_3:.*]] = llvm.mlir.constant(0 : index) : i64
5251
// SIMPLE: %[[VAL_2:.*]] = llvm.mlir.constant(1 : index) : i64

flang/test/Fir/alloc-32.fir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ func.func @allocmem_scalar_nonchar() -> !fir.heap<i32> {
1919
// CHECK-LABEL: define ptr @allocmem_scalar_dynchar(
2020
// CHECK-SAME: i32 %[[len:.*]])
2121
// CHECK: %[[mul1:.*]] = sext i32 %[[len]] to i64
22-
// CHECK: %[[mul2:.*]] = mul i64 1, %[[mul1]]
22+
// CHECK: %[[mul2:.*]] = mul i64 %[[mul1]], 1
2323
// CHECK: %[[cmp:.*]] = icmp sgt i64 %[[mul2]], 0
2424
// CHECK: %[[sz:.*]] = select i1 %[[cmp]], i64 %[[mul2]], i64 1
2525
// CHECK: %[[trunc:.*]] = trunc i64 %[[sz]] to i32

flang/test/Fir/alloc.fir

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ func.func @alloca_scalar_dynchar_kind(%l : i32) -> !fir.ref<!fir.char<2,?>> {
8686
// CHECK-LABEL: define ptr @allocmem_scalar_dynchar(
8787
// CHECK-SAME: i32 %[[len:.*]])
8888
// CHECK: %[[mul1:.*]] = sext i32 %[[len]] to i64
89-
// CHECK: %[[mul2:.*]] = mul i64 1, %[[mul1]]
89+
// CHECK: %[[mul2:.*]] = mul i64 %[[mul1]], 1
9090
// CHECK: %[[cmp:.*]] = icmp sgt i64 %[[mul2]], 0
9191
// CHECK: %[[size:.*]] = select i1 %[[cmp]], i64 %[[mul2]], i64 1
9292
// CHECK: call ptr @malloc(i64 %[[size]])
@@ -98,7 +98,7 @@ func.func @allocmem_scalar_dynchar(%l : i32) -> !fir.heap<!fir.char<1,?>> {
9898
// CHECK-LABEL: define ptr @allocmem_scalar_dynchar_kind(
9999
// CHECK-SAME: i32 %[[len:.*]])
100100
// CHECK: %[[mul1:.*]] = sext i32 %[[len]] to i64
101-
// CHECK: %[[mul2:.*]] = mul i64 2, %[[mul1]]
101+
// CHECK: %[[mul2:.*]] = mul i64 %[[mul1]], 2
102102
// CHECK: %[[cmp:.*]] = icmp sgt i64 %[[mul2]], 0
103103
// CHECK: %[[size:.*]] = select i1 %[[cmp]], i64 %[[mul2]], i64 1
104104
// CHECK: call ptr @malloc(i64 %[[size]])
@@ -185,7 +185,7 @@ func.func @alloca_dynarray_of_nonchar2(%e: index) -> !fir.ref<!fir.array<?x?xi32
185185

186186
// CHECK-LABEL: define ptr @allocmem_dynarray_of_nonchar(
187187
// CHECK-SAME: i64 %[[extent:.*]])
188-
// CHECK: %[[prod1:.*]] = mul i64 12, %[[extent]]
188+
// CHECK: %[[prod1:.*]] = mul i64 %[[extent]], 12
189189
// CHECK: %[[cmp:.*]] = icmp sgt i64 %[[prod1]], 0
190190
// CHECK: %[[size:.*]] = select i1 %[[cmp]], i64 %[[prod1]], i64 1
191191
// CHECK: call ptr @malloc(i64 %[[size]])
@@ -196,7 +196,7 @@ func.func @allocmem_dynarray_of_nonchar(%e: index) -> !fir.heap<!fir.array<3x?xi
196196

197197
// CHECK-LABEL: define ptr @allocmem_dynarray_of_nonchar2(
198198
// CHECK-SAME: i64 %[[extent:.*]])
199-
// CHECK: %[[prod1:.*]] = mul i64 4, %[[extent]]
199+
// CHECK: %[[prod1:.*]] = mul i64 %[[extent]], 4
200200
// CHECK: %[[prod2:.*]] = mul i64 %[[prod1]], %[[extent]]
201201
// CHECK: %[[cmp:.*]] = icmp sgt i64 %[[prod2]], 0
202202
// CHECK: %[[size:.*]] = select i1 %[[cmp]], i64 %[[prod2]], i64 1
@@ -227,7 +227,7 @@ func.func @alloca_dynarray_of_char2(%e : index) -> !fir.ref<!fir.array<?x?x!fir.
227227

228228
// CHECK-LABEL: define ptr @allocmem_dynarray_of_char(
229229
// CHECK-SAME: i64 %[[extent:.*]])
230-
// CHECK: %[[prod1:.*]] = mul i64 60, %[[extent]]
230+
// CHECK: %[[prod1:.*]] = mul i64 %[[extent]], 60
231231
// CHECK: %[[cmp:.*]] = icmp sgt i64 %[[prod1]], 0
232232
// CHECK: %[[size:.*]] = select i1 %[[cmp]], i64 %[[prod1]], i64 1
233233
// CHECK: call ptr @malloc(i64 %[[size]])
@@ -238,7 +238,7 @@ func.func @allocmem_dynarray_of_char(%e : index) -> !fir.heap<!fir.array<3x?x!fi
238238

239239
// CHECK-LABEL: define ptr @allocmem_dynarray_of_char2(
240240
// CHECK-SAME: i64 %[[extent:.*]])
241-
// CHECK: %[[prod1:.*]] = mul i64 20, %[[extent]]
241+
// CHECK: %[[prod1:.*]] = mul i64 %[[extent]], 20
242242
// CHECK: %[[prod2:.*]] = mul i64 %[[prod1]], %[[extent]]
243243
// CHECK: %[[cmp:.*]] = icmp sgt i64 %[[prod2]], 0
244244
// CHECK: %[[size:.*]] = select i1 %[[cmp]], i64 %[[prod2]], i64 1
@@ -286,7 +286,7 @@ func.func @allocmem_dynarray_of_dynchar(%l: i32, %e : index) -> !fir.heap<!fir.a
286286
// CHECK-LABEL: define ptr @allocmem_dynarray_of_dynchar2(
287287
// CHECK-SAME: i32 %[[len:.*]], i64 %[[extent:.*]])
288288
// CHECK: %[[a:.*]] = sext i32 %[[len]] to i64
289-
// CHECK: %[[prod1:.*]] = mul i64 2, %[[a]]
289+
// CHECK: %[[prod1:.*]] = mul i64 %[[a]], 2
290290
// CHECK: %[[prod2:.*]] = mul i64 %[[prod1]], %[[extent]]
291291
// CHECK: %[[prod3:.*]] = mul i64 %[[prod2]], %[[extent]]
292292
// CHECK: %[[cmp:.*]] = icmp sgt i64 %[[prod3]], 0
@@ -366,12 +366,13 @@ func.func @allocmem_array_with_holes_dynchar(%arg0: index, %arg1: index) -> !fir
366366
// CHECK: %[[VAL_0:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, i64 1
367367
// CHECK: %[[VAL_3:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, i64 1
368368
// CHECK: %[[VAL_2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, i64 1
369-
369+
func.func private @foo(%0: !fir.ref<!fir.class<none>>, %1: !fir.ref<!fir.class<!fir.array<?xnone>>>, %2: !fir.ref<!fir.box<none>>, %3: !fir.ref<!fir.box<!fir.array<?xnone>>>)
370370
func.func @alloca_unlimited_polymorphic_box() {
371371
%0 = fir.alloca !fir.class<none>
372372
%1 = fir.alloca !fir.class<!fir.array<?xnone>>
373373
%2 = fir.alloca !fir.box<none>
374374
%3 = fir.alloca !fir.box<!fir.array<?xnone>>
375+
fir.call @foo(%0, %1, %2, %3) : (!fir.ref<!fir.class<none>>, !fir.ref<!fir.class<!fir.array<?xnone>>>, !fir.ref<!fir.box<none>>, !fir.ref<!fir.box<!fir.array<?xnone>>>) -> ()
375376
return
376377
}
377378
// Note: allocmem of fir.box are not possible (fir::HeapType::verify does not

flang/test/Fir/arrexp.fir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,9 @@ func.func @f6(%arg0: !fir.box<!fir.array<?xf32>>, %arg1: f32) {
143143
%c9 = arith.constant 9 : index
144144
%c10 = arith.constant 10 : index
145145

146-
// CHECK: %[[EXT_GEP:.*]] = getelementptr {{.*}} %[[A]], i32 0, i32 7, i64 0, i32 1
146+
// CHECK: %[[EXT_GEP:.*]] = getelementptr {{.*}} %[[A]], i32 0, i32 7, i32 0, i32 1
147147
// CHECK: %[[EXTENT:.*]] = load i64, ptr %[[EXT_GEP]]
148-
// CHECK: %[[SIZE:.*]] = mul i64 4, %[[EXTENT]]
148+
// CHECK: %[[SIZE:.*]] = mul i64 %[[EXTENT]], 4
149149
// CHECK: %[[CMP:.*]] = icmp sgt i64 %[[SIZE]], 0
150150
// CHECK: %[[SZ:.*]] = select i1 %[[CMP]], i64 %[[SIZE]], i64 1
151151
// CHECK: %[[MALLOC:.*]] = call ptr @malloc(i64 %[[SZ]])

flang/test/Fir/basic-program.fir

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,4 +158,6 @@ func.func @_QQmain() {
158158
// PASSES-NEXT: LowerNontemporalPass
159159
// PASSES-NEXT: FIRToLLVMLowering
160160
// PASSES-NEXT: ReconcileUnrealizedCasts
161+
// PASSES-NEXT: 'llvm.func' Pipeline
162+
// PASSES-NEXT: PrepareForOMPOffloadPrivatizationPass
161163
// PASSES-NEXT: LLVMIRLoweringPass

flang/test/Fir/box.fir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ func.func @fa(%a : !fir.ref<!fir.array<100xf32>>) {
5757
// CHECK-SAME: ptr {{[^%]*}}%[[res:.*]], ptr {{[^%]*}}%[[arg0:.*]], i64 %[[arg1:.*]])
5858
func.func @b1(%arg0 : !fir.ref<!fir.char<1,?>>, %arg1 : index) -> !fir.box<!fir.char<1,?>> {
5959
// CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }
60-
// CHECK: %[[size:.*]] = mul i64 1, %[[arg1]]
60+
// CHECK: %[[size:.*]] = mul i64 %[[arg1]], 1
6161
// CHECK: insertvalue {{.*}} undef, i64 %[[size]], 1
6262
// CHECK: insertvalue {{.*}} i32 20240719, 2
6363
// CHECK: insertvalue {{.*}} ptr %[[arg0]], 0
@@ -89,7 +89,7 @@ func.func @b2(%arg0 : !fir.ref<!fir.array<?x!fir.char<1,5>>>, %arg1 : index) ->
8989
func.func @b3(%arg0 : !fir.ref<!fir.array<?x!fir.char<1,?>>>, %arg1 : index, %arg2 : index) -> !fir.box<!fir.array<?x!fir.char<1,?>>> {
9090
%1 = fir.shape %arg2 : (index) -> !fir.shape<1>
9191
// CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }
92-
// CHECK: %[[size:.*]] = mul i64 1, %[[arg1]]
92+
// CHECK: %[[size:.*]] = mul i64 %[[arg1]], 1
9393
// CHECK: insertvalue {{.*}} i64 %[[size]], 1
9494
// CHECK: insertvalue {{.*}} i32 20240719, 2
9595
// CHECK: insertvalue {{.*}} i64 %[[arg2]], 7, 0, 1
@@ -108,7 +108,7 @@ func.func @b4(%arg0 : !fir.ref<!fir.array<7x!fir.char<1,?>>>, %arg1 : index) ->
108108
%c_7 = arith.constant 7 : index
109109
%1 = fir.shape %c_7 : (index) -> !fir.shape<1>
110110
// CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }
111-
// CHECK: %[[size:.*]] = mul i64 1, %[[arg1]]
111+
// CHECK: %[[size:.*]] = mul i64 %[[arg1]], 1
112112
// CHECK: insertvalue {{.*}} i64 %[[size]], 1
113113
// CHECK: insertvalue {{.*}} i32 20240719, 2
114114
// CHECK: insertvalue {{.*}} i64 7, 7, 0, 1

0 commit comments

Comments
 (0)