Skip to content

Commit 2b8404d

Browse files
authored
merge main into amd-staging (llvm#1530)
2 parents 86879ac + 2cc5c44 commit 2b8404d

File tree

14 files changed

+377
-19
lines changed

14 files changed

+377
-19
lines changed

flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
! RUN: %flang_fc1 -fopenmp -emit-llvm %s -o - | FileCheck %s
2-
! XFAIL: *
32

43
! Combinational testing of control flow graph and builder insertion points
54
! in mlir-to-llvm conversion:

llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1983,6 +1983,8 @@ class OpenMPIRBuilder {
19831983
/// \param IsNoWait A flag set if the reduction is marked as nowait.
19841984
/// \param IsByRef A flag set if the reduction is using reference
19851985
/// or direct value.
1986+
/// \param IsTeamsReduction Optional flag set if it is a teams
1987+
/// reduction.
19861988
InsertPointOrErrorTy createReductions(const LocationDescription &Loc,
19871989
InsertPointTy AllocaIP,
19881990
ArrayRef<ReductionInfo> ReductionInfos,

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3946,7 +3946,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductions(
39463946
// function. Partial values are extracted from the type-erased array of
39473947
// pointers to private variables.
39483948
Error Err = populateReductionFunction(ReductionFunc, ReductionInfos, Builder,
3949-
IsByRef, false);
3949+
IsByRef, /*isGPU=*/false);
39503950
if (Err)
39513951
return Err;
39523952

llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2361,7 +2361,7 @@ TEST_F(OpenMPIRBuilderTest, StaticWorkshareLoopTarget) {
23612361
"256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8");
23622362
OpenMPIRBuilder OMPBuilder(*M);
23632363
OMPBuilder.Config.IsTargetDevice = true;
2364-
OMPBuilder.Config.IsGPU = true;
2364+
OMPBuilder.Config.setIsGPU(false);
23652365
OMPBuilder.initialize();
23662366
IRBuilder<> Builder(BB);
23672367
OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,15 +1027,10 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
10271027
// variable allocated in the inlined region)
10281028
llvm::Value *var = builder.CreateAlloca(
10291029
moduleTranslation.convertType(reductionDecls[i].getType()));
1030-
var->setName("private_redvar");
10311030

1032-
llvm::Type *ptrTy = llvm::PointerType::getUnqual(builder.getContext());
1031+
llvm::Type *ptrTy = builder.getPtrTy();
10331032
llvm::Value *castVar =
10341033
builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);
1035-
// TODO: I (Sergio) just guessed casting phis[0] like it's done for var is
1036-
// what's supposed to happen with this code coming from a merge from main,
1037-
// but I don't actually know. Someone more familiar with it needs to check
1038-
// this.
10391034
llvm::Value *castPhi =
10401035
builder.CreatePointerBitCastOrAddrSpaceCast(phis[0], ptrTy);
10411036

@@ -1049,9 +1044,7 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
10491044
"allocaction is implicit for by-val reduction");
10501045
llvm::Value *var = builder.CreateAlloca(
10511046
moduleTranslation.convertType(reductionDecls[i].getType()));
1052-
var->setName("private_redvar");
1053-
1054-
llvm::Type *ptrTy = llvm::PointerType::getUnqual(builder.getContext());
1047+
llvm::Type *ptrTy = builder.getPtrTy();
10551048
llvm::Value *castVar =
10561049
builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);
10571050

@@ -1753,8 +1746,7 @@ static bool teamsReductionContainedInDistribute(omp::TeamsOp teamsOp) {
17531746
for (auto &use : ra.getUses()) {
17541747
auto *useOp = use.getOwner();
17551748
// Ignore debug uses.
1756-
if (mlir::isa<LLVM::DbgDeclareOp>(useOp) ||
1757-
mlir::isa<LLVM::DbgValueOp>(useOp)) {
1749+
if (mlir::isa<LLVM::DbgDeclareOp, LLVM::DbgValueOp>(useOp)) {
17581750
debugUses.push_back(useOp);
17591751
continue;
17601752
}
@@ -2478,8 +2470,9 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
24782470
builder.SetInsertPoint(tempTerminator);
24792471

24802472
llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
2481-
ompBuilder->createReductions(builder.saveIP(), allocaIP,
2482-
reductionInfos, isByRef, false, false);
2473+
ompBuilder->createReductions(
2474+
builder.saveIP(), allocaIP, reductionInfos, isByRef,
2475+
/*IsNoWait=*/false, /*IsTeamsReduction=*/false);
24832476
if (!contInsertPoint)
24842477
return contInsertPoint.takeError();
24852478

@@ -4968,7 +4961,7 @@ static uint64_t getTypeByteSize(mlir::Type type, const DataLayout &dl) {
49684961
template <typename OpTy>
49694962
static uint64_t getReductionDataSize(OpTy &op) {
49704963
if (op.getNumReductionVars() > 0) {
4971-
assert(op.getNumReductionVars() &&
4964+
assert(op.getNumReductionVars() == 1 &&
49724965
"Only 1 reduction variable currently supported");
49734966
mlir::Type reductionVarTy = op.getReductionVars()[0].getType();
49744967
Operation *opp = op.getOperation();
@@ -5094,6 +5087,8 @@ initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp,
50945087
attrs.MinThreads = 1;
50955088
attrs.MaxThreads.front() = combinedMaxThreadsVal;
50965089
attrs.ReductionDataSize = reductionDataSize;
5090+
// TODO: Allow modified buffer length similar to
5091+
// the -fopenmp-cuda-teams-reduction-recs-num flag in clang.
50975092
if (attrs.ReductionDataSize != 0)
50985093
attrs.ReductionBufferLength = 1024;
50995094
}
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
2+
3+
// Only check the overall shape of the code and the presence of relevant
4+
// runtime calls. Actual IR checking is done at the OpenMPIRBuilder level.
5+
6+
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } {
7+
omp.private {type = private} @_QFsimple_target_teams_only_reductionEindex__private_i32 : i32
8+
omp.declare_reduction @add_reduction_i32 : i32 init {
9+
^bb0(%arg0: i32):
10+
%0 = llvm.mlir.constant(0 : i32) : i32
11+
omp.yield(%0 : i32)
12+
} combiner {
13+
^bb0(%arg0: i32, %arg1: i32):
14+
%0 = llvm.add %arg0, %arg1 : i32
15+
omp.yield(%0 : i32)
16+
}
17+
llvm.func @simple_target_teams_only_reduction_() attributes {fir.internal_name = "_QPsimple_target_teams_only_reduction", frame_pointer = #llvm.framePointerKind<all>, omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, target_cpu = "gfx1030", target_features = #llvm.target_features<["+16-bit-insts", "+ci-insts", "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp", "+gfx10-3-insts", "+gfx10-insts", "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize32"]>} {
18+
%0 = llvm.mlir.constant(1 : i64) : i64
19+
%1 = llvm.alloca %0 x i32 {bindc_name = "sum"} : (i64) -> !llvm.ptr<5>
20+
%2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
21+
%3 = llvm.mlir.constant(1 : i64) : i64
22+
%4 = llvm.alloca %3 x i32 {bindc_name = "index_"} : (i64) -> !llvm.ptr<5>
23+
%5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr
24+
%6 = llvm.mlir.constant(0 : i32) : i32
25+
%7 = llvm.mlir.constant(1 : i64) : i64
26+
%8 = llvm.mlir.constant(1 : i64) : i64
27+
llvm.store %6, %2 : i32, !llvm.ptr
28+
%9 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sum"}
29+
%10 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "index_"}
30+
omp.target map_entries(%9 -> %arg0, %10 -> %arg1 : !llvm.ptr, !llvm.ptr) {
31+
%11 = llvm.mlir.constant(10000 : i32) : i32
32+
%12 = llvm.mlir.constant(1 : i32) : i32
33+
omp.teams reduction(@add_reduction_i32 %arg0 -> %arg2 : !llvm.ptr) {
34+
omp.distribute private(@_QFsimple_target_teams_only_reductionEindex__private_i32 %arg1 -> %arg3 : !llvm.ptr) {
35+
omp.loop_nest (%arg4) : i32 = (%12) to (%11) inclusive step (%12) {
36+
llvm.store %arg4, %arg3 : i32, !llvm.ptr
37+
%13 = llvm.load %arg2 : !llvm.ptr -> i32
38+
%14 = llvm.load %arg3 : !llvm.ptr -> i32
39+
%15 = llvm.add %13, %14 : i32
40+
llvm.store %15, %arg2 : i32, !llvm.ptr
41+
omp.yield
42+
}
43+
}
44+
omp.terminator
45+
}
46+
omp.terminator
47+
}
48+
llvm.return
49+
}
50+
}
51+
52+
// CHECK: call i32 @__kmpc_target_init
53+
// CHECK: call void @[[OUTLINED:__omp_offloading_[A-Za-z0-9_.]*]]
54+
// CHECK: define internal void @[[OUTLINED]]
55+
// CHECK: %[[MASTER:.+]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2
56+
// CHECK: icmp eq i32 %[[MASTER]], 1
57+
// CHECK: i1 %{{.+}}, label %[[THEN:[A-Za-z0-9_.]*]], label %[[DONE:[A-Za-z0-9_.]*]]
58+
59+
// CHECK: call void @__kmpc_barrier
60+
61+
// CHECK: [[THEN]]:
62+
// CHECK-NEXT: %[[FINAL_RHS:[A-Za-z0-9_.]*]] = load i32
63+
// CHECK-NEXT: %[[FINAL_LHS:[A-Za-z0-9_.]*]] = load i32
64+
// CHECK-NEXT: %[[FINAL_RESULT:[A-Za-z0-9_.]*]] = add i32 %[[FINAL_LHS]], %[[FINAL_RHS]]
65+
// CHECK-NEXT: store i32 %[[FINAL_RESULT]]
66+
67+
68+
// CHECK: call void @__kmpc_distribute_static_loop_4u
69+
// CHECK-SAME: [[OUTLINED2:__omp_offloading_[A-Za-z0-9_.]*]]
70+
71+
// CHECK: define internal void @[[OUTLINED2]]
72+
// CHECK: %[[TEAM_RHS:[A-Za-z0-9_.]*]] = load i32
73+
// CHECK-NEXT: %[[TEAM_LHS:[A-Za-z0-9_.]*]] = load i32
74+
// CHECK-NEXT: %[[TEAM_RESULT:[A-Za-z0-9_.]*]] = add i32 %[[TEAM_RHS]], %[[TEAM_LHS]]
75+
// CHECK-NEXT: store i32 %[[TEAM_RESULT]]
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
2+
3+
// Only check the overall shape of the code and the presence of relevant
4+
// runtime calls. Actual IR checking is done at the OpenMPIRBuilder level.
5+
6+
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } {
7+
omp.declare_reduction @add_reduction_i32 : i32 init {
8+
^bb0(%arg0: i32):
9+
%0 = llvm.mlir.constant(0 : i32) : i32
10+
omp.yield(%0 : i32)
11+
} combiner {
12+
^bb0(%arg0: i32, %arg1: i32):
13+
%0 = llvm.add %arg0, %arg1 : i32
14+
omp.yield(%0 : i32)
15+
}
16+
llvm.func @simple_target_teams_only_reduction_() attributes {fir.internal_name = "_QPsimple_target_teams_only_reduction", frame_pointer = #llvm.framePointerKind<all>, omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, target_cpu = "gfx1030", target_features = #llvm.target_features<["+16-bit-insts", "+ci-insts", "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp", "+gfx10-3-insts", "+gfx10-insts", "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize32"]>} {
17+
%0 = llvm.mlir.constant(1 : i64) : i64
18+
%1 = llvm.alloca %0 x i32 {bindc_name = "sum"} : (i64) -> !llvm.ptr<5>
19+
%2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
20+
%3 = llvm.mlir.constant(1 : i64) : i64
21+
%4 = llvm.alloca %3 x i32 {bindc_name = "index_"} : (i64) -> !llvm.ptr<5>
22+
%5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr
23+
%6 = llvm.mlir.constant(0 : i32) : i32
24+
%7 = llvm.mlir.constant(1 : i64) : i64
25+
%8 = llvm.mlir.constant(1 : i64) : i64
26+
llvm.store %6, %2 : i32, !llvm.ptr
27+
%9 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sum"}
28+
%10 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "index_"}
29+
omp.target map_entries(%9 -> %arg0, %10 -> %arg1 : !llvm.ptr, !llvm.ptr) {
30+
%11 = llvm.mlir.constant(0 : index) : i64
31+
%12 = llvm.mlir.constant(10000 : index) : i64
32+
%13 = llvm.mlir.constant(1 : index) : i64
33+
omp.teams reduction(@add_reduction_i32 %arg0 -> %arg2 : !llvm.ptr) {
34+
%14 = llvm.trunc %13 : i64 to i32
35+
llvm.br ^bb1(%14, %12 : i32, i64)
36+
^bb1(%15: i32, %16: i64): // 2 preds: ^bb0, ^bb2
37+
%17 = llvm.icmp "sgt" %16, %11 : i64
38+
llvm.cond_br %17, ^bb2, ^bb3
39+
^bb2: // pred: ^bb1
40+
llvm.store %15, %arg1 : i32, !llvm.ptr
41+
%18 = llvm.load %arg2 : !llvm.ptr -> i32
42+
%19 = llvm.load %arg1 : !llvm.ptr -> i32
43+
%20 = llvm.add %18, %19 : i32
44+
llvm.store %20, %arg2 : i32, !llvm.ptr
45+
%21 = llvm.load %arg1 : !llvm.ptr -> i32
46+
%22 = llvm.add %21, %14 overflow<nsw> : i32
47+
%23 = llvm.sub %16, %13 : i64
48+
llvm.br ^bb1(%22, %23 : i32, i64)
49+
^bb3: // pred: ^bb1
50+
llvm.store %15, %arg1 : i32, !llvm.ptr
51+
omp.terminator
52+
}
53+
omp.terminator
54+
}
55+
llvm.return
56+
}
57+
}
58+
59+
// CHECK: call i32 @__kmpc_target_init
60+
// CHECK: call void @[[OUTLINED:__omp_offloading_[A-Za-z0-9_.]*]]
61+
// CHECK: %[[MASTER:.+]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2
62+
// CHECK: icmp eq i32 %[[MASTER]], 1
63+
// CHECK: i1 %{{.+}}, label %[[THEN:[A-Za-z0-9_.]*]], label %[[DONE:[A-Za-z0-9_.]*]]
64+
// CHECK: [[THEN]]:
65+
// CHECK-NEXT: %[[FINAL_RHS:[A-Za-z0-9_.]*]] = load i32
66+
// CHECK-NEXT: %[[FINAL_LHS:[A-Za-z0-9_.]*]] = load i32
67+
// CHECK-NEXT: %[[FINAL_RESULT:[A-Za-z0-9_.]*]] = add i32 %[[FINAL_LHS]], %[[FINAL_RHS]]
68+
// CHECK-NEXT: store i32 %[[FINAL_RESULT]]
69+
70+
// CHECK: call void @__kmpc_barrier
71+
// CHECK: call void @__kmpc_target_deinit
72+
73+
// CHECK: define internal void @[[OUTLINED]]
74+
// Skip to the loop
75+
// CHECK: br i1
76+
// CHECK: %[[TEAM_RHS:[A-Za-z0-9_.]*]] = load i32
77+
// CHECK-NEXT: %[[TEAM_LHS:[A-Za-z0-9_.]*]] = load i32
78+
// CHECK-NEXT: %[[TEAM_RESULT:[A-Za-z0-9_.]*]] = add i32 %[[TEAM_RHS]], %[[TEAM_LHS]]
79+
// CHECK-NEXT: store i32 %[[TEAM_RESULT]]

mlir/test/Target/LLVMIR/openmp-reduction.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
2-
2+
// XFAIL: *
33
// Only check the overall shape of the code and the presence of relevant
44
// runtime calls. Actual IR checking is done at the OpenMPIRBuilder level.
55

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
2+
3+
// Only check the overall shape of the code and the presence of relevant
4+
// runtime calls. Actual IR checking is done at the OpenMPIRBuilder level.
5+
6+
omp.private {type = private} @_QFsimple_teams_reductionEindex__private_i32 : i32
7+
omp.declare_reduction @add_reduction_i32 : i32 init {
8+
^bb0(%arg0: i32):
9+
%0 = llvm.mlir.constant(0 : i32) : i32
10+
omp.yield(%0 : i32)
11+
} combiner {
12+
^bb0(%arg0: i32, %arg1: i32):
13+
%0 = llvm.add %arg0, %arg1 : i32
14+
omp.yield(%0 : i32)
15+
}
16+
llvm.func @simple_teams_reduction_() attributes {fir.internal_name = "_QPsimple_teams_reduction", frame_pointer = #llvm.framePointerKind<all>, target_cpu = "x86-64"} {
17+
%0 = llvm.mlir.constant(1 : i64) : i64
18+
%1 = llvm.alloca %0 x i32 {bindc_name = "sum"} : (i64) -> !llvm.ptr
19+
%2 = llvm.mlir.constant(1 : i64) : i64
20+
%3 = llvm.alloca %2 x i32 {bindc_name = "index_"} : (i64) -> !llvm.ptr
21+
%4 = llvm.mlir.constant(10000 : i32) : i32
22+
%5 = llvm.mlir.constant(1 : i32) : i32
23+
%6 = llvm.mlir.constant(0 : i32) : i32
24+
%7 = llvm.mlir.constant(1 : i64) : i64
25+
%8 = llvm.mlir.constant(1 : i64) : i64
26+
llvm.store %6, %1 : i32, !llvm.ptr
27+
omp.teams reduction(@add_reduction_i32 %1 -> %arg0 : !llvm.ptr) {
28+
omp.distribute private(@_QFsimple_teams_reductionEindex__private_i32 %3 -> %arg1 : !llvm.ptr) {
29+
omp.loop_nest (%arg2) : i32 = (%5) to (%4) inclusive step (%5) {
30+
llvm.store %arg2, %arg1 : i32, !llvm.ptr
31+
%9 = llvm.load %arg0 : !llvm.ptr -> i32
32+
%10 = llvm.load %arg1 : !llvm.ptr -> i32
33+
%11 = llvm.add %9, %10 : i32
34+
llvm.store %11, %arg0 : i32, !llvm.ptr
35+
omp.yield
36+
}
37+
}
38+
omp.terminator
39+
}
40+
llvm.return
41+
}
42+
// Call to outlined function
43+
// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams
44+
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
45+
46+
// Outlined function.
47+
// CHECK: define internal void @[[OUTLINED]]
48+
49+
// Private reduction variable and its initialization.
50+
// CHECK: %[[PRIVATE:.+]] = alloca i32
51+
// CHECK: store i32 0, ptr %[[PRIVATE]]
52+
53+
// Call to the reduction function.
54+
// CHECK: call i32 @__kmpc_reduce
55+
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
56+
57+
// Atomic version not generated
58+
// CHECK: unreachable
59+
60+
// Non atomic version
61+
// CHECK: call void @__kmpc_end_reduce
62+
63+
// Finalize
64+
// CHECK: br label %[[FINALIZE:.+]]
65+
66+
// CHECK: [[FINALIZE]]:
67+
// CHECK: call void @__kmpc_barrier
68+
69+
// Reduction function.
70+
// CHECK: define internal void @[[REDFUNC]]
71+
// CHECK: add i32

0 commit comments

Comments
 (0)