ROCm
diff --git a/‎flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90‎
Lines changed: 0 additions & 1 deletion b/‎flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h‎
Lines changed: 2 additions & 0 deletions b/‎llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp‎
Lines changed: 1 addition & 1 deletion b/‎llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp‎
Lines changed: 1 addition & 1 deletion b/‎llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp‎
Lines changed: 9 additions & 14 deletions b/‎mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp‎
Lines changed: 9 additions & 14 deletions
diff --git a/‎mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir‎
Lines changed: 75 additions & 0 deletions b/‎mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir‎
Lines changed: 75 additions & 0 deletions
diff --git a/‎mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir‎
Lines changed: 79 additions & 0 deletions b/‎mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir‎
Lines changed: 79 additions & 0 deletions
diff --git a/‎mlir/test/Target/LLVMIR/openmp-reduction.mlir‎
Lines changed: 1 addition & 1 deletion b/‎mlir/test/Target/LLVMIR/openmp-reduction.mlir‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎mlir/test/Target/LLVMIR/openmp-teams-distribute-reduction.mlir‎
Lines changed: 71 additions & 0 deletions b/‎mlir/test/Target/LLVMIR/openmp-teams-distribute-reduction.mlir‎
Lines changed: 71 additions & 0 deletions
@@ -1,5 +1,4 @@
 ! RUN: %flang_fc1 -fopenmp -emit-llvm %s -o - | FileCheck %s
-! XFAIL: *
 
 ! Combinational testing of control flow graph and builder insertion points
 ! in mlir-to-llvm conversion:
 
@@ -1983,6 +1983,8 @@ class OpenMPIRBuilder {
   /// \param IsNoWait           A flag set if the reduction is marked as nowait.
   /// \param IsByRef            A flag set if the reduction is using reference
   /// or direct value.
+  /// \param IsTeamsReduction   Optional flag set if it is a teams
+  ///                           reduction.
   InsertPointOrErrorTy createReductions(const LocationDescription &Loc,
                                         InsertPointTy AllocaIP,
                                         ArrayRef<ReductionInfo> ReductionInfos,
 
@@ -3946,7 +3946,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductions(
   // function. Partial values are extracted from the type-erased array of
   // pointers to private variables.
   Error Err = populateReductionFunction(ReductionFunc, ReductionInfos, Builder,
-                                        IsByRef, false);
+                                        IsByRef, /*isGPU=*/false);
   if (Err)
     return Err;
 
 
@@ -2361,7 +2361,7 @@ TEST_F(OpenMPIRBuilderTest, StaticWorkshareLoopTarget) {
       "256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8");
   OpenMPIRBuilder OMPBuilder(*M);
   OMPBuilder.Config.IsTargetDevice = true;
-  OMPBuilder.Config.IsGPU = true;
+  OMPBuilder.Config.setIsGPU(false);
   OMPBuilder.initialize();
   IRBuilder<> Builder(BB);
   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
 
@@ -1027,15 +1027,10 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
       // variable allocated in the inlined region)
       llvm::Value *var = builder.CreateAlloca(
           moduleTranslation.convertType(reductionDecls[i].getType()));
-      var->setName("private_redvar");
 
-      llvm::Type *ptrTy = llvm::PointerType::getUnqual(builder.getContext());
+      llvm::Type *ptrTy = builder.getPtrTy();
       llvm::Value *castVar =
           builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);
-      // TODO: I (Sergio) just guessed casting phis[0] like it's done for var is
-      // what's supposed to happen with this code coming from a merge from main,
-      // but I don't actually know. Someone more familiar with it needs to check
-      // this.
       llvm::Value *castPhi =
           builder.CreatePointerBitCastOrAddrSpaceCast(phis[0], ptrTy);
 
@@ -1049,9 +1044,7 @@ allocReductionVars(T loop, ArrayRef<BlockArgument> reductionArgs,
              "allocaction is implicit for by-val reduction");
       llvm::Value *var = builder.CreateAlloca(
           moduleTranslation.convertType(reductionDecls[i].getType()));
-      var->setName("private_redvar");
-
-      llvm::Type *ptrTy = llvm::PointerType::getUnqual(builder.getContext());
+      llvm::Type *ptrTy = builder.getPtrTy();
       llvm::Value *castVar =
           builder.CreatePointerBitCastOrAddrSpaceCast(var, ptrTy);
 
@@ -1753,8 +1746,7 @@ static bool teamsReductionContainedInDistribute(omp::TeamsOp teamsOp) {
     for (auto &use : ra.getUses()) {
       auto *useOp = use.getOwner();
       // Ignore debug uses.
-      if (mlir::isa<LLVM::DbgDeclareOp>(useOp) ||
-          mlir::isa<LLVM::DbgValueOp>(useOp)) {
+      if (mlir::isa<LLVM::DbgDeclareOp, LLVM::DbgValueOp>(useOp)) {
         debugUses.push_back(useOp);
         continue;
       }
@@ -2478,8 +2470,9 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
       builder.SetInsertPoint(tempTerminator);
 
       llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
-          ompBuilder->createReductions(builder.saveIP(), allocaIP,
-                                       reductionInfos, isByRef, false, false);
+          ompBuilder->createReductions(
+              builder.saveIP(), allocaIP, reductionInfos, isByRef,
+              /*IsNoWait=*/false, /*IsTeamsReduction=*/false);
       if (!contInsertPoint)
         return contInsertPoint.takeError();
 
@@ -4968,7 +4961,7 @@ static uint64_t getTypeByteSize(mlir::Type type, const DataLayout &dl) {
 template <typename OpTy>
 static uint64_t getReductionDataSize(OpTy &op) {
   if (op.getNumReductionVars() > 0) {
-    assert(op.getNumReductionVars() &&
+    assert(op.getNumReductionVars() == 1 &&
            "Only 1 reduction variable currently supported");
     mlir::Type reductionVarTy = op.getReductionVars()[0].getType();
     Operation *opp = op.getOperation();
@@ -5094,6 +5087,8 @@ initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp,
   attrs.MinThreads = 1;
   attrs.MaxThreads.front() = combinedMaxThreadsVal;
   attrs.ReductionDataSize = reductionDataSize;
+  // TODO: Allow modified buffer length similar to
+  // fopenmp-cuda-teams-reduction-recs-num flag in clang.
   if (attrs.ReductionDataSize != 0)
     attrs.ReductionBufferLength = 1024;
 }
 
@@ -0,0 +1,75 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// Only check the overall shape of the code and the presence of relevant
+// runtime calls. Actual IR checking is done at the OpenMPIRBuilder level.
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } {
+  omp.private {type = private} @_QFsimple_target_teams_only_reductionEindex__private_i32 : i32
+  omp.declare_reduction @add_reduction_i32 : i32 init {
+  ^bb0(%arg0: i32):
+    %0 = llvm.mlir.constant(0 : i32) : i32
+    omp.yield(%0 : i32)
+  } combiner {
+  ^bb0(%arg0: i32, %arg1: i32):
+    %0 = llvm.add %arg0, %arg1 : i32
+    omp.yield(%0 : i32)
+  }
+  llvm.func @simple_target_teams_only_reduction_() attributes {fir.internal_name = "_QPsimple_target_teams_only_reduction", frame_pointer = #llvm.framePointerKind<all>, omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, target_cpu = "gfx1030", target_features = #llvm.target_features<["+16-bit-insts", "+ci-insts", "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp", "+gfx10-3-insts", "+gfx10-insts", "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize32"]>} {
+    %0 = llvm.mlir.constant(1 : i64) : i64
+    %1 = llvm.alloca %0 x i32 {bindc_name = "sum"} : (i64) -> !llvm.ptr<5>
+    %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
+    %3 = llvm.mlir.constant(1 : i64) : i64
+    %4 = llvm.alloca %3 x i32 {bindc_name = "index_"} : (i64) -> !llvm.ptr<5>
+    %5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr
+    %6 = llvm.mlir.constant(0 : i32) : i32
+    %7 = llvm.mlir.constant(1 : i64) : i64
+    %8 = llvm.mlir.constant(1 : i64) : i64
+    llvm.store %6, %2 : i32, !llvm.ptr
+    %9 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sum"}
+    %10 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "index_"}
+    omp.target map_entries(%9 -> %arg0, %10 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      %11 = llvm.mlir.constant(10000 : i32) : i32
+      %12 = llvm.mlir.constant(1 : i32) : i32
+      omp.teams reduction(@add_reduction_i32 %arg0 -> %arg2 : !llvm.ptr) {
+        omp.distribute private(@_QFsimple_target_teams_only_reductionEindex__private_i32 %arg1 -> %arg3 : !llvm.ptr) {
+          omp.loop_nest (%arg4) : i32 = (%12) to (%11) inclusive step (%12) {
+            llvm.store %arg4, %arg3 : i32, !llvm.ptr
+            %13 = llvm.load %arg2 : !llvm.ptr -> i32
+            %14 = llvm.load %arg3 : !llvm.ptr -> i32
+            %15 = llvm.add %13, %14 : i32
+            llvm.store %15, %arg2 : i32, !llvm.ptr
+            omp.yield
+          }
+        }
+        omp.terminator
+      }
+      omp.terminator
+    }
+    llvm.return
+  }
+}
+
+// CHECK: call i32 @__kmpc_target_init
+// CHECK: call void @[[OUTLINED:__omp_offloading_[A-Za-z0-9_.]*]]
+// CHECK: define internal void @[[OUTLINED]]
+// CHECK: %[[MASTER:.+]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2
+// CHECK: icmp eq i32 %[[MASTER]], 1
+// CHECK: i1 %{{.+}}, label %[[THEN:[A-Za-z0-9_.]*]], label %[[DONE:[A-Za-z0-9_.]*]]
+
+// CHECK: call void @__kmpc_barrier
+
+// CHECK: [[THEN]]:
+// CHECK-NEXT: %[[FINAL_RHS:[A-Za-z0-9_.]*]] = load i32
+// CHECK-NEXT: %[[FINAL_LHS:[A-Za-z0-9_.]*]] = load i32
+// CHECK-NEXT: %[[FINAL_RESULT:[A-Za-z0-9_.]*]] = add i32 %[[FINAL_LHS]], %[[FINAL_RHS]]
+// CHECK-NEXT: store i32 %[[FINAL_RESULT]]
+
+
+// CHECK: call void @__kmpc_distribute_static_loop_4u
+// CHECK-SAME: [[OUTLINED2:__omp_offloading_[A-Za-z0-9_.]*]]
+
+// CHECK: define internal void @[[OUTLINED2]]
+// CHECK: %[[TEAM_RHS:[A-Za-z0-9_.]*]] = load i32
+// CHECK-NEXT: %[[TEAM_LHS:[A-Za-z0-9_.]*]] = load i32
+// CHECK-NEXT: %[[TEAM_RESULT:[A-Za-z0-9_.]*]] = add i32 %[[TEAM_RHS]], %[[TEAM_LHS]]
+// CHECK-NEXT: store i32 %[[TEAM_RESULT]]
@@ -0,0 +1,79 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// Only check the overall shape of the code and the presence of relevant
+// runtime calls. Actual IR checking is done at the OpenMPIRBuilder level.
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } {
+  omp.declare_reduction @add_reduction_i32 : i32 init {
+  ^bb0(%arg0: i32):
+    %0 = llvm.mlir.constant(0 : i32) : i32
+    omp.yield(%0 : i32)
+  } combiner {
+  ^bb0(%arg0: i32, %arg1: i32):
+    %0 = llvm.add %arg0, %arg1 : i32
+    omp.yield(%0 : i32)
+  }
+  llvm.func @simple_target_teams_only_reduction_() attributes {fir.internal_name = "_QPsimple_target_teams_only_reduction", frame_pointer = #llvm.framePointerKind<all>, omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, target_cpu = "gfx1030", target_features = #llvm.target_features<["+16-bit-insts", "+ci-insts", "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp", "+gfx10-3-insts", "+gfx10-insts", "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize32"]>} {
+    %0 = llvm.mlir.constant(1 : i64) : i64
+    %1 = llvm.alloca %0 x i32 {bindc_name = "sum"} : (i64) -> !llvm.ptr<5>
+    %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
+    %3 = llvm.mlir.constant(1 : i64) : i64
+    %4 = llvm.alloca %3 x i32 {bindc_name = "index_"} : (i64) -> !llvm.ptr<5>
+    %5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr
+    %6 = llvm.mlir.constant(0 : i32) : i32
+    %7 = llvm.mlir.constant(1 : i64) : i64
+    %8 = llvm.mlir.constant(1 : i64) : i64
+    llvm.store %6, %2 : i32, !llvm.ptr
+    %9 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sum"}
+    %10 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "index_"}
+    omp.target map_entries(%9 -> %arg0, %10 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+      %11 = llvm.mlir.constant(0 : index) : i64
+      %12 = llvm.mlir.constant(10000 : index) : i64
+      %13 = llvm.mlir.constant(1 : index) : i64
+      omp.teams reduction(@add_reduction_i32 %arg0 -> %arg2 : !llvm.ptr) {
+        %14 = llvm.trunc %13 : i64 to i32
+        llvm.br ^bb1(%14, %12 : i32, i64)
+      ^bb1(%15: i32, %16: i64):  // 2 preds: ^bb0, ^bb2
+        %17 = llvm.icmp "sgt" %16, %11 : i64
+        llvm.cond_br %17, ^bb2, ^bb3
+      ^bb2:  // pred: ^bb1
+        llvm.store %15, %arg1 : i32, !llvm.ptr
+        %18 = llvm.load %arg2 : !llvm.ptr -> i32
+        %19 = llvm.load %arg1 : !llvm.ptr -> i32
+        %20 = llvm.add %18, %19 : i32
+        llvm.store %20, %arg2 : i32, !llvm.ptr
+        %21 = llvm.load %arg1 : !llvm.ptr -> i32
+        %22 = llvm.add %21, %14 overflow<nsw> : i32
+        %23 = llvm.sub %16, %13 : i64
+        llvm.br ^bb1(%22, %23 : i32, i64)
+      ^bb3:  // pred: ^bb1
+        llvm.store %15, %arg1 : i32, !llvm.ptr
+        omp.terminator
+      }
+      omp.terminator
+    }
+    llvm.return
+  }
+}
+
+// CHECK: call i32 @__kmpc_target_init
+// CHECK: call void @[[OUTLINED:__omp_offloading_[A-Za-z0-9_.]*]]
+// CHECK: %[[MASTER:.+]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2
+// CHECK: icmp eq i32 %[[MASTER]], 1
+// CHECK: i1 %{{.+}}, label %[[THEN:[A-Za-z0-9_.]*]], label %[[DONE:[A-Za-z0-9_.]*]]
+// CHECK: [[THEN]]:
+// CHECK-NEXT: %[[FINAL_RHS:[A-Za-z0-9_.]*]] = load i32
+// CHECK-NEXT: %[[FINAL_LHS:[A-Za-z0-9_.]*]] = load i32
+// CHECK-NEXT: %[[FINAL_RESULT:[A-Za-z0-9_.]*]] = add i32 %[[FINAL_LHS]], %[[FINAL_RHS]]
+// CHECK-NEXT: store i32 %[[FINAL_RESULT]]
+
+// CHECK: call void @__kmpc_barrier
+// CHECK: call void @__kmpc_target_deinit
+
+// CHECK: define internal void @[[OUTLINED]]
+// Skip to the loop
+// CHECK: br i1
+// CHECK: %[[TEAM_RHS:[A-Za-z0-9_.]*]] = load i32
+// CHECK-NEXT: %[[TEAM_LHS:[A-Za-z0-9_.]*]] = load i32
+// CHECK-NEXT: %[[TEAM_RESULT:[A-Za-z0-9_.]*]] = add i32 %[[TEAM_RHS]], %[[TEAM_LHS]]
+// CHECK-NEXT: store i32 %[[TEAM_RESULT]]
@@ -1,5 +1,5 @@
 // RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
-
+// XFAIL: *
 // Only check the overall shape of the code and the presence of relevant
 // runtime calls. Actual IR checking is done at the OpenMPIRBuilder level.
 
 
@@ -0,0 +1,71 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// Only check the overall shape of the code and the presence of relevant
+// runtime calls. Actual IR checking is done at the OpenMPIRBuilder level.
+
+omp.private {type = private} @_QFsimple_teams_reductionEindex__private_i32 : i32
+omp.declare_reduction @add_reduction_i32 : i32 init {
+^bb0(%arg0: i32):
+  %0 = llvm.mlir.constant(0 : i32) : i32
+  omp.yield(%0 : i32)
+} combiner {
+^bb0(%arg0: i32, %arg1: i32):
+  %0 = llvm.add %arg0, %arg1 : i32
+  omp.yield(%0 : i32)
+}
+llvm.func @simple_teams_reduction_() attributes {fir.internal_name = "_QPsimple_teams_reduction", frame_pointer = #llvm.framePointerKind<all>, target_cpu = "x86-64"} {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x i32 {bindc_name = "sum"} : (i64) -> !llvm.ptr
+  %2 = llvm.mlir.constant(1 : i64) : i64
+  %3 = llvm.alloca %2 x i32 {bindc_name = "index_"} : (i64) -> !llvm.ptr
+  %4 = llvm.mlir.constant(10000 : i32) : i32
+  %5 = llvm.mlir.constant(1 : i32) : i32
+  %6 = llvm.mlir.constant(0 : i32) : i32
+  %7 = llvm.mlir.constant(1 : i64) : i64
+  %8 = llvm.mlir.constant(1 : i64) : i64
+  llvm.store %6, %1 : i32, !llvm.ptr
+  omp.teams reduction(@add_reduction_i32 %1 -> %arg0 : !llvm.ptr) {
+    omp.distribute private(@_QFsimple_teams_reductionEindex__private_i32 %3 -> %arg1 : !llvm.ptr) {
+      omp.loop_nest (%arg2) : i32 = (%5) to (%4) inclusive step (%5) {
+        llvm.store %arg2, %arg1 : i32, !llvm.ptr
+        %9 = llvm.load %arg0 : !llvm.ptr -> i32
+        %10 = llvm.load %arg1 : !llvm.ptr -> i32
+        %11 = llvm.add %9, %10 : i32
+        llvm.store %11, %arg0 : i32, !llvm.ptr
+        omp.yield
+      }
+    }
+    omp.terminator
+  }
+  llvm.return
+}
+// Call to outlined function
+// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams
+// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Outlined function.
+// CHECK: define internal void @[[OUTLINED]]
+
+// Private reduction variable and its initialization.
+// CHECK: %[[PRIVATE:.+]] = alloca i32
+// CHECK: store i32 0, ptr %[[PRIVATE]]
+
+// Call to the reduction function.
+// CHECK: call i32 @__kmpc_reduce
+// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Atomic version not generated
+// CHECK: unreachable
+
+// Non atomic version
+// CHECK: call void @__kmpc_end_reduce
+
+// Finalize
+// CHECK: br label %[[FINALIZE:.+]]
+
+// CHECK: [[FINALIZE]]:
+// CHECK: call void @__kmpc_barrier
+
+// Reduction function.
+// CHECK: define internal void @[[REDFUNC]]
+// CHECK: add i32
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,4 @@`
`1`	`1`	`! RUN: %flang_fc1 -fopenmp -emit-llvm %s -o - \| FileCheck %s`
`2`		`-! XFAIL: *`
`3`	`2`
`4`	`3`	`! Combinational testing of control flow graph and builder insertion points`
`5`	`4`	`! in mlir-to-llvm conversion:`