From 9688aa24d53840a223a872498e3d9ce54cb09390 Mon Sep 17 00:00:00 2001 From: ergawy Date: Fri, 3 Jan 2025 11:18:49 -0600 Subject: [PATCH] [flang][OpenMP] Allow saving first block of an OMP region for allocas Problem: -------- Consider the following example: ```fortran program test real :: x(1) integer :: i !$omp parallel do reduction(+:x) do i = 1,1 x = 1 end do !$omp end parallel do end program ``` The HLFIR+OMP IR for this example looks like this: ```mlir func.func @_QQmain() { ... omp.parallel { %5 = fir.embox %4#0(%3) : (!fir.ref<!fir.array<1xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<1xf32>> %6 = fir.alloca !fir.box<!fir.array<1xf32>> ... omp.wsloop private(@_QFEi_private_ref_i32 %1#0 -> %arg0 : !fir.ref<i32>) reduction(byref @add_reduction_byref_box_1xf32 %6 -> %arg1 : !fir.ref<!fir.box<!fir.array<1xf32>>>) { omp.loop_nest (%arg2) : i32 = (%c1_i32) to (%c1_i32_0) inclusive step (%c1_i32_1) { ... omp.yield } } omp.terminator } return } ``` The problem addressed by this PR is related to: the `alloca` in the `omp.parallel` region + the related `reduction` clause on the `omp.wsloop` op. When we try to translate the reduction from MLIR to LLVM, we have to choose an `alloca` insertion point. This happens in `convertOmpWsloop` where at entry to that function, this is what the LLVM module looks like: ```llvm define void @_QQmain() { %tid.addr = alloca i32, align 4 ... entry: %omp_global_thread_num = call i32 @__kmpc_global_thread_num(ptr @1) br label %omp.par.entry omp.par.entry: %tid.addr.local = alloca i32, align 4 ... br label %omp.par.region omp.par.region: br label %omp.par.region1 omp.par.region1: ... %5 = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 ``` Possible solution: ------------------ Now, when we choose an `alloca` insertion point for the reduction, this is the chosen block `omp.par.entry` (without the changes in this PR). The problem is that the allocation needed for the reduction needs to reference the `%5` SSA value. 
This results in inserting allocations in `omp.par.entry` that reference allocations in a later block `omp.par.region1` which causes the `Instruction does not dominate all uses!` error. The solution proposed by this PR is to allow `convertOmpOpRegions` to optionally save the first block of the OpenMP region being converted as an alloca block. This means that, for the above example, the allocation point chosen for the reduction will be in the `omp.par.region1` block. For now, this new optional argument is enabled only for `parallel` and `target` ops. --- .../OpenMP/atomic-capture-complex.f90 | 6 +- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 61 +++++++++++------- ...mptarget-byref-bycopy-generation-host.mlir | 2 +- mlir/test/Target/LLVMIR/omptarget-llvm.mlir | 16 ++--- .../LLVMIR/omptarget-parallel-llvm.mlir | 4 +- mlir/test/Target/LLVMIR/openmp-llvm.mlir | 20 +++--- .../openmp-parallel-reduction-multiblock.mlir | 2 +- .../openmp-reduction-array-sections.mlir | 62 ++++++++++--------- .../LLVMIR/openmp-reduction-init-arg.mlir | 2 +- .../LLVMIR/openmp-reduction-sections.mlir | 44 +++++++------ 10 files changed, 122 insertions(+), 97 deletions(-) diff --git a/flang/test/Integration/OpenMP/atomic-capture-complex.f90 b/flang/test/Integration/OpenMP/atomic-capture-complex.f90 index 4ffd18097d79e..a76cbb643ef8c 100644 --- a/flang/test/Integration/OpenMP/atomic-capture-complex.f90 +++ b/flang/test/Integration/OpenMP/atomic-capture-complex.f90 @@ -13,16 +13,16 @@ !CHECK: %[[VAL_1:.*]] = alloca { float, float }, i64 1, align 8 !CHECK: %[[ORIG_VAL:.*]] = alloca { float, float }, i64 1, align 8 !CHECK: store { float, float } { float 2.000000e+00, float 2.000000e+00 }, ptr %[[ORIG_VAL]], align 4 -!CHECK: br label %entry +!CHECK: br label %[[ENTRY:.*]] -!CHECK: entry: +!CHECK: [[ENTRY]]: !CHECK: %[[ATOMIC_TEMP_LOAD:.*]] = alloca { float, float }, align 8 !CHECK: call void @__atomic_load(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], i32 0) !CHECK: %[[PHI_NODE_ENTRY_1:.*]] = 
load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 8 !CHECK: br label %.atomic.cont !CHECK: .atomic.cont -!CHECK: %[[VAL_4:.*]] = phi { float, float } [ %[[PHI_NODE_ENTRY_1]], %entry ], [ %{{.*}}, %.atomic.cont ] +!CHECK: %[[VAL_4:.*]] = phi { float, float } [ %[[PHI_NODE_ENTRY_1]], %[[ENTRY]] ], [ %{{.*}}, %.atomic.cont ] !CHECK: %[[VAL_5:.*]] = extractvalue { float, float } %[[VAL_4]], 0 !CHECK: %[[VAL_6:.*]] = extractvalue { float, float } %[[VAL_4]], 1 !CHECK: %[[VAL_7:.*]] = fadd contract float %[[VAL_5]], 1.000000e+00 diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 87cb7f03fec6a..384799c021ac0 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -345,31 +345,37 @@ findAllocaInsertPoint(llvm::IRBuilderBase &builder, allocaInsertPoint = frame.allocaInsertPoint; return WalkResult::interrupt(); }); - if (walkResult.wasInterrupted()) - return allocaInsertPoint; // Otherwise, insert to the entry block of the surrounding function. - // If the current IRBuilder InsertPoint is the function's entry, it cannot - // also be used for alloca insertion which would result in insertion order - // confusion. Create a new BasicBlock for the Builder and use the entry block - // for the allocs. + if (!walkResult.wasInterrupted()) { + llvm::BasicBlock &funcEntryBlock = + builder.GetInsertBlock()->getParent()->getEntryBlock(); + allocaInsertPoint = llvm::OpenMPIRBuilder::InsertPointTy( + &funcEntryBlock, funcEntryBlock.getFirstInsertionPt()); + } + + // If the current IRBuilder insertion block is the same as the alloca + // insertion block, it cannot also be used for alloca insertion which would + // result in insertion order confusion. Create a new BasicBlock for the + // Builder and use the entry block for the allocs. 
+ // // TODO: Create a dedicated alloca BasicBlock at function creation such that // we do not need to move the current InertPoint here. - if (builder.GetInsertBlock() == - &builder.GetInsertBlock()->getParent()->getEntryBlock()) { + if (builder.GetInsertBlock() == allocaInsertPoint.getBlock()) { assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() && "Assuming end of basic block"); - llvm::BasicBlock *entryBB = llvm::BasicBlock::Create( - builder.getContext(), "entry", builder.GetInsertBlock()->getParent(), - builder.GetInsertBlock()->getNextNode()); - builder.CreateBr(entryBB); - builder.SetInsertPoint(entryBB); + auto *insertCont = splitBB( + llvm::OpenMPIRBuilder::InsertPointTy( + allocaInsertPoint.getBlock(), allocaInsertPoint.getBlock()->end()), + true, "insert.cont"); + builder.SetInsertPoint(insertCont, insertCont->end()); } - llvm::BasicBlock &funcEntryBlock = - builder.GetInsertBlock()->getParent()->getEntryBlock(); return llvm::OpenMPIRBuilder::InsertPointTy( - &funcEntryBlock, funcEntryBlock.getFirstInsertionPt()); + allocaInsertPoint.getBlock(), + allocaInsertPoint.getPoint() != allocaInsertPoint.getBlock()->end() + ? 
allocaInsertPoint.getPoint() + : allocaInsertPoint.getBlock()->getFirstInsertionPt()); } /// Converts the given region that appears within an OpenMP dialect operation to @@ -380,7 +386,8 @@ findAllocaInsertPoint(llvm::IRBuilderBase &builder, static llvm::Expected convertOmpOpRegions( Region ®ion, StringRef blockName, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, - SmallVectorImpl *continuationBlockPHIs = nullptr) { + SmallVectorImpl *continuationBlockPHIs = nullptr, + bool saveFirstBlockForAlloca = false) { llvm::BasicBlock *continuationBlock = splitBB(builder, true, "omp.region.cont"); llvm::BasicBlock *sourceBlock = builder.GetInsertBlock(); @@ -441,6 +448,14 @@ static llvm::Expected convertOmpOpRegions( // Convert blocks one by one in topological order to ensure // defs are converted before uses. SetVector blocks = getBlocksSortedByDominance(region); + llvm::BasicBlock *firstLLVMBB = moduleTranslation.lookupBlock(blocks.front()); + std::optional> + frame; + + if (saveFirstBlockForAlloca) + frame.emplace(moduleTranslation, llvm::OpenMPIRBuilder::InsertPointTy( + firstLLVMBB, firstLLVMBB->end())); + for (Block *bb : blocks) { llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb); // Retarget the branch of the entry block to the entry block of the @@ -2093,15 +2108,11 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation::SaveStack mappingGuard( moduleTranslation, reductionVariableMap); - // Save the alloca insertion point on ModuleTranslation stack for use in - // nested regions. - LLVM::ModuleTranslation::SaveStack frame( - moduleTranslation, allocaIP); - // ParallelOp has only one region associated with it. 
builder.restoreIP(codeGenIP); llvm::Expected regionBlock = convertOmpOpRegions( - opInst.getRegion(), "omp.par.region", builder, moduleTranslation); + opInst.getRegion(), "omp.par.region", builder, moduleTranslation, + /*continuationBlockPHIs=*/nullptr, /*saveFirstBlockForAlloca=*/true); if (!regionBlock) return regionBlock.takeError(); @@ -2186,6 +2197,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = @@ -4022,7 +4034,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, builder.restoreIP(codeGenIP); llvm::Expected exitBlock = convertOmpOpRegions( - targetRegion, "omp.target", builder, moduleTranslation); + targetRegion, "omp.target", builder, moduleTranslation, + /*continuationBlockPHIs=*/nullptr, /*saveFirstBlockForAlloca=*/true); if (!exitBlock) return exitBlock.takeError(); diff --git a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir index 871f5caf7b2ff..e4da548e84a1c 100644 --- a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir @@ -26,7 +26,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a // CHECK: define void @_QQmain() { // CHECK: %[[BYCOPY_ALLOCA:.*]] = alloca ptr, align 8 -// CHECK: entry: ; preds = %0 +// CHECK: {{.*}}: ; preds = %0 // CHECK: %[[LOAD_VAL:.*]] = load i32, ptr @_QFEi, align 4 // CHECK: store i32 %[[LOAD_VAL]], ptr %[[BYCOPY_ALLOCA]], align 4 // CHECK: %[[BYCOPY_LOAD:.*]] = load ptr, ptr %[[BYCOPY_ALLOCA]], align 8 diff --git a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir index 7f21095763a39..e6a3c54c6957f 100644 --- 
a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir @@ -20,7 +20,7 @@ llvm.func @_QPopenmp_target_data() { // CHECK: %[[VAL_2:.*]] = alloca [1 x ptr], align 8 // CHECK: %[[VAL_3:.*]] = alloca i32, i64 1, align 4 // CHECK: br label %[[VAL_4:.*]] -// CHECK: entry: ; preds = %[[VAL_5:.*]] +// CHECK: [[VAL_4]]: ; preds = %[[VAL_5:.*]] // CHECK: %[[VAL_6:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0 // CHECK: store ptr %[[VAL_3]], ptr %[[VAL_6]], align 8 // CHECK: %[[VAL_7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 @@ -65,7 +65,7 @@ llvm.func @_QPopenmp_target_data_region(%0 : !llvm.ptr) { // CHECK: %[[VAL_1:.*]] = alloca [1 x ptr], align 8 // CHECK: %[[VAL_2:.*]] = alloca [1 x ptr], align 8 // CHECK: br label %[[VAL_3:.*]] -// CHECK: entry: ; preds = %[[VAL_4:.*]] +// CHECK: [[VAL_3]]: ; preds = %[[VAL_4:.*]] // CHECK: %[[ARR_OFFSET:.*]] = getelementptr inbounds [1024 x i32], ptr %[[ARR_DATA:.*]], i64 0, i64 0 // CHECK: %[[VAL_5:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0 // CHECK: store ptr %[[ARR_DATA]], ptr %[[VAL_5]], align 8 @@ -151,7 +151,7 @@ llvm.func @_QPomp_target_enter_exit(%1 : !llvm.ptr, %3 : !llvm.ptr) { // CHECK: %[[VAL_9:.*]] = icmp slt i32 %[[VAL_8]], 10 // CHECK: %[[VAL_10:.*]] = load i32, ptr %[[VAL_6]], align 4 // CHECK: br label %[[VAL_11:.*]] -// CHECK: entry: ; preds = %[[VAL_12:.*]] +// CHECK: [[VAL_11]]: ; preds = %[[VAL_12:.*]] // CHECK: br i1 %[[VAL_9]], label %[[VAL_13:.*]], label %[[VAL_14:.*]] // CHECK: omp_if.then: ; preds = %[[VAL_11]] // CHECK: %[[ARR_OFFSET1:.*]] = getelementptr inbounds [1024 x i32], ptr %[[VAL_16:.*]], i64 0, i64 0 @@ -228,7 +228,7 @@ llvm.func @_QPopenmp_target_use_dev_ptr() { // CHECK: %[[VAL_3:.*]] = alloca ptr, align 8 // CHECK: %[[VAL_4:.*]] = alloca ptr, i64 1, align 8 // CHECK: br label %[[VAL_5:.*]] -// CHECK: entry: ; preds = %[[VAL_6:.*]] +// CHECK: [[VAL_5]]: ; preds = %[[VAL_6:.*]] // 
CHECK: %[[VAL_7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0 // CHECK: store ptr %[[VAL_4]], ptr %[[VAL_7]], align 8 // CHECK: %[[VAL_8:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 @@ -271,7 +271,7 @@ llvm.func @_QPopenmp_target_use_dev_addr() { // CHECK: %[[VAL_2:.*]] = alloca [1 x ptr], align 8 // CHECK: %[[VAL_3:.*]] = alloca ptr, i64 1, align 8 // CHECK: br label %[[VAL_4:.*]] -// CHECK: entry: ; preds = %[[VAL_5:.*]] +// CHECK: [[VAL_4]]: ; preds = %[[VAL_5:.*]] // CHECK: %[[VAL_6:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0 // CHECK: store ptr %[[VAL_3]], ptr %[[VAL_6]], align 8 // CHECK: %[[VAL_7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 @@ -312,7 +312,7 @@ llvm.func @_QPopenmp_target_use_dev_addr_no_ptr() { // CHECK: %[[VAL_2:.*]] = alloca [1 x ptr], align 8 // CHECK: %[[VAL_3:.*]] = alloca i32, i64 1, align 4 // CHECK: br label %[[VAL_4:.*]] -// CHECK: entry: ; preds = %[[VAL_5:.*]] +// CHECK: [[VAL_4]]: ; preds = %[[VAL_5:.*]] // CHECK: %[[VAL_6:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0 // CHECK: store ptr %[[VAL_3]], ptr %[[VAL_6]], align 8 // CHECK: %[[VAL_7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 @@ -359,7 +359,7 @@ llvm.func @_QPopenmp_target_use_dev_addr_nomap() { // CHECK: %[[VAL_3:.*]] = alloca ptr, i64 1, align 8 // CHECK: %[[VAL_4:.*]] = alloca ptr, i64 1, align 8 // CHECK: br label %[[VAL_5:.*]] -// CHECK: entry: ; preds = %[[VAL_6:.*]] +// CHECK: [[VAL_5]]: ; preds = %[[VAL_6:.*]] // CHECK: %[[VAL_7:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_0]], i32 0, i32 0 // CHECK: store ptr %[[VAL_4]], ptr %[[VAL_7]], align 8 // CHECK: %[[VAL_8:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_1]], i32 0, i32 0 @@ -418,7 +418,7 @@ llvm.func @_QPopenmp_target_use_dev_both() { // CHECK: %[[VAL_4:.*]] = alloca ptr, i64 1, align 8 // CHECK: %[[VAL_5:.*]] = alloca ptr, i64 1, align 8 // 
CHECK: br label %[[VAL_6:.*]] -// CHECK: entry: ; preds = %[[VAL_7:.*]] +// CHECK: [[VAL_6]]: ; preds = %[[VAL_7:.*]] // CHECK: %[[VAL_8:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_0]], i32 0, i32 0 // CHECK: store ptr %[[VAL_4]], ptr %[[VAL_8]], align 8 // CHECK: %[[VAL_9:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_1]], i32 0, i32 0 diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir index 4903656c22ec7..e586110529c00 100644 --- a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir @@ -54,9 +54,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo // CHECK: define weak_odr protected amdgpu_kernel void @[[FUNC0:.*]]( // CHECK-SAME: ptr %[[TMP:.*]], ptr %[[TMP0:.*]]) { // CHECK: %[[TMP1:.*]] = alloca [1 x ptr], align 8, addrspace(5) -// CHECK: %[[TMP2:.*]] = addrspacecast ptr addrspace(5) %[[TMP1]] to ptr // CHECK: %[[STRUCTARG:.*]] = alloca { ptr }, align 8, addrspace(5) -// CHECK: %[[STRUCTARG_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[STRUCTARG]] to ptr // CHECK: %[[TMP3:.*]] = alloca ptr, align 8, addrspace(5) // CHECK: %[[TMP4:.*]] = addrspacecast ptr addrspace(5) %[[TMP3]] to ptr // CHECK: store ptr %[[TMP0]], ptr %[[TMP4]], align 8 @@ -64,6 +62,8 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo // CHECK: %[[EXEC_USER_CODE:.*]] = icmp eq i32 %[[TMP5]], -1 // CHECK: br i1 %[[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]] // CHECK: %[[TMP6:.*]] = load ptr, ptr %[[TMP4]], align 8 +// CHECK: %[[TMP2:.*]] = addrspacecast ptr addrspace(5) %[[TMP1]] to ptr +// CHECK: %[[STRUCTARG_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[STRUCTARG]] to ptr // CHECK: %[[OMP_GLOBAL_THREAD_NUM:.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) // CHECK: %[[GEP_:.*]] = getelementptr { ptr 
}, ptr addrspace(5) %[[STRUCTARG]], i32 0, i32 0 // CHECK: store ptr %[[TMP6]], ptr addrspace(5) %[[GEP_]], align 8 diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 44e32c3f35f9b..1177dccab80cb 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -1415,16 +1415,16 @@ llvm.func @omp_atomic_update(%x:!llvm.ptr, %expr: i32, %xbool: !llvm.ptr, %exprb //CHECK: {{.*}} = alloca { float, float }, i64 1, align 8 //CHECK: %[[ORIG_VAL:.*]] = alloca { float, float }, i64 1, align 8 -//CHECK: br label %entry +//CHECK: br label %[[ENTRY:.*]] -//CHECK: entry: +//CHECK: [[ENTRY]]: //CHECK: %[[ATOMIC_TEMP_LOAD:.*]] = alloca { float, float }, align 8 //CHECK: call void @__atomic_load(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], i32 0) //CHECK: %[[PHI_NODE_ENTRY_1:.*]] = load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 8 //CHECK: br label %.atomic.cont //CHECK: .atomic.cont -//CHECK: %[[VAL_4:.*]] = phi { float, float } [ %[[PHI_NODE_ENTRY_1]], %entry ], [ %{{.*}}, %.atomic.cont ] +//CHECK: %[[VAL_4:.*]] = phi { float, float } [ %[[PHI_NODE_ENTRY_1]], %{{.*}} ], [ %{{.*}}, %.atomic.cont ] //CHECK: %[[VAL_5:.*]] = extractvalue { float, float } %[[VAL_4]], 0 //CHECK: %[[VAL_6:.*]] = extractvalue { float, float } %[[VAL_4]], 1 //CHECK: %[[VAL_7:.*]] = fadd contract float %[[VAL_5]], 1.000000e+00 @@ -1467,16 +1467,16 @@ llvm.func @_QPomp_atomic_update_complex() { //CHECK: %[[VAL_1:.*]] = alloca { float, float }, i64 1, align 8 //CHECK: %[[ORIG_VAL:.*]] = alloca { float, float }, i64 1, align 8 //CHECK: store { float, float } { float 2.000000e+00, float 2.000000e+00 }, ptr %[[ORIG_VAL]], align 4 -//CHECK: br label %entry +//CHECK: br label %[[ENTRY:.*]] -//CHECK: entry: ; preds = %0 +//CHECK: [[ENTRY]]: ; preds = %0 //CHECK: %[[ATOMIC_TEMP_LOAD:.*]] = alloca { float, float }, align 8 //CHECK: call void @__atomic_load(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], 
i32 0) //CHECK: %[[PHI_NODE_ENTRY_1:.*]] = load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 8 //CHECK: br label %.atomic.cont //CHECK: .atomic.cont -//CHECK: %[[VAL_4:.*]] = phi { float, float } [ %[[PHI_NODE_ENTRY_1]], %entry ], [ %{{.*}}, %.atomic.cont ] +//CHECK: %[[VAL_4:.*]] = phi { float, float } [ %[[PHI_NODE_ENTRY_1]], %{{.*}} ], [ %{{.*}}, %.atomic.cont ] //CHECK: %[[VAL_5:.*]] = extractvalue { float, float } %[[VAL_4]], 0 //CHECK: %[[VAL_6:.*]] = extractvalue { float, float } %[[VAL_4]], 1 //CHECK: %[[VAL_7:.*]] = fadd contract float %[[VAL_5]], 1.000000e+00 @@ -1613,7 +1613,7 @@ llvm.func @omp_atomic_update_intrinsic(%x:!llvm.ptr, %expr: i32) { // CHECK-LABEL: @atomic_update_cmpxchg // CHECK-SAME: (ptr %[[X:.*]], ptr %[[EXPR:.*]]) { // CHECK: %[[AT_LOAD_VAL:.*]] = load atomic i32, ptr %[[X]] monotonic, align 4 -// CHECK: %[[LOAD_VAL_PHI:.*]] = phi i32 [ %[[AT_LOAD_VAL]], %entry ], [ %[[LOAD_VAL:.*]], %.atomic.cont ] +// CHECK: %[[LOAD_VAL_PHI:.*]] = phi i32 [ %[[AT_LOAD_VAL]], %{{.*}} ], [ %[[LOAD_VAL:.*]], %.atomic.cont ] // CHECK: %[[VAL_SUCCESS:.*]] = cmpxchg ptr %[[X]], i32 %[[LOAD_VAL_PHI]], i32 %{{.*}} monotonic monotonic, align 4 // CHECK: %[[LOAD_VAL]] = extractvalue { i32, i1 } %[[VAL_SUCCESS]], 0 // CHECK: br i1 %{{.*}}, label %.atomic.exit, label %.atomic.cont @@ -2216,8 +2216,8 @@ llvm.func @omp_sections_empty() -> () { omp.sections { omp.terminator } - // CHECK-NEXT: br label %entry - // CHECK: entry: + // CHECK-NEXT: br label %[[ENTRY:.*]] + // CHECK: [[ENTRY]]: // CHECK-NEXT: ret void llvm.return } @@ -3093,7 +3093,7 @@ llvm.func @omp_task_final(%boolexpr: i1) { // CHECK: br label %[[entry:[^,]+]] // CHECK: [[entry]]: // CHECK: br label %[[codeRepl:[^,]+]] -// CHECK: [[codeRepl]]: ; preds = %entry +// CHECK: [[codeRepl]]: // CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}}) // CHECK: %[[final_flag:.+]] = select i1 %[[boolexpr]], i32 2, i32 0 // CHECK: %[[task_flags:.+]] = or i32 %[[final_flag]], 
1 diff --git a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir index 55fb5954548a0..134eb8ef5fd9f 100644 --- a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir +++ b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir @@ -30,7 +30,7 @@ llvm.func @missordered_blocks_(%arg0: !llvm.ptr {fir.bindc_name = "x"}, %arg1: ! // CHECK: %[[VAL_0:.*]] = alloca { ptr, ptr }, align 8 // CHECK: br label %[[VAL_1:.*]] -// CHECK: entry: ; preds = %[[VAL_2:.*]] +// CHECK: [[VAL_1]]: ; preds = %[[VAL_2:.*]] // CHECK: %[[VAL_3:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) // CHECK: br label %[[VAL_4:.*]] // CHECK: omp_parallel: ; preds = %[[VAL_1]] diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir index fdfcc66b91012..8adaabfea2f07 100644 --- a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir +++ b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir @@ -78,27 +78,31 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute // CHECK-LABEL: define internal void @sectionsreduction_..omp_par // CHECK: omp.par.entry: -// CHECK: %[[VAL_6:.*]] = alloca i32, align 4 -// CHECK: %[[VAL_7:.*]] = alloca i32, align 4 -// CHECK: %[[VAL_8:.*]] = alloca i32, align 4 -// CHECK: %[[VAL_9:.*]] = alloca i32, align 4 // CHECK: %[[VAL_10:.*]] = alloca i32, align 4 // CHECK: %[[VAL_11:.*]] = load i32, ptr %[[VAL_12:.*]], align 4 // CHECK: store i32 %[[VAL_11]], ptr %[[VAL_10]], align 4 // CHECK: %[[VAL_13:.*]] = load i32, ptr %[[VAL_10]], align 4 +// CHECK: br label %omp.par.region + +// CHECK: omp.par.region: +// CHECK: %[[VAL_6:.*]] = alloca i32, align 4 +// CHECK: %[[VAL_7:.*]] = alloca i32, align 4 +// CHECK: %[[VAL_8:.*]] = alloca i32, align 4 +// CHECK: %[[VAL_9:.*]] = alloca i32, align 4 +// CHECK: %[[VAL_19:.*]] = alloca { ptr, i64, i32, i8, i8, i8, 
i8, [1 x [3 x i64]] }, i64 1, align 8 // CHECK: %[[VAL_20:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8 // CHECK: %[[VAL_21:.*]] = alloca ptr, align 8 // CHECK: %[[VAL_14:.*]] = alloca [1 x ptr], align 8 -// CHECK: br label %[[VAL_15:.*]] -// CHECK: omp.reduction.init: ; preds = %[[VAL_16:.*]] +// CHECK: br label %omp.reduction.init + +// CHECK: omp.reduction.init: // CHECK: store ptr %[[VAL_20]], ptr %[[VAL_21]], align 8 -// CHECK: br label %[[VAL_17:.*]] -// CHECK: omp.par.region: ; preds = %[[VAL_15]] -// CHECK: br label %[[VAL_18:.*]] -// CHECK: omp.par.region1: ; preds = %[[VAL_17]] -// CHECK: %[[VAL_19:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8 +// CHECK: br label %[[CONT_BB:.*]] + +// CHECK: [[CONT_BB]]: // CHECK: br label %[[VAL_22:.*]] -// CHECK: omp_section_loop.preheader: ; preds = %[[VAL_18]] + +// CHECK: omp_section_loop.preheader: // CHECK: store i32 0, ptr %[[VAL_7]], align 4 // CHECK: store i32 1, ptr %[[VAL_8]], align 4 // CHECK: store i32 1, ptr %[[VAL_9]], align 4 @@ -138,17 +142,17 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute // CHECK: br label %[[VAL_46:.*]] // CHECK: omp.reduction.nonatomic.body: ; preds = %[[VAL_43]] // CHECK: br label %[[VAL_47:.*]] -// CHECK: omp.reduction.nonatomic.body16: ; preds = %[[VAL_48:.*]], %[[VAL_46]] +// CHECK: [[VAL_47]]: ; preds = %[[VAL_48:.*]], %[[VAL_46]] // CHECK: %[[VAL_49:.*]] = phi i64 [ %[[VAL_50:.*]], %[[VAL_48]] ], [ 0, %[[VAL_46]] ] // CHECK: %[[VAL_51:.*]] = icmp sgt i64 %[[VAL_49]], 0 // CHECK: br i1 %[[VAL_51]], label %[[VAL_48]], label %[[VAL_52:.*]] -// CHECK: omp.reduction.nonatomic.body18: ; preds = %[[VAL_47]] +// CHECK: [[VAL_52]]: ; preds = %[[VAL_47]] // CHECK: br label %[[VAL_53:.*]] -// CHECK: omp.region.cont15: ; preds = %[[VAL_52]] +// CHECK: [[VAL_53]]: ; preds = %[[VAL_52]] // CHECK: %[[VAL_54:.*]] = phi ptr [ %[[VAL_19]], %[[VAL_52]] ] // CHECK: call void 
@__kmpc_end_reduce(ptr @1, i32 %[[VAL_40]], ptr @.gomp_critical_user_.reduction.var) // CHECK: br label %[[VAL_42]] -// CHECK: omp.reduction.nonatomic.body17: ; preds = %[[VAL_47]] +// CHECK: [[VAL_48]]: ; preds = %[[VAL_47]] // CHECK: %[[VAL_50]] = sub i64 %[[VAL_49]], 1 // CHECK: br label %[[VAL_47]] // CHECK: reduce.finalize: ; preds = %[[VAL_53]], %[[VAL_38]] @@ -160,15 +164,15 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute // CHECK: %[[VAL_58:.*]] = ptrtoint ptr %[[VAL_56]] to i64 // CHECK: %[[VAL_59:.*]] = icmp ne i64 %[[VAL_58]], 0 // CHECK: br i1 %[[VAL_59]], label %[[VAL_60:.*]], label %[[VAL_61:.*]] -// CHECK: omp.reduction.cleanup22: ; preds = %[[VAL_60]], %[[VAL_57]] +// CHECK: [[VAL_61]]: ; preds = %[[VAL_60]], %[[VAL_57]] // CHECK: br label %[[VAL_62:.*]] -// CHECK: omp.region.cont20: ; preds = %[[VAL_61]] +// CHECK: [[VAL_62]]: ; preds = %[[VAL_61]] // CHECK: br label %[[VAL_63:.*]] // CHECK: omp.region.cont: ; preds = %[[VAL_62]] // CHECK: br label %[[VAL_64:.*]] // CHECK: omp.par.pre_finalize: ; preds = %[[VAL_63]] // CHECK: br label %[[VAL_65:.*]] -// CHECK: omp.reduction.cleanup21: ; preds = %[[VAL_57]] +// CHECK: [[VAL_60]]: ; preds = %[[VAL_57]] // CHECK: br label %[[VAL_61]] // CHECK: omp_section_loop.body: ; preds = %[[VAL_32]] // CHECK: %[[VAL_66:.*]] = add i32 %[[VAL_30]], %[[VAL_24]] @@ -178,34 +182,34 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute // CHECK: i32 0, label %[[VAL_70:.*]] // CHECK: i32 1, label %[[VAL_71:.*]] // CHECK: ] -// CHECK: omp_section_loop.body.case6: ; preds = %[[VAL_34]] +// CHECK: [[VAL_71]]: ; preds = %[[VAL_34]] // CHECK: br label %[[VAL_72:.*]] -// CHECK: omp.section.region8: ; preds = %[[VAL_71]] +// CHECK: [[VAL_72]]: ; preds = %[[VAL_71]] // CHECK: br label %[[VAL_73:.*]] -// CHECK: omp.section.region9: ; preds = %[[VAL_74:.*]], %[[VAL_72]] +// CHECK: [[VAL_73]]: ; preds = %[[VAL_74:.*]], %[[VAL_72]] // CHECK: %[[VAL_75:.*]] = phi i64 
[ %[[VAL_76:.*]], %[[VAL_74]] ], [ 1, %[[VAL_72]] ] // CHECK: %[[VAL_77:.*]] = icmp sgt i64 %[[VAL_75]], 0 // CHECK: br i1 %[[VAL_77]], label %[[VAL_74]], label %[[VAL_78:.*]] -// CHECK: omp.section.region11: ; preds = %[[VAL_73]] +// CHECK: [[VAL_78]]: ; preds = %[[VAL_73]] // CHECK: br label %[[VAL_79:.*]] -// CHECK: omp.region.cont7: ; preds = %[[VAL_78]] +// CHECK: [[VAL_79]]: ; preds = %[[VAL_78]] // CHECK: br label %[[VAL_69]] -// CHECK: omp.section.region10: ; preds = %[[VAL_73]] +// CHECK: [[VAL_74]]: ; preds = %[[VAL_73]] // CHECK: %[[VAL_76]] = sub i64 %[[VAL_75]], 1 // CHECK: br label %[[VAL_73]] // CHECK: omp_section_loop.body.case: ; preds = %[[VAL_34]] // CHECK: br label %[[VAL_80:.*]] // CHECK: omp.section.region: ; preds = %[[VAL_70]] // CHECK: br label %[[VAL_81:.*]] -// CHECK: omp.section.region3: ; preds = %[[VAL_82:.*]], %[[VAL_80]] +// CHECK: [[VAL_81]]: ; preds = %[[VAL_82:.*]], %[[VAL_80]] // CHECK: %[[VAL_83:.*]] = phi i64 [ %[[VAL_84:.*]], %[[VAL_82]] ], [ 1, %[[VAL_80]] ] // CHECK: %[[VAL_85:.*]] = icmp sgt i64 %[[VAL_83]], 0 // CHECK: br i1 %[[VAL_85]], label %[[VAL_82]], label %[[VAL_86:.*]] -// CHECK: omp.section.region5: ; preds = %[[VAL_81]] +// CHECK: [[VAL_86]]: ; preds = %[[VAL_81]] // CHECK: br label %[[VAL_87:.*]] -// CHECK: omp.region.cont2: ; preds = %[[VAL_86]] +// CHECK: [[VAL_87]]: ; preds = %[[VAL_86]] // CHECK: br label %[[VAL_69]] -// CHECK: omp.section.region4: ; preds = %[[VAL_81]] +// CHECK: [[VAL_82]]: ; preds = %[[VAL_81]] // CHECK: %[[VAL_84]] = sub i64 %[[VAL_83]], 1 // CHECK: br label %[[VAL_81]] // CHECK: omp_section_loop.body.sections.after: ; preds = %[[VAL_79]], %[[VAL_87]], %[[VAL_34]] diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir index 8e28f0b85b259..6a3ebfd9455f2 100644 --- a/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir +++ b/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir @@ -36,7 +36,7 @@ module { // CHECK: 
%[[VAL_1:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8 // CHECK: %[[VAL_2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8 // CHECK: br label %[[VAL_3:.*]] -// CHECK: entry: ; preds = %[[VAL_4:.*]] +// CHECK: [[VAL_3]]: ; preds = %[[VAL_4:.*]] // CHECK: %[[VAL_5:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) // CHECK: br label %[[VAL_6:.*]] // CHECK: omp_parallel: ; preds = %[[VAL_3]] diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir index ed7e9fada5fc4..91192ff769617 100644 --- a/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir +++ b/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir @@ -39,25 +39,32 @@ llvm.func @sections_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attributes {fir.in // CHECK: omp.par.entry: // CHECK: %[[VAL_9:.*]] = getelementptr { ptr }, ptr %[[VAL_10:.*]], i32 0, i32 0 // CHECK: %[[VAL_11:.*]] = load ptr, ptr %[[VAL_9]], align 8 -// CHECK: %[[VAL_12:.*]] = alloca i32, align 4 -// CHECK: %[[VAL_13:.*]] = alloca i32, align 4 -// CHECK: %[[VAL_14:.*]] = alloca i32, align 4 -// CHECK: %[[VAL_15:.*]] = alloca i32, align 4 // CHECK: %[[VAL_16:.*]] = alloca i32, align 4 // CHECK: %[[VAL_17:.*]] = load i32, ptr %[[VAL_18:.*]], align 4 // CHECK: store i32 %[[VAL_17]], ptr %[[VAL_16]], align 4 // CHECK: %[[VAL_19:.*]] = load i32, ptr %[[VAL_16]], align 4 +// CHECK: br label %omp.par.region + +// CHECK: omp.par.region: +// CHECK: br label %omp.par.region1 + +// CHECK: omp.par.region1: +// CHECK: %[[VAL_12:.*]] = alloca i32, align 4 +// CHECK: %[[VAL_13:.*]] = alloca i32, align 4 +// CHECK: %[[VAL_14:.*]] = alloca i32, align 4 +// CHECK: %[[VAL_15:.*]] = alloca i32, align 4 // CHECK: %[[VAL_20:.*]] = alloca float, align 4 // CHECK: %[[VAL_21:.*]] = alloca [1 x ptr], align 8 -// CHECK: br label %[[VAL_22:.*]] -// CHECK: omp.reduction.init: ; preds = %[[VAL_23:.*]] +// CHECK: br label %omp.reduction.init + 
+// CHECK: omp.reduction.init: // CHECK: store float 0.000000e+00, ptr %[[VAL_20]], align 4 -// CHECK: br label %[[VAL_24:.*]] -// CHECK: omp.par.region: ; preds = %[[VAL_22]] -// CHECK: br label %[[VAL_25:.*]] -// CHECK: omp.par.region1: ; preds = %[[VAL_24]] -// CHECK: br label %[[VAL_26:.*]] -// CHECK: omp_section_loop.preheader: ; preds = %[[VAL_25]] +// CHECK: br label %[[CONT_BB:.*]] + +// CHECK: [[CONT_BB]]: +// CHECK: br label %[[PRE_HEADER:omp_section_loop.preheader]] + +// CHECK: [[PRE_HEADER]]: // CHECK: store i32 0, ptr %[[VAL_13]], align 4 // CHECK: store i32 1, ptr %[[VAL_14]], align 4 // CHECK: store i32 1, ptr %[[VAL_15]], align 4 @@ -68,8 +75,9 @@ llvm.func @sections_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attributes {fir.in // CHECK: %[[VAL_30:.*]] = sub i32 %[[VAL_29]], %[[VAL_28]] // CHECK: %[[VAL_31:.*]] = add i32 %[[VAL_30]], 1 // CHECK: br label %[[VAL_32:.*]] -// CHECK: omp_section_loop.header: ; preds = %[[VAL_33:.*]], %[[VAL_26]] -// CHECK: %[[VAL_34:.*]] = phi i32 [ 0, %[[VAL_26]] ], [ %[[VAL_35:.*]], %[[VAL_33]] ] + +// CHECK: omp_section_loop.header: ; preds = %[[VAL_33:omp_section_loop.inc]], %[[PRE_HEADER]] +// CHECK: %[[VAL_34:.*]] = phi i32 [ 0, %[[PRE_HEADER]] ], [ %[[VAL_35:.*]], %[[VAL_33]] ] // CHECK: br label %[[VAL_36:.*]] // CHECK: omp_section_loop.cond: ; preds = %[[VAL_32]] // CHECK: %[[VAL_37:.*]] = icmp ult i32 %[[VAL_34]], %[[VAL_31]] @@ -115,14 +123,14 @@ llvm.func @sections_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attributes {fir.in // CHECK: i32 0, label %[[VAL_60:.*]] // CHECK: i32 1, label %[[VAL_61:.*]] // CHECK: ] -// CHECK: omp_section_loop.body.case3: ; preds = %[[VAL_38]] +// CHECK: [[VAL_61]]: ; preds = %[[VAL_38]] // CHECK: br label %[[VAL_62:.*]] -// CHECK: omp.section.region5: ; preds = %[[VAL_61]] +// CHECK: [[VAL_62]]: ; preds = %[[VAL_61]] // CHECK: %[[VAL_63:.*]] = load float, ptr %[[VAL_20]], align 4 // CHECK: %[[VAL_64:.*]] = fadd contract float %[[VAL_63]], 2.000000e+00 // CHECK: store float 
%[[VAL_64]], ptr %[[VAL_20]], align 4 // CHECK: br label %[[VAL_65:.*]] -// CHECK: omp.region.cont4: ; preds = %[[VAL_62]] +// CHECK: [[VAL_65]]: ; preds = %[[VAL_62]] // CHECK: br label %[[VAL_59]] // CHECK: omp_section_loop.body.case: ; preds = %[[VAL_38]] // CHECK: br label %[[VAL_66:.*]] @@ -131,7 +139,7 @@ llvm.func @sections_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attributes {fir.in // CHECK: %[[VAL_68:.*]] = fadd contract float %[[VAL_67]], 1.000000e+00 // CHECK: store float %[[VAL_68]], ptr %[[VAL_20]], align 4 // CHECK: br label %[[VAL_69:.*]] -// CHECK: omp.region.cont2: ; preds = %[[VAL_66]] +// CHECK: [[VAL_69]]: ; preds = %[[VAL_66]] // CHECK: br label %[[VAL_59]] // CHECK: omp_section_loop.body.sections.after: ; preds = %[[VAL_65]], %[[VAL_69]], %[[VAL_38]] // CHECK: br label %[[VAL_33]]