From e9dec448befd3a7c11019e52d64706a116f35242 Mon Sep 17 00:00:00 2001 From: Kiran Chandramohan Date: Sun, 23 Feb 2025 23:21:58 +0000 Subject: [PATCH] [MLIR][OPENMP] Relax requirement about branches as terminator of private alloc Fixes #126966 --- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 11 +++-- mlir/test/Target/LLVMIR/openmp-private.mlir | 40 +++++++++++++++++++ 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index b9d88a68410ee..bb143ba080542 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -1420,20 +1420,19 @@ allocatePrivateVars(llvm::IRBuilderBase &builder, const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, llvm::DenseMap *mappedPrivateVars = nullptr) { // Allocate private vars - llvm::BranchInst *allocaTerminator = - llvm::cast(allocaIP.getBlock()->getTerminator()); + llvm::Instruction *allocaTerminator = allocaIP.getBlock()->getTerminator(); splitBB(llvm::OpenMPIRBuilder::InsertPointTy(allocaIP.getBlock(), allocaTerminator->getIterator()), true, allocaTerminator->getStableDebugLoc(), "omp.region.after_alloca"); llvm::IRBuilderBase::InsertPointGuard guard(builder); - // Update the allocaTerminator in case the alloca block was split above. - allocaTerminator = - llvm::cast(allocaIP.getBlock()->getTerminator()); + // Update the allocaTerminator since the alloca block was split above. + allocaTerminator = allocaIP.getBlock()->getTerminator(); builder.SetInsertPoint(allocaTerminator); + // The new terminator is an uncondition branch created by the splitBB above. assert(allocaTerminator->getNumSuccessors() == 1 && - "This is an unconditional branch created by OpenMPIRBuilder"); + "This is an unconditional branch created by splitBB"); llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor(0); diff --git a/mlir/test/Target/LLVMIR/openmp-private.mlir b/mlir/test/Target/LLVMIR/openmp-private.mlir index 24de4a1fdf5e3..d46bfa529da6c 100644 --- a/mlir/test/Target/LLVMIR/openmp-private.mlir +++ b/mlir/test/Target/LLVMIR/openmp-private.mlir @@ -265,3 +265,43 @@ omp.private {type = firstprivate} @_QFequivalenceEx_firstprivate_ptr_f32 : f32 c // CHECK: store float %[[HOST_VAL]], ptr %[[PRIV_ALLOC]], align 4 // Test that we inlined the body of the parallel region. // CHECK: store float 0x{{.*}}, ptr %[[PRIV_ALLOC]], align 4 + +// ----- + +omp.private {type = private} @_QFEi_private_i32 : i32 +llvm.func @_QPprivate_alloc_with_switch() { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.mlir.constant(30 : i32) : i32 + %2 = llvm.mlir.constant(1 : i64) : i64 + %3 = llvm.alloca %2 x i32 {bindc_name = "n"} : (i64) -> !llvm.ptr + %4 = llvm.alloca %2 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr + llvm.store %0, %3 : i32, !llvm.ptr + %5 = llvm.load %3 : !llvm.ptr -> i32 + llvm.switch %5 : i32, ^bb1 [ + 1: ^bb1, + 2: ^bb2 + ] +^bb1: // 2 preds: ^bb0, ^bb0 + omp.simd private(@_QFEi_private_i32 %4 -> %arg0 : !llvm.ptr) { + omp.loop_nest (%arg1) : i32 = (%0) to (%1) inclusive step (%0) { + llvm.store %arg1, %arg0 : i32, !llvm.ptr + omp.yield + } + } + llvm.br ^bb2 +^bb2: // 2 preds: ^bb0, ^bb1 + llvm.return +} + +// CHECK-LABEL: define void @_QPprivate_alloc_with_switch() { +// CHECK: br label %[[AFTER_ALLOCA_BLOCK:.*]] +// CHECK: [[AFTER_ALLOCA_BLOCK]]: +// CHECK: switch i32 %{{.*}}, label %[[PRIVATE_INIT_BLOCK:.*]] [ +// CHECK: i32 1, label %[[PRIVATE_INIT_BLOCK]] +// CHECK: i32 2, label %[[EXIT_BLOCK:.*]] +// CHECK: ] +// CHECK: [[PRIVATE_INIT_BLOCK]]: +// CHECK: omp.private.init: +// CHECK: omp.simd.region: +// CHECK: [[EXIT_BLOCK]]: +// CHECK: ret void