Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,7 @@ class OpenMPIRBuilder {
/// not have an effect on \p M (see initialize)
OpenMPIRBuilder(Module &M)
: M(M), Builder(M.getContext()), OffloadInfoManager(this),
T(M.getTargetTriple()) {}
T(M.getTargetTriple()), IsFinalized(false) {}
LLVM_ABI ~OpenMPIRBuilder();

class AtomicInfo : public llvm::AtomicInfo {
Expand Down Expand Up @@ -521,6 +521,10 @@ class OpenMPIRBuilder {
/// all functions are finalized.
LLVM_ABI void finalize(Function *Fn = nullptr);

/// Check whether the finalize function has already run
/// \return true if the finalize function has already run
LLVM_ABI bool isFinalized();

/// Add attributes known for \p FnID to \p Fn.
LLVM_ABI void addAttributes(omp::RuntimeFunction FnID, Function &Fn);

Expand Down Expand Up @@ -3286,6 +3290,8 @@ class OpenMPIRBuilder {
Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
AtomicRMWInst::BinOp RMWOp);

bool IsFinalized;

public:
/// a struct to pack relevant information while generating atomic Ops
struct AtomicOpValue {
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -824,8 +824,12 @@ void OpenMPIRBuilder::finalize(Function *Fn) {
M.getGlobalVariable("__openmp_nvptx_data_transfer_temporary_storage")};
emitUsed("llvm.compiler.used", LLVMCompilerUsed);
}

IsFinalized = true;
}

/// Report whether finalize() has already been executed on this builder.
/// \return the current value of the IsFinalized flag, which finalize() sets
/// to true as its last statement.
bool OpenMPIRBuilder::isFinalized() {
  return IsFinalized;
}

/// Destructor. Performs no cleanup of its own; it only asserts that the
/// OutlineInfos container is empty by the time the builder is destroyed.
/// NOTE(review): presumably finalize() is what drains OutlineInfos — confirm
/// against the full definition of finalize().
OpenMPIRBuilder::~OpenMPIRBuilder() {
// Pending outlining requests at destruction time indicate a caller bug.
assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
}
Expand Down
18 changes: 11 additions & 7 deletions llvm/lib/Transforms/Utils/CodeExtractor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1563,12 +1563,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall, inputs,
NewValues);

LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) {
newFunction->dump();
report_fatal_error("verification of newFunction failed!");
});
LLVM_DEBUG(if (verifyFunction(*oldFunction))
report_fatal_error("verification of oldFunction failed!"));
LLVM_DEBUG(llvm::dbgs() << "After extractCodeRegion - newFunction:\n");
LLVM_DEBUG(newFunction->dump());
LLVM_DEBUG(llvm::dbgs() << "After extractCodeRegion - oldFunction:\n");
LLVM_DEBUG(oldFunction->dump());
LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, *newFunction, AC))
report_fatal_error("Stale Asumption cache for old Function!"));
return newFunction;
Expand Down Expand Up @@ -1868,8 +1866,14 @@ CallInst *CodeExtractor::emitReplacerCall(
// This takes place of the original loop
BasicBlock *codeReplacer =
BasicBlock::Create(Context, "codeRepl", oldFunction, ReplIP);
// In cases with multiple levels of outlining, e.g. with OpenMP,
// AllocationBlock may end up in a function different than oldFunction. We
// need to make sure we do not use it in those cases, otherwise the alloca
// will end up in a different function from its users and break the module.
BasicBlock *AllocaBlock =
AllocationBlock ? AllocationBlock : &oldFunction->getEntryBlock();
(AllocationBlock && oldFunction == AllocationBlock->getParent())
? AllocationBlock
: &oldFunction->getEntryBlock();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AllocationBlock is assigned on construction of the CodeExtractor object. Should we instead make sure that we pass a suitable basic block when we construct the code extractor object?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we go up the chain, the problem originates over here:


findAllocaInsertPoint does a stackWalk to find a preexisting alloca insert point, and it does find it - but it's in a different function. We could put a check here to make sure that we only use it if it's in the same function as builder.getInsertBlock()?
This would fix the issue, but fixing it inside CodeExtractor would make it more robust for other cases when a wrong block might be passed by accident. I think I'd lean towards keeping it as-is on that account, but if other people have strong preferences or arguments to the contrary then I am happy to change it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it should be fixed in the stack walk and have an assertion added to the code extractor?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good way to do it, thanks for the suggestion - done :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the update.

Sorry to be difficult but looking at this change gave me an idea: I think this situation just shouldn't ever happen.

Say we have something like this

omp.op1 {
  other.op1
  omp.op2 {
    other.op2
  }
  other.op3
}

If the lowering for omp.op2 is trying to put allocas in the alloca region for op1 then yes both of those could be outlined to different functions. But I don't think this should ever happen. If omp.op2 is expecting to be outlined, I think it should define its own alloca insertion point on the alloca stack.

I suspect one of the operations in your test needs to add an insertion point to the stack.

Copy link
Contributor Author

@mrkajetanp mrkajetanp Jun 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whenever things are saved on that stack, the thing being saved is what was previously returned by findAllocaInsertPoint. E.g.:

LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(

The specific call which finds the block in a different function during the stack walk is inside convertOmpTarget, here:

findAllocaInsertPoint(builder, moduleTranslation);
.

findAllocaInsertPoint defines its own insert point if it can't already find one, hence adding that extra check fixes the problem at hand. In this case the outer op - task - does add the alloca insertion point to the stack like it should, but that insertion point is inside of main as opposed to the outlined function for task.

That is to say - I don't think there are missing SaveStack operations at the point where it's relevant for this specific case. target does not save the alloca point it finds (maybe it should?) but by the time target would be saving it, the block that was found is already wrong.


// Update the entry count of the function.
if (BFI)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; REQUIRES: asserts
; RUN: opt -S -passes='function(instsimplify),hotcoldsplit' -hotcoldsplit-threshold=-1 -debug < %s 2>&1 | FileCheck %s
; RUN: opt -S -passes='function(instsimplify),hotcoldsplit' -hotcoldsplit-threshold=-1 < %s 2>&1 | FileCheck %s
; RUN: opt -passes='function(instcombine),hotcoldsplit,function(instsimplify)' %s -o /dev/null

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
Expand Down
6 changes: 5 additions & 1 deletion mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -777,7 +777,7 @@ ModuleTranslation::ModuleTranslation(Operation *module,
}

ModuleTranslation::~ModuleTranslation() {
if (ompBuilder)
if (ompBuilder && !ompBuilder->isFinalized())
ompBuilder->finalize();
}

Expand Down Expand Up @@ -2332,6 +2332,10 @@ mlir::translateModuleToLLVMIR(Operation *module, llvm::LLVMContext &llvmContext,
// beforehand.
translator.debugTranslation->addModuleFlagsIfNotPresent();

// Call the OpenMP IR Builder callbacks prior to verifying the module
if (auto *ompBuilder = translator.getOpenMPBuilder())
ompBuilder->finalize();

if (!disableVerification &&
llvm::verifyModule(*translator.llvmModule, &llvm::errs()))
return nullptr;
Comment on lines +2335 to 2340
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can move module verification to ~ModuleTranslation instead. This way we both make sure that verification happens after finalization and we do not need to add the isFinalized bool.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could do this, but then we'd have to change the signature of mlir::translateModuleToLLVMIR to remove the disableVerification option as it would no longer be possible to run the translation without verification, because verification would always happen in the destructor. Is this desirable?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think leaving it here makes more sense indeed. Thanks for looking into it.

Expand Down
62 changes: 62 additions & 0 deletions mlir/test/Target/LLVMIR/openmp-nested-task-target-parallel.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
// This tests the fix for https://github.com/llvm/llvm-project/issues/138102
// We are only interested in ensuring that the -mlir-to-llvmir pass doesn't crash

// CHECK-LABEL: define internal void @_QQmain..omp_par

// Privatizer of kind `private` for an i32 value. It is referenced by the
// private clause of the omp.wsloop inside @_QQmain.
omp.private {type = private} @_QFEi_private_i32 : i32
// Privatizer of kind `firstprivate` for an i32 value. It is referenced by the
// private clause of the omp.task inside @_QQmain.
// The copy region below loads an i32 through its first pointer argument,
// stores it through its second pointer argument, and yields the second pointer.
omp.private {type = firstprivate} @_QFEc_firstprivate_i32 : i32 copy {
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
%0 = llvm.load %arg0 : !llvm.ptr -> i32
llvm.store %0, %arg1 : i32, !llvm.ptr
omp.yield(%arg1 : !llvm.ptr)
}
// Reduced reproducer: an omp.task that encloses an omp.target, which in turn
// encloses an omp.parallel containing an omp.wsloop. Per the header of this
// file, the only expectation is that translation to LLVM IR does not crash.
// Keep the op order and SSA numbering as-is; this is a minimized input.
llvm.func @_QQmain() {
%0 = llvm.mlir.constant(1 : i64) : i64
// Stack slot named "i"; mapped into the target region below.
%1 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
%2 = llvm.mlir.constant(1 : i64) : i64
// Stack slot named "c"; privatized (firstprivate) by the task below.
%3 = llvm.alloca %2 x i32 {bindc_name = "c"} : (i64) -> !llvm.ptr
%4 = llvm.mlir.constant(10 : index) : i64
%5 = llvm.mlir.constant(0 : index) : i64
%6 = llvm.mlir.constant(10000 : index) : i64
%7 = llvm.mlir.constant(1 : index) : i64
%8 = llvm.mlir.constant(1 : i64) : i64
%9 = llvm.mlir.addressof @_QFECchunksz : !llvm.ptr
%10 = llvm.mlir.constant(1 : i64) : i64
%11 = llvm.trunc %7 : i64 to i32
llvm.br ^bb1(%11, %4 : i32, i64)
// NOTE(review): the predecessor comment below mentions ^bb2, but no such block
// exists in this function as written; likely a leftover from test reduction.
^bb1(%12: i32, %13: i64): // 2 preds: ^bb0, ^bb2
// The comparison result %14 is not used anywhere else in this function.
%14 = llvm.icmp "sgt" %13, %5 : i64
llvm.store %12, %3 : i32, !llvm.ptr
// Outer construct: the task receives %3 as %arg0 via the firstprivate privatizer.
omp.task private(@_QFEc_firstprivate_i32 %3 -> %arg0 : !llvm.ptr) {
// Map entries for "i" (alloca from the enclosing function), "c" (the task's
// private copy), and "chunksz" (address of the global constant).
%19 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"}
%20 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "c"}
%21 = omp.map.info var_ptr(%9 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "chunksz"}
// Middle construct: target region nested directly inside the task.
omp.target map_entries(%19 -> %arg1, %20 -> %arg2, %21 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
%22 = llvm.mlir.constant(9999 : i32) : i32
%23 = llvm.mlir.constant(1 : i32) : i32
// Inner construct: parallel region nested inside the target region.
omp.parallel {
// Loop bounds: lower = value loaded from %arg2, upper = lower + 9999, step 1.
%24 = llvm.load %arg2 : !llvm.ptr -> i32
%25 = llvm.add %24, %22 : i32
omp.wsloop private(@_QFEi_private_i32 %arg1 -> %arg4 : !llvm.ptr) {
omp.loop_nest (%arg5) : i32 = (%24) to (%25) inclusive step (%23) {
// Loop body only writes the induction variable into the private copy of "i".
llvm.store %arg5, %arg4 : i32, !llvm.ptr
omp.yield
}
}
omp.terminator
}
omp.terminator
}
omp.terminator
}
llvm.return
}
// Internal constant global of type i32 initialized to 10000. Its address is
// taken in @_QQmain and mapped into the target region under the name "chunksz".
llvm.mlir.global internal constant @_QFECchunksz() {addr_space = 0 : i32} : i32 {
%0 = llvm.mlir.constant(10000 : i32) : i32
llvm.return %0 : i32
}
// Internal constant global of type i32 initialized to 100000.
// NOTE(review): not referenced by any other op in this test file as shown;
// it may be removable — confirm before deleting.
llvm.mlir.global internal constant @_QFECn() {addr_space = 0 : i32} : i32 {
%0 = llvm.mlir.constant(100000 : i32) : i32
llvm.return %0 : i32
}
Loading