Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 29 additions & 8 deletions llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7022,10 +7022,11 @@ static Function *emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder,
Function *KernelLaunchFunction = StaleCI->getCalledFunction();

// StaleCI is the CallInst which is the call to the outlined
// target kernel launch function. If there are values that the
// outlined function uses then these are aggregated into a structure
// which is passed as the second argument. If not, then there's
// only one argument, the threadID. So, StaleCI can be
// target kernel launch function. If there are local live-in values
// that the outlined function uses then these are aggregated into a structure
// which is passed as the second argument. If there are no local live-in
// vallues or if all values used by the outlined kernel are global variables,
// then there's only one argument, the threadID. So, StaleCI can be
//
// %structArg = alloca { ptr, ptr }, align 8
// %gep_ = getelementptr { ptr, ptr }, ptr %structArg, i32 0, i32 0
Expand Down Expand Up @@ -7063,6 +7064,8 @@ static Function *emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder,
// host and device.
assert((!HasShareds || (StaleCI->arg_size() == 2)) &&
"StaleCI with shareds should have exactly two arguments.");

Value *ThreadId = ProxyFn->getArg(0);
if (HasShareds) {
auto *ArgStructAlloca = dyn_cast<AllocaInst>(StaleCI->getArgOperand(1));
assert(ArgStructAlloca &&
Expand All @@ -7073,7 +7076,6 @@ static Function *emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder,
AllocaInst *NewArgStructAlloca =
Builder.CreateAlloca(ArgStructType, nullptr, "structArg");
Value *TaskT = ProxyFn->getArg(1);
Value *ThreadId = ProxyFn->getArg(0);
Value *SharedsSize =
Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));

Expand All @@ -7086,7 +7088,9 @@ static Function *emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder,
LoadShared->getPointerAlignment(M.getDataLayout()), SharedsSize);

Builder.CreateCall(KernelLaunchFunction, {ThreadId, NewArgStructAlloca});
}
} else
Builder.CreateCall(KernelLaunchFunction, {ThreadId});

Builder.CreateRetVoid();
return ProxyFn;
}
Expand Down Expand Up @@ -7229,11 +7233,28 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP, "global.tid", false));

Builder.restoreIP(TargetTaskBodyIP);

if (Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
return Err;

OI.ExitBB = Builder.saveIP().getBlock();
// The outliner (CodeExtractor) extract a sequence or vector of blocks that
// it is given. These blocks are enumerated by
// OpenMPIRBuilder::OutlineInfo::collectBlocks which expects the OI.ExitBlock
// to be outside the region. In other words, OI.ExitBlock is expected to be
// the start of the region after the outlining. We used to set OI.ExitBlock
// to the InsertBlock after TaskBodyCB is done. This is fine in most cases
// except when the task body is a single basic block. In that case,
// OI.ExitBlock is set to the single task body block and will get left out of
// the outlining process. So, simply create a new empty block to which we
// uncoditionally branch from where TaskBodyCB left off
BasicBlock *TargetTaskContBlock =
BasicBlock::Create(Builder.getContext(), "target.task.cont");

auto *CurFn = Builder.GetInsertBlock()->getParent();
emitBranch(TargetTaskContBlock);
emitBlock(TargetTaskContBlock, CurFn, /*IsFinished=*/true);

OI.ExitBB = TargetTaskContBlock;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can use splitBB here.

Suggested change
BasicBlock *TargetTaskContBlock =
BasicBlock::Create(Builder.getContext(), "target.task.cont");
auto *CurFn = Builder.GetInsertBlock()->getParent();
emitBranch(TargetTaskContBlock);
emitBlock(TargetTaskContBlock, CurFn, /*IsFinished=*/true);
OI.ExitBB = TargetTaskContBlock;
OI.ExitBB = splitBB(Builder, /*CreateBranch=*/true, "target.task.cont");
emitBlock(OI.ExitBB, OI.ExitBB->getParent(), /*IsFinished=*/true);

Copy link
Contributor Author

@bhandarkar-pranav bhandarkar-pranav Feb 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

emitBlock isn't needed as splitBB calls BasicBlock::create with a Function *. This results in the block being inserted into the function. Calling emitBlock after splitBlock triggers asserts.


OI.PostOutlineCB = [this, ToBeDeleted, Dependencies, HasNoWait,
DeviceID](Function &OutlinedFn) mutable {
assert(OutlinedFn.getNumUses() == 1 &&
Expand Down
21 changes: 21 additions & 0 deletions mlir/test/Target/LLVMIR/omptarget-depend-host-only.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,29 @@ module attributes {omp.is_target_device = false} {
// CHECK: define void @omp_target_depend_()
// CHECK-NOT: define {{.*}} @
// CHECK-NOT: call i32 @__tgt_target_kernel({{.*}})
// CHECK: call void @__kmpc_omp_task_begin_if0
// CHECK-NEXT: call void @.omp_target_task_proxy_func
// CHECK: call void @__kmpc_omp_task_complete_if0
// https://github.com/llvm/llvm-project/issues/126949 exposes two issues
// 1. Empty target task proxy functions
// 2. When 1 fixed, it leads to a second problem of calling the omp target kernel twice
// Once via the target task proxy function and a second time after the target task is done.
// The following checks check problem #2.
// functions. The following checks tests the fix for this issue.
// CHECK-NEXT: br label %[[BLOCK_AFTER_OUTLINED_TARGET_TASK_BODY:.*]]
// CHECK:[[BLOCK_AFTER_OUTLINED_TARGET_TASK_BODY]]:
// CHECK-NEXT: ret void

// CHECK: define internal void @omp_target_depend_..omp_par
// CHECK: call void @__omp_offloading_[[DEV:.*]]_[[FIL:.*]]_omp_target_depend__l[[LINE:.*]](ptr {{.*}})
// CHECK-NEXT: br label %[[BLOCK_AFTER_TARGET_TASK_BODY:.*]]
// CHECK: [[BLOCK_AFTER_TARGET_TASK_BODY]]:
// CHECK-NEXT: ret void


// CHECK: define internal void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_depend__l[[LINE]](ptr %[[ADDR_A:.*]])
// CHECK: store i32 100, ptr %[[ADDR_A]], align 4

// The following check test for the fix of problem #1 as described in https://github.com/llvm/llvm-project/issues/126949
// CHECK: define internal void @.omp_target_task_proxy_func
// CHECK: call void @omp_target_depend_..omp_par
20 changes: 20 additions & 0 deletions mlir/test/Target/LLVMIR/omptarget-nowait-host-only.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,30 @@ module attributes {omp.is_target_device = false} {
// CHECK: define void @omp_target_nowait_()
// CHECK-NOT: define {{.*}} @
// CHECK-NOT: call ptr @__kmpc_omp_target_task_alloc({{.*}})
// CHECK: call void @__kmpc_omp_task_begin_if0
// CHECK-NEXT: call void @.omp_target_task_proxy_func
// CHECK: call void @__kmpc_omp_task_complete_if0
// https://github.com/llvm/llvm-project/issues/126949 exposes two issues
// 1. Empty target task proxy functions
// 2. When 1 fixed, it leads to a second problem of calling the omp target kernel twice
// Once via the target task proxy function and a second time after the target task is done.
// The following checks check problem #2.
// functions. The following checks tests the fix for this issue.
// CHECK-NEXT: br label %[[BLOCK_AFTER_OUTLINED_TARGET_TASK_BODY:.*]]
// CHECK:[[BLOCK_AFTER_OUTLINED_TARGET_TASK_BODY]]:
// CHECK-NEXT: ret void

// Verify that we directly emit a call to the "target" region's body from the
// parent function of the the `omp.target` op.
// CHECK: define internal void @omp_target_nowait_..omp_par
// CHECK: call void @__omp_offloading_[[DEV:.*]]_[[FIL:.*]]_omp_target_nowait__l[[LINE:.*]](ptr {{.*}})
// CHECK-NEXT: br label %[[BLOCK_AFTER_TARGET_TASK_BODY:.*]]
// CHECK: [[BLOCK_AFTER_TARGET_TASK_BODY]]:
// CHECK-NEXT: ret void

// CHECK: define internal void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_nowait__l[[LINE]](ptr %[[ADDR_X:.*]])
// CHECK: store float 5{{.*}}, ptr %[[ADDR_X]], align 4

// The following check test for the fix of problem #1 as described in https://github.com/llvm/llvm-project/issues/126949
// CHECK: define internal void @.omp_target_task_proxy_func
// CHECK: call void @omp_target_nowait_..omp_par