-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[mlir][OpenMP] - Implement lowering from MLIR to LLVMIR for private clause on target constructs
#105471
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[mlir][OpenMP] - Implement lowering from MLIR to LLVMIR for private clause on target constructs
#105471
Changes from 3 commits
9f28204
08ff51e
39a4db4
54ff624
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -13,6 +13,7 @@ | |||||
| #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" | ||||||
| #include "mlir/Analysis/TopologicalSortUtils.h" | ||||||
| #include "mlir/Dialect/LLVMIR/LLVMDialect.h" | ||||||
| #include "mlir/Dialect/OpenMP/OpenMPClauseOperands.h" | ||||||
| #include "mlir/Dialect/OpenMP/OpenMPDialect.h" | ||||||
| #include "mlir/Dialect/OpenMP/OpenMPInterfaces.h" | ||||||
| #include "mlir/IR/IRMapping.h" | ||||||
|
|
@@ -3241,12 +3242,140 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, | |||||
| DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>()); | ||||||
| SmallVector<Value> mapVars = targetOp.getMapVars(); | ||||||
| llvm::Function *llvmOutlinedFn = nullptr; | ||||||
|
|
||||||
| // TODO: It can also be false if a compile-time constant `false` IF clause is | ||||||
| // specified. | ||||||
| bool isOffloadEntry = | ||||||
| isTargetDevice || !ompBuilder->Config.TargetTriples.empty(); | ||||||
|
|
||||||
| if (!targetOp.getPrivateVars().empty()) { | ||||||
| OperandRange privateVars = targetOp.getPrivateVars(); | ||||||
| std::optional<mlir::ArrayAttr> privateSyms = targetOp.getPrivateSyms(); | ||||||
| auto &firstTargetRegion = opInst.getRegion(0); | ||||||
| auto &firstTargetBlock = firstTargetRegion.front(); | ||||||
| auto *regionArgsStart = firstTargetBlock.getArguments().begin(); | ||||||
| auto *privArgsStart = regionArgsStart + targetOp.getMapVars().size(); | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to remove any
Suggested change
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just as a heads-up, I'm planning to create a PR stack this week to revamp the handling of clause-related block arguments such as these. At that point we won't have to worry so much about the order they appear in, but instead we'll be able to access them through an MLIR interface.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Excellent. That would be great, because the present usage is extremely brittle. |
||||||
| auto *privArgsEnd = privArgsStart + targetOp.getPrivateVars().size(); | ||||||
| BitVector blockArgsBV(firstTargetBlock.getNumArguments(), false); | ||||||
| omp::PrivateClauseOps newPrivateClauses; | ||||||
| MutableArrayRef argSubRangePrivates(privArgsStart, privArgsEnd); | ||||||
| for (auto [privVar, privatizerNameAttr, blockArg] : | ||||||
| llvm::zip_equal(privateVars, *privateSyms, argSubRangePrivates)) { | ||||||
|
|
||||||
| SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerNameAttr); | ||||||
|
|
||||||
| // 1. Clone the privatizer so that we can make changes to it freely | ||||||
| MLIRContext &context = moduleTranslation.getContext(); | ||||||
| mlir::IRRewriter rewriter(&context); | ||||||
| omp::PrivateClauseOp privatizer = | ||||||
bhandarkar-pranav marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
| SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(&opInst, | ||||||
| privSym); | ||||||
| // Handle only private for now. Also, not handling allocatables yet. | ||||||
| if (privatizer.getDataSharingType() != | ||||||
| omp::DataSharingClauseType::Private || | ||||||
| !privatizer.getDeallocRegion().empty()) { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Otherwise could you return to indicate failure? |
||||||
| newPrivateClauses.privateSyms.push_back(privSym); | ||||||
| newPrivateClauses.privateVars.push_back(privVar); | ||||||
| continue; | ||||||
| } | ||||||
|
|
||||||
| auto &allocRegion = privatizer.getAllocRegion(); | ||||||
| // `alloc` region should have only 1 argument | ||||||
| assert(allocRegion.getNumArguments() == 1 && | ||||||
| "alloc region of omp::PrivateClauseOp should have one argument"); | ||||||
| auto allocRegionArg = allocRegion.getArgument(0); | ||||||
| // Assuming we have the following targetOp and Privatizer | ||||||
| // | ||||||
| // omp.private {type = private} @x.privatizer : !llvm.ptr alloc { | ||||||
| // %1 = alloca... | ||||||
| // omp.yield(%1) | ||||||
| // } | ||||||
| // omp.target map(..) map(..) private(privVar) { | ||||||
| // ^bb0(map_arg0, ..., map_argn, priv_arg0): | ||||||
| // ... | ||||||
| // ... | ||||||
| // store %v, %priv_arg0 | ||||||
| // omp.terminator | ||||||
| // } | ||||||
| // Roughly, we do the following - | ||||||
| // Split the first block (bb0) of the first region of the target into | ||||||
| // two block. Then clone the alloc region of the privatizer between the | ||||||
| // two new blocks. When cloning replace the alloc argument with privVar. | ||||||
| // We'll then have | ||||||
| // | ||||||
| // omp.target map(..) map(..) private(privVar) { | ||||||
| // ^bb0(map_arg0, ..., map_argn, priv_arg0, ..., priv_argn): | ||||||
| // ^bb1: (cloned region) // no predecessor | ||||||
| // %1 = alloca... | ||||||
| // omp.yield(%1) | ||||||
| // ^bb2: // no predecessor | ||||||
| // ... | ||||||
| // ... | ||||||
| // store %v, %priv_arg0 | ||||||
| // omp.terminator | ||||||
| // } | ||||||
| // Next, add an unconditional branch from ^bb0 to ^bb1 and change the | ||||||
| // yield in the last block of the cloned alloc region to an unconditional | ||||||
| // branch before replacing all uses of 'priv_arg0' with the yielded value | ||||||
| // to finally get the following | ||||||
| // | ||||||
| // omp.target map(..) map(..) private(privVar) { | ||||||
| // ^bb0(map_arg0, ..., map_argn, priv_arg0, ..., priv_argn): | ||||||
| // llvm.br ^bb1 | ||||||
| // ^bb1: (cloned region) // pred: ^bb0 | ||||||
| // %1 = alloca... | ||||||
| // llvm.br ^bb2 | ||||||
| // ^bb2: // pred: ^bb1 | ||||||
| // ... | ||||||
| // ... | ||||||
| // store %v, %1 // %priv_arg0 replaced with the yield value | ||||||
| // omp.terminator | ||||||
| // } | ||||||
| Block *newBlock = | ||||||
| rewriter.splitBlock(&firstTargetBlock, firstTargetBlock.begin()); | ||||||
| rewriter.setInsertionPointToStart(&firstTargetBlock); | ||||||
| Location loc = targetOp.getLoc(); | ||||||
| rewriter.create<LLVM::BrOp>(loc, ValueRange(), newBlock); | ||||||
|
|
||||||
| IRMapping cloneMap; | ||||||
| cloneMap.map(allocRegionArg, privVar); | ||||||
| auto secondBlockIter = std::next(firstTargetRegion.begin(), 1); | ||||||
bhandarkar-pranav marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| allocRegion.cloneInto(&firstTargetRegion, secondBlockIter, cloneMap); | ||||||
|
|
||||||
| unsigned allocRegNumBlocks = allocRegion.getBlocks().size(); | ||||||
| secondBlockIter = std::next(firstTargetRegion.begin(), 1); | ||||||
| auto clonedAllocRegionEndIter = | ||||||
bhandarkar-pranav marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| std::next(secondBlockIter, (allocRegNumBlocks - 1)); | ||||||
| Block &clonedAllocRegEndBlock = *clonedAllocRegionEndIter; | ||||||
|
|
||||||
| Operation *br = firstTargetBlock.getTerminator(); | ||||||
| LLVM::BrOp brOp = dyn_cast<LLVM::BrOp>(br); | ||||||
| Operation *yield = clonedAllocRegEndBlock.getTerminator(); | ||||||
| omp::YieldOp yieldOp = dyn_cast<omp::YieldOp>(yield); | ||||||
bhandarkar-pranav marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| auto newValue = yieldOp.getResults().front(); | ||||||
| rewriter.setInsertionPointAfter(yield); | ||||||
| auto *oldSucc = brOp.getSuccessor(); | ||||||
| brOp.setSuccessor(&*secondBlockIter); | ||||||
| // TODO:Consider cloning brOp and adding it to clonedAllocRegEngBlock | ||||||
| // TODO: Can we not simply merge clonedAllocRegEngBlock and oldSucc? | ||||||
| rewriter.create<LLVM::BrOp>(loc, ValueRange(), oldSucc); | ||||||
| blockArgsBV.set(blockArg.getArgNumber()); | ||||||
| rewriter.replaceAllUsesWith(blockArg, newValue); | ||||||
| rewriter.eraseOp(yield); | ||||||
| } | ||||||
| // Do some fix ups now. | ||||||
| // First, remove the blockArguments that we just privatized | ||||||
| firstTargetBlock.eraseArguments(blockArgsBV); | ||||||
| // Then remove the private vars and privatizers that we have | ||||||
| // processed i.e privatized just now. | ||||||
| if (newPrivateClauses.privateSyms.empty()) { | ||||||
| targetOp.getPrivateVarsMutable().clear(); | ||||||
| targetOp.removePrivateSymsAttr(); | ||||||
| } else { | ||||||
| targetOp.setPrivateSymsAttr(mlir::ArrayAttr::get( | ||||||
| targetOp.getContext(), newPrivateClauses.privateSyms)); | ||||||
| targetOp.getPrivateVarsMutable().assign(newPrivateClauses.privateVars); | ||||||
|
Comment on lines
+3374
to
+3376
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What do we do with the un-privatized variables now? I am thinking, we should emit an error in this case. We have
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That would convert what is essentially silently failing right now (ie not getting lowered to LLVMIR silently) into hard failures. This change would mean
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think a hard failure would good. At least it will allow someone working on the code to find the proper extension point to support allocatables (or whatever is not supported) on this layer. |
||||||
| } | ||||||
| } | ||||||
| LogicalResult bodyGenStatus = success(); | ||||||
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; | ||||||
| auto bodyCB = [&](InsertPointTy allocaIP, | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,73 @@ | ||
| // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s | ||
bhandarkar-pranav marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| omp.private {type = private} @simple_var.privatizer : !llvm.ptr alloc { | ||
| ^bb0(%arg0: !llvm.ptr): | ||
| %0 = llvm.mlir.constant(1 : i64) : i64 | ||
| %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var", pinned} : (i64) -> !llvm.ptr | ||
| omp.yield(%1 : !llvm.ptr) | ||
| } | ||
| llvm.func @target_map_single_private() attributes {fir.internal_name = "_QPtarget_map_single_private"} { | ||
| %0 = llvm.mlir.constant(1 : i64) : i64 | ||
| %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr | ||
| %2 = llvm.mlir.constant(1 : i64) : i64 | ||
| %3 = llvm.alloca %2 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr | ||
| %4 = llvm.mlir.constant(2 : i32) : i32 | ||
| llvm.store %4, %3 : i32, !llvm.ptr | ||
| %5 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"} | ||
| omp.target map_entries(%5 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr) { | ||
| ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): | ||
| %6 = llvm.mlir.constant(10 : i32) : i32 | ||
| %7 = llvm.load %arg0 : !llvm.ptr -> i32 | ||
| %8 = llvm.add %7, %6 : i32 | ||
| llvm.store %8, %arg1 : i32, !llvm.ptr | ||
| omp.terminator | ||
| } | ||
| llvm.return | ||
| } | ||
| // CHECK: define internal void @__omp_offloading_fd00 | ||
| // CHECK-DAG: %[[PRIV_ALLOC:.*]] = alloca i32, i64 1, align 4 | ||
bhandarkar-pranav marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| // CHECK-DAG: %[[ADD:.*]] = add i32 {{.*}}, 10 | ||
| // CHECK-DAG: store i32 %[[ADD]], ptr %[[PRIV_ALLOC]], align 4 | ||
|
Comment on lines
+29
to
+31
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: Avoid using
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I used
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is usually save to assume that the block order doesn't change. While the order is not relevant (apart from the entry block), it is deterministic. Note that this is not necessarily a big issue, and if you assume that this order might indeed change at some point in the future, feel free to keep it as is.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just out of curiosity, what would be some examples of OpenMP constructs which might be susceptible to order changes and therefore require the more rigorous |
||
|
|
||
| omp.private {type = private} @n.privatizer : !llvm.ptr alloc { | ||
| ^bb0(%arg0: !llvm.ptr): | ||
| %0 = llvm.mlir.constant(1 : i64) : i64 | ||
| %1 = llvm.alloca %0 x f32 {bindc_name = "n", pinned} : (i64) -> !llvm.ptr | ||
| omp.yield(%1 : !llvm.ptr) | ||
| } | ||
| llvm.func @target_map_2_privates() attributes {fir.internal_name = "_QPtarget_map_2_privates"} { | ||
| %0 = llvm.mlir.constant(1 : i64) : i64 | ||
| %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr | ||
| %2 = llvm.mlir.constant(1 : i64) : i64 | ||
| %3 = llvm.alloca %2 x f32 {bindc_name = "n"} : (i64) -> !llvm.ptr | ||
| %4 = llvm.mlir.constant(1 : i64) : i64 | ||
| %5 = llvm.alloca %4 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr | ||
bhandarkar-pranav marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| %6 = llvm.mlir.constant(2 : i32) : i32 | ||
| llvm.store %6, %5 : i32, !llvm.ptr | ||
| %7 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"} | ||
| omp.target map_entries(%7 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr, @n.privatizer %3 -> %arg2 : !llvm.ptr) { | ||
| ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr): | ||
| %8 = llvm.mlir.constant(1.100000e+01 : f32) : f32 | ||
| %9 = llvm.mlir.constant(10 : i32) : i32 | ||
| %10 = llvm.load %arg0 : !llvm.ptr -> i32 | ||
| %11 = llvm.add %10, %9 : i32 | ||
| llvm.store %11, %arg1 : i32, !llvm.ptr | ||
| %12 = llvm.load %arg1 : !llvm.ptr -> i32 | ||
| %13 = llvm.sitofp %12 : i32 to f32 | ||
| %14 = llvm.fadd %13, %8 {fastmathFlags = #llvm.fastmath<contract>} : f32 | ||
| llvm.store %14, %arg2 : f32, !llvm.ptr | ||
| omp.terminator | ||
| } | ||
| llvm.return | ||
| } | ||
|
|
||
| // CHECK: define internal void @__omp_offloading_fd00 | ||
| // CHECK-DAG: %[[PRIV_I32_ALLOC:.*]] = alloca i32, i64 1, align 4 | ||
| // CHECK-DAG: %[[PRIV_FLOAT_ALLOC:.*]] = alloca float, i64 1, align 4 | ||
| // CHECK-DAG: %[[ADD_I32:.*]] = add i32 {{.*}}, 10 | ||
| // CHECK-DAG: store i32 %[[ADD_I32]], ptr %[[PRIV_I32_ALLOC]], align 4 | ||
| // CHECK-DAG: %[[LOAD_I32_AGAIN:.*]] = load i32, ptr %6, align 4 | ||
| // CHECK-DAG: %[[CAST_TO_FLOAT:.*]] = sitofp i32 %[[LOAD_I32_AGAIN]] to float | ||
| // CHECK-DAG: %[[ADD_FLOAT:.*]] = fadd contract float %[[CAST_TO_FLOAT]], 1.100000e+01 | ||
| // CHECK-DAG: store float %[[ADD_FLOAT]], ptr %[[PRIV_FLOAT_ALLOC]], align 4 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| ! Test target private | ||
| ! REQUIRES: flang, amdgcn-amd-amdhsa | ||
| ! UNSUPPORTED: nvptx64-nvidia-cuda | ||
| ! UNSUPPORTED: nvptx64-nvidia-cuda-LTO | ||
| ! UNSUPPORTED: aarch64-unknown-linux-gnu | ||
| ! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO | ||
| ! UNSUPPORTED: x86_64-pc-linux-gnu | ||
| ! UNSUPPORTED: x86_64-pc-linux-gnu-LTO | ||
|
|
||
| ! RUN: %libomptarget-compile-fortran-run-and-check-generic | ||
| program main | ||
| integer :: a = 0 | ||
| call target_private(a) | ||
| print*, "======= FORTRAN Test passed! =======" | ||
| print*, "foo(a) should not return 20, got " , a | ||
| if (a /= 20) then | ||
| stop 0 | ||
| else | ||
| stop 1 | ||
| end if | ||
|
|
||
| ! stop 0 | ||
| end program main | ||
| subroutine target_private(r) | ||
| implicit none | ||
| integer, dimension(2) :: simple_vars | ||
| integer :: a, r | ||
| ! set a to 10 | ||
| a = 5 | ||
bhandarkar-pranav marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| simple_vars(1) = a | ||
| simple_vars(2) = a | ||
| !$omp target map(tofrom: simple_vars) private(a) | ||
| ! Without private(a), a would be firstprivate, meaning it's value would be 5 | ||
| ! with private(a), it's value would be uninitialized, which means it'd have | ||
| ! a very small chance of being 5. | ||
|
Comment on lines
+34
to
+35
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ultra nit: This case is UB, so anything can happen. Not sure if there would be another case that would not invoke UB in case of missing |
||
| ! So, simple_vars(1|2) should be 5 + a_private | ||
| simple_vars(1) = simple_vars(1) + a | ||
| simple_vars(2) = simple_vars(2) + a | ||
| !$omp end target | ||
| r = simple_vars(1) + simple_vars(2) | ||
| end subroutine target_private | ||
Uh oh!
There was an error while loading. Please reload this page.