-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[mlir][OpenMP] - Implement lowering from MLIR to LLVMIR for private clause on target constructs
#105471
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[mlir][OpenMP] - Implement lowering from MLIR to LLVMIR for private clause on target constructs
#105471
Changes from all commits
9f28204
08ff51e
39a4db4
54ff624
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,6 +13,7 @@ | |
| #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" | ||
| #include "mlir/Analysis/TopologicalSortUtils.h" | ||
| #include "mlir/Dialect/LLVMIR/LLVMDialect.h" | ||
| #include "mlir/Dialect/OpenMP/OpenMPClauseOperands.h" | ||
| #include "mlir/Dialect/OpenMP/OpenMPDialect.h" | ||
| #include "mlir/Dialect/OpenMP/OpenMPInterfaces.h" | ||
| #include "mlir/IR/IRMapping.h" | ||
|
|
@@ -3241,12 +3242,140 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, | |
| DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>()); | ||
| SmallVector<Value> mapVars = targetOp.getMapVars(); | ||
| llvm::Function *llvmOutlinedFn = nullptr; | ||
|
|
||
| // TODO: It can also be false if a compile-time constant `false` IF clause is | ||
| // specified. | ||
| bool isOffloadEntry = | ||
| isTargetDevice || !ompBuilder->Config.TargetTriples.empty(); | ||
|
|
||
| if (!targetOp.getPrivateVars().empty()) { | ||
| OperandRange privateVars = targetOp.getPrivateVars(); | ||
| std::optional<mlir::ArrayAttr> privateSyms = targetOp.getPrivateSyms(); | ||
| auto &targetRegion = opInst.getRegion(0); | ||
| auto &firstTargetBlock = targetRegion.front(); | ||
| auto *regionArgsStart = firstTargetBlock.getArguments().begin(); | ||
| auto *privArgsStart = regionArgsStart + targetOp.getMapVars().size(); | ||
| auto *privArgsEnd = privArgsStart + targetOp.getPrivateVars().size(); | ||
| BitVector blockArgsBV(firstTargetBlock.getNumArguments(), false); | ||
| omp::PrivateClauseOps newPrivateClauses; | ||
| MutableArrayRef argSubRangePrivates(privArgsStart, privArgsEnd); | ||
| for (auto [privVar, privatizerNameAttr, blockArg] : | ||
| llvm::zip_equal(privateVars, *privateSyms, argSubRangePrivates)) { | ||
|
|
||
| SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerNameAttr); | ||
|
|
||
| // 1. Look up the privatizer in the symbol table | ||
| MLIRContext &context = moduleTranslation.getContext(); | ||
| mlir::IRRewriter rewriter(&context); | ||
| omp::PrivateClauseOp privatizer = | ||
bhandarkar-pranav marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(&opInst, | ||
| privSym); | ||
| // Handle only private for now. Also, not handling allocatables yet. | ||
| if (privatizer.getDataSharingType() != | ||
| omp::DataSharingClauseType::Private || | ||
| !privatizer.getDeallocRegion().empty()) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Otherwise could you return to indicate failure? |
||
| newPrivateClauses.privateSyms.push_back(privSym); | ||
| newPrivateClauses.privateVars.push_back(privVar); | ||
| continue; | ||
| } | ||
|
|
||
| auto &allocRegion = privatizer.getAllocRegion(); | ||
| // `alloc` region should have only 1 argument | ||
| assert(allocRegion.getNumArguments() == 1 && | ||
| "alloc region of omp::PrivateClauseOp should have one argument"); | ||
| auto allocRegionArg = allocRegion.getArgument(0); | ||
| // Assuming we have the following targetOp and Privatizer | ||
| // | ||
| // omp.private {type = private} @x.privatizer : !llvm.ptr alloc { | ||
| // %1 = alloca... | ||
| // omp.yield(%1) | ||
| // } | ||
| // omp.target map(..) map(..) private(privVar) { | ||
| // ^bb0(map_arg0, ..., map_argn, priv_arg0): | ||
| // ... | ||
| // ... | ||
| // store %v, %priv_arg0 | ||
| // omp.terminator | ||
| // } | ||
| // Roughly, we do the following - | ||
| // Split the first block (bb0) of the target region into two blocks. | ||
| // Then, clone the alloc region of the privatizer between the two | ||
| // new blocks. When cloning replace the alloc argument with privVar. | ||
| // We'll then have | ||
| // | ||
| // omp.target map(..) map(..) private(privVar) { | ||
| // ^bb0(map_arg0, ..., map_argn, priv_arg0, ..., priv_argn): | ||
| // ^bb1: (cloned region) // no predecessor | ||
| // %1 = alloca... | ||
| // omp.yield(%1) | ||
| // ^bb2: // no predecessor | ||
| // ... | ||
| // ... | ||
| // store %v, %priv_arg0 | ||
| // omp.terminator | ||
| // } | ||
| // Next, add an unconditional branch from ^bb0 to ^bb1 and change the | ||
| // yield in the last block of the cloned alloc region to an unconditional | ||
| // branch to the other split block before replacing all uses of | ||
| // 'priv_arg0' with the yielded value to finally get the following code | ||
| // structure | ||
| // | ||
| // omp.target map(..) map(..) private(privVar) { | ||
| // ^bb0(map_arg0, ..., map_argn, priv_arg0, ..., priv_argn): | ||
| // llvm.br ^bb1 | ||
| // ^bb1: (cloned region) // pred: ^bb0 | ||
| // %1 = alloca... | ||
| // llvm.br ^bb2 | ||
| // ^bb2: // pred: ^bb1 | ||
| // ... | ||
| // ... | ||
| // store %v, %1 // %priv_arg0 replaced with the yield value | ||
| // omp.terminator | ||
| // } | ||
| Block *newBlock = | ||
| rewriter.splitBlock(&firstTargetBlock, firstTargetBlock.begin()); | ||
| rewriter.setInsertionPointToStart(&firstTargetBlock); | ||
| Location loc = targetOp.getLoc(); | ||
| rewriter.create<LLVM::BrOp>(loc, ValueRange(), newBlock); | ||
|
|
||
| IRMapping cloneMap; | ||
| cloneMap.map(allocRegionArg, privVar); | ||
| auto newBlockIter = std::next(targetRegion.begin(), 1); | ||
| allocRegion.cloneInto(&targetRegion, newBlockIter, cloneMap); | ||
|
|
||
| unsigned allocRegNumBlocks = allocRegion.getBlocks().size(); | ||
| newBlockIter = std::next(targetRegion.begin(), 1); | ||
| auto clonedAllocRegionBackIter = | ||
| std::next(newBlockIter, (allocRegNumBlocks - 1)); | ||
| Block &clonedAllocRegionLastBlock = *clonedAllocRegionBackIter; | ||
|
|
||
| LLVM::BrOp brOp = dyn_cast<LLVM::BrOp>(firstTargetBlock.getTerminator()); | ||
| omp::YieldOp yieldOp = | ||
| dyn_cast<omp::YieldOp>(clonedAllocRegionLastBlock.getTerminator()); | ||
| auto newValue = yieldOp.getResults().front(); | ||
| rewriter.setInsertionPointAfter(yieldOp); | ||
| auto *oldSucc = brOp.getSuccessor(); | ||
| brOp.setSuccessor(&*newBlockIter); | ||
| // TODO:Consider cloning brOp and adding it to clonedAllocRegEngBlock | ||
| // TODO: Can we not simply merge clonedAllocRegEngBlock and oldSucc? | ||
| rewriter.create<LLVM::BrOp>(loc, ValueRange(), oldSucc); | ||
| blockArgsBV.set(blockArg.getArgNumber()); | ||
| rewriter.replaceAllUsesWith(blockArg, newValue); | ||
| rewriter.eraseOp(yieldOp); | ||
| } | ||
| // Do some fix ups now. | ||
| // First, remove the blockArguments that we just privatized | ||
| firstTargetBlock.eraseArguments(blockArgsBV); | ||
| // Then remove the private vars and privatizers that we have | ||
| // processed i.e privatized just now. | ||
| if (newPrivateClauses.privateSyms.empty()) { | ||
| targetOp.getPrivateVarsMutable().clear(); | ||
| targetOp.removePrivateSymsAttr(); | ||
| } else { | ||
| targetOp.setPrivateSymsAttr(mlir::ArrayAttr::get( | ||
| targetOp.getContext(), newPrivateClauses.privateSyms)); | ||
| targetOp.getPrivateVarsMutable().assign(newPrivateClauses.privateVars); | ||
|
Comment on lines
+3374
to
+3376
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What do we do with the un-privatized variables now? I am thinking, we should emit an error in this case. We have
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That would convert what is essentially silently failing right now (ie not getting lowered to LLVMIR silently) into hard failures. This change would mean
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think a hard failure would good. At least it will allow someone working on the code to find the proper extension point to support allocatables (or whatever is not supported) on this layer. |
||
| } | ||
| } | ||
| LogicalResult bodyGenStatus = success(); | ||
| using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; | ||
| auto bodyCB = [&](InsertPointTy allocaIP, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,72 @@ | ||
| // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s | ||
bhandarkar-pranav marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| omp.private {type = private} @simple_var.privatizer : !llvm.ptr alloc { | ||
| ^bb0(%arg0: !llvm.ptr): | ||
| %0 = llvm.mlir.constant(1 : i64) : i64 | ||
| %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var", pinned} : (i64) -> !llvm.ptr | ||
| omp.yield(%1 : !llvm.ptr) | ||
| } | ||
| llvm.func @target_map_single_private() attributes {fir.internal_name = "_QPtarget_map_single_private"} { | ||
| %0 = llvm.mlir.constant(1 : i64) : i64 | ||
| %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr | ||
| %2 = llvm.mlir.constant(1 : i64) : i64 | ||
| %3 = llvm.alloca %2 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr | ||
| %4 = llvm.mlir.constant(2 : i32) : i32 | ||
| llvm.store %4, %3 : i32, !llvm.ptr | ||
| %5 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"} | ||
| omp.target map_entries(%5 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr) { | ||
| ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): | ||
| %6 = llvm.mlir.constant(10 : i32) : i32 | ||
| %7 = llvm.load %arg0 : !llvm.ptr -> i32 | ||
| %8 = llvm.add %7, %6 : i32 | ||
| llvm.store %8, %arg1 : i32, !llvm.ptr | ||
| omp.terminator | ||
| } | ||
| llvm.return | ||
| } | ||
| // CHECK: define internal void @__omp_offloading_fd00 | ||
| // CHECK-NOT: define {{.*}} | ||
| // CHECK-DAG: %[[PRIV_ALLOC:.*]] = alloca i32, i64 1, align 4 | ||
bhandarkar-pranav marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| // CHECK-DAG: %[[ADD:.*]] = add i32 {{.*}}, 10 | ||
| // CHECK-DAG: store i32 %[[ADD]], ptr %[[PRIV_ALLOC]], align 4 | ||
|
Comment on lines
+29
to
+31
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: Avoid using
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I used
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is usually save to assume that the block order doesn't change. While the order is not relevant (apart from the entry block), it is deterministic. Note that this is not necessarily a big issue, and if you assume that this order might indeed change at some point in the future, feel free to keep it as is.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just out of curiosity, what would be some examples of OpenMP constructs which might be susceptible to order changes and therefore require the more rigorous |
||
|
|
||
| omp.private {type = private} @n.privatizer : !llvm.ptr alloc { | ||
| ^bb0(%arg0: !llvm.ptr): | ||
| %0 = llvm.mlir.constant(1 : i64) : i64 | ||
| %1 = llvm.alloca %0 x f32 {bindc_name = "n", pinned} : (i64) -> !llvm.ptr | ||
| omp.yield(%1 : !llvm.ptr) | ||
| } | ||
| llvm.func @target_map_2_privates() attributes {fir.internal_name = "_QPtarget_map_2_privates"} { | ||
| %0 = llvm.mlir.constant(1 : i64) : i64 | ||
| %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr | ||
| %3 = llvm.alloca %0 x f32 {bindc_name = "n"} : (i64) -> !llvm.ptr | ||
| %5 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr | ||
| %6 = llvm.mlir.constant(2 : i32) : i32 | ||
| llvm.store %6, %5 : i32, !llvm.ptr | ||
| %7 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"} | ||
| omp.target map_entries(%7 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr, @n.privatizer %3 -> %arg2 : !llvm.ptr) { | ||
| ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr): | ||
| %8 = llvm.mlir.constant(1.100000e+01 : f32) : f32 | ||
| %9 = llvm.mlir.constant(10 : i32) : i32 | ||
| %10 = llvm.load %arg0 : !llvm.ptr -> i32 | ||
| %11 = llvm.add %10, %9 : i32 | ||
| llvm.store %11, %arg1 : i32, !llvm.ptr | ||
| %12 = llvm.load %arg1 : !llvm.ptr -> i32 | ||
| %13 = llvm.sitofp %12 : i32 to f32 | ||
| %14 = llvm.fadd %13, %8 {fastmathFlags = #llvm.fastmath<contract>} : f32 | ||
| llvm.store %14, %arg2 : f32, !llvm.ptr | ||
| omp.terminator | ||
| } | ||
| llvm.return | ||
| } | ||
|
|
||
| // CHECK: define internal void @__omp_offloading_fd00 | ||
| // CHECK-DAG: %[[PRIV_I32_ALLOC:.*]] = alloca i32, i64 1, align 4 | ||
| // CHECK-DAG: %[[PRIV_FLOAT_ALLOC:.*]] = alloca float, i64 1, align 4 | ||
| // CHECK-DAG: %[[ADD_I32:.*]] = add i32 {{.*}}, 10 | ||
| // CHECK-DAG: store i32 %[[ADD_I32]], ptr %[[PRIV_I32_ALLOC]], align 4 | ||
| // CHECK-DAG: %[[LOAD_I32_AGAIN:.*]] = load i32, ptr %6, align 4 | ||
| // CHECK-DAG: %[[CAST_TO_FLOAT:.*]] = sitofp i32 %[[LOAD_I32_AGAIN]] to float | ||
| // CHECK-DAG: %[[ADD_FLOAT:.*]] = fadd contract float %[[CAST_TO_FLOAT]], 1.100000e+01 | ||
| // CHECK-DAG: store float %[[ADD_FLOAT]], ptr %[[PRIV_FLOAT_ALLOC]], align 4 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| ! Test target private | ||
| ! REQUIRES: flang, amdgcn-amd-amdhsa | ||
| ! UNSUPPORTED: nvptx64-nvidia-cuda | ||
| ! UNSUPPORTED: nvptx64-nvidia-cuda-LTO | ||
| ! UNSUPPORTED: aarch64-unknown-linux-gnu | ||
| ! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO | ||
| ! UNSUPPORTED: x86_64-pc-linux-gnu | ||
| ! UNSUPPORTED: x86_64-pc-linux-gnu-LTO | ||
|
|
||
| ! RUN: %libomptarget-compile-fortran-run-and-check-generic | ||
| program main | ||
| integer :: a = 0 | ||
| call target_private(a) | ||
| print*, "======= FORTRAN Test passed! =======" | ||
| print*, "foo(a) should not return 20, got " , a | ||
| if (a /= 20) then | ||
| stop 0 | ||
| else | ||
| stop 1 | ||
| end if | ||
|
|
||
| ! stop 0 | ||
| end program main | ||
| subroutine target_private(r) | ||
| implicit none | ||
| integer, dimension(2) :: simple_vars | ||
| integer :: a, r | ||
| ! set a to 5 | ||
| a = 5 | ||
| simple_vars(1) = a | ||
| simple_vars(2) = a | ||
| !$omp target map(tofrom: simple_vars) private(a) | ||
| ! Without private(a), a would be firstprivate, meaning it's value would be 5 | ||
| ! with private(a), it's value would be uninitialized, which means it'd have | ||
| ! a very small chance of being 5. | ||
|
Comment on lines
+34
to
+35
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ultra nit: This case is UB, so anything can happen. Not sure if there would be another case that would not invoke UB in case of missing |
||
| ! So, simple_vars(1|2) should be 5 + a_private | ||
| simple_vars(1) = simple_vars(1) + a | ||
| simple_vars(2) = simple_vars(2) + a | ||
| !$omp end target | ||
| r = simple_vars(1) + simple_vars(2) | ||
| end subroutine target_private | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just to remove any
target-specific logic as much as possible (we probably can outline this into a util that other ops use as well, for exampledistributewhich I am working on now).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just as a heads-up, I'm planning to create a PR stack this week to revamp the handling of clause-related block arguments such as these. At that point we won't have to worry so much about the order they appear in, but instead we'll be able to access them through an MLIR interface.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Excellent. That would be great, because the present usage is extremely brittle.