diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 11780f84697b1..31d97313aedb7 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -2494,7 +2494,12 @@ LogicalResult PrivateClauseOp::verify() { << "Did not expect any values to be yielded."; } - if (yieldedTypes.size() == 1 && yieldedTypes.front() == symType) + if (yieldedTypes.size() != 1) + return mlir::emitError(terminator->getLoc()) + << "Expected exactly 1 yielded value, but got " + << yieldedTypes.size(); + + if (yieldedTypes.front() == symType) return success(); auto error = mlir::emitError(yieldOp.getLoc()) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 458d05d5059db..29ab6f987906f 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -13,6 +13,7 @@ #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" #include "mlir/Analysis/TopologicalSortUtils.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/OpenMP/OpenMPClauseOperands.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/Dialect/OpenMP/OpenMPInterfaces.h" #include "mlir/IR/IRMapping.h" @@ -3241,12 +3242,140 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, DataLayout dl = DataLayout(opInst.getParentOfType()); SmallVector mapVars = targetOp.getMapVars(); llvm::Function *llvmOutlinedFn = nullptr; - // TODO: It can also be false if a compile-time constant `false` IF clause is // specified. bool isOffloadEntry = isTargetDevice || !ompBuilder->Config.TargetTriples.empty(); + if (!targetOp.getPrivateVars().empty()) { + OperandRange privateVars = targetOp.getPrivateVars(); + std::optional privateSyms = targetOp.getPrivateSyms(); + auto &targetRegion = opInst.getRegion(0); + auto &firstTargetBlock = targetRegion.front(); + auto *regionArgsStart = firstTargetBlock.getArguments().begin(); + auto *privArgsStart = regionArgsStart + targetOp.getMapVars().size(); + auto *privArgsEnd = privArgsStart + targetOp.getPrivateVars().size(); + BitVector blockArgsBV(firstTargetBlock.getNumArguments(), false); + omp::PrivateClauseOps newPrivateClauses; + MutableArrayRef argSubRangePrivates(privArgsStart, privArgsEnd); + for (auto [privVar, privatizerNameAttr, blockArg] : + llvm::zip_equal(privateVars, *privateSyms, argSubRangePrivates)) { + + SymbolRefAttr privSym = llvm::cast(privatizerNameAttr); + + // 1. Look up the privatizer in the symbol table + MLIRContext &context = moduleTranslation.getContext(); + mlir::IRRewriter rewriter(&context); + omp::PrivateClauseOp privatizer = + SymbolTable::lookupNearestSymbolFrom(&opInst, + privSym); + // Handle only private for now. Also, not handling allocatables yet. + if (privatizer.getDataSharingType() != + omp::DataSharingClauseType::Private || + !privatizer.getDeallocRegion().empty()) { + newPrivateClauses.privateSyms.push_back(privSym); + newPrivateClauses.privateVars.push_back(privVar); + continue; + } + + auto &allocRegion = privatizer.getAllocRegion(); + // `alloc` region should have only 1 argument + assert(allocRegion.getNumArguments() == 1 && + "alloc region of omp::PrivateClauseOp should have one argument"); + auto allocRegionArg = allocRegion.getArgument(0); + // Assuming we have the following targetOp and Privatizer + // + // omp.private {type = private} @x.privatizer : !llvm.ptr alloc { + // %1 = alloca... + // omp.yield(%1) + // } + // omp.target map(..) map(..) private(privVar) { + // ^bb0(map_arg0, ..., map_argn, priv_arg0): + // ... + // ... + // store %v, %priv_arg0 + // omp.terminator + // } + // Roughly, we do the following - + // Split the first block (bb0) of the target region into two blocks. + // Then, clone the alloc region of the privatizer between the two + // new blocks. When cloning replace the alloc argument with privVar. + // We'll then have + // + // omp.target map(..) map(..) private(privVar) { + // ^bb0(map_arg0, ..., map_argn, priv_arg0, ..., priv_argn): + // ^bb1: (cloned region) // no predecessor + // %1 = alloca... + // omp.yield(%1) + // ^bb2: // no predecessor + // ... + // ... + // store %v, %priv_arg0 + // omp.terminator + // } + // Next, add an unconditional branch from ^bb0 to ^bb1 and change the + // yield in the last block of the cloned alloc region to an unconditional + // branch to the other split block before replacing all uses of + // 'priv_arg0' with the yielded value to finally get the following code + // structure + // + // omp.target map(..) map(..) private(privVar) { + // ^bb0(map_arg0, ..., map_argn, priv_arg0, ..., priv_argn): + // llvm.br ^bb1 + // ^bb1: (cloned region) // pred: ^bb0 + // %1 = alloca... + // llvm.br ^bb2 + // ^bb2: // pred: ^bb1 + // ... + // ... + // store %v, %1 // %priv_arg0 replaced with the yield value + // omp.terminator + // } + Block *newBlock = + rewriter.splitBlock(&firstTargetBlock, firstTargetBlock.begin()); + rewriter.setInsertionPointToStart(&firstTargetBlock); + Location loc = targetOp.getLoc(); + rewriter.create(loc, ValueRange(), newBlock); + + IRMapping cloneMap; + cloneMap.map(allocRegionArg, privVar); + auto newBlockIter = std::next(targetRegion.begin(), 1); + allocRegion.cloneInto(&targetRegion, newBlockIter, cloneMap); + + unsigned allocRegNumBlocks = allocRegion.getBlocks().size(); + newBlockIter = std::next(targetRegion.begin(), 1); + auto clonedAllocRegionBackIter = + std::next(newBlockIter, (allocRegNumBlocks - 1)); + Block &clonedAllocRegionLastBlock = *clonedAllocRegionBackIter; + + LLVM::BrOp brOp = dyn_cast(firstTargetBlock.getTerminator()); + omp::YieldOp yieldOp = + dyn_cast(clonedAllocRegionLastBlock.getTerminator()); + auto newValue = yieldOp.getResults().front(); + rewriter.setInsertionPointAfter(yieldOp); + auto *oldSucc = brOp.getSuccessor(); + brOp.setSuccessor(&*newBlockIter); + // TODO:Consider cloning brOp and adding it to clonedAllocRegEngBlock + // TODO: Can we not simply merge clonedAllocRegEngBlock and oldSucc? + rewriter.create(loc, ValueRange(), oldSucc); + blockArgsBV.set(blockArg.getArgNumber()); + rewriter.replaceAllUsesWith(blockArg, newValue); + rewriter.eraseOp(yieldOp); + } + // Do some fix ups now. + // First, remove the blockArguments that we just privatized + firstTargetBlock.eraseArguments(blockArgsBV); + // Then remove the private vars and privatizers that we have + // processed i.e privatized just now. + if (newPrivateClauses.privateSyms.empty()) { + targetOp.getPrivateVarsMutable().clear(); + targetOp.removePrivateSymsAttr(); + } else { + targetOp.setPrivateSymsAttr(mlir::ArrayAttr::get( + targetOp.getContext(), newPrivateClauses.privateSyms)); + targetOp.getPrivateVarsMutable().assign(newPrivateClauses.privateVars); + } + } LogicalResult bodyGenStatus = success(); using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; auto bodyCB = [&](InsertPointTy allocaIP, diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 1d1d93f097758..e3267c520490a 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -2219,7 +2219,7 @@ omp.private {type = private} @x.privatizer : i32 alloc { omp.private {type = private} @x.privatizer : i32 alloc { ^bb0(%arg0: i32): - // expected-error @below {{Invalid yielded value. Expected type: 'i32', got: None}} + // expected-error @below {{Expected exactly 1 yielded value, but got 0}} omp.yield } diff --git a/mlir/test/Target/LLVMIR/openmp-target-private.mlir b/mlir/test/Target/LLVMIR/openmp-target-private.mlir new file mode 100644 index 0000000000000..97da4c832ca8a --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-target-private.mlir @@ -0,0 +1,72 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +omp.private {type = private} @simple_var.privatizer : !llvm.ptr alloc { +^bb0(%arg0: !llvm.ptr): + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var", pinned} : (i64) -> !llvm.ptr + omp.yield(%1 : !llvm.ptr) +} +llvm.func @target_map_single_private() attributes {fir.internal_name = "_QPtarget_map_single_private"} { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr + %2 = llvm.mlir.constant(1 : i64) : i64 + %3 = llvm.alloca %2 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr + %4 = llvm.mlir.constant(2 : i32) : i32 + llvm.store %4, %3 : i32, !llvm.ptr + %5 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"} + omp.target map_entries(%5 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr) { + ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): + %6 = llvm.mlir.constant(10 : i32) : i32 + %7 = llvm.load %arg0 : !llvm.ptr -> i32 + %8 = llvm.add %7, %6 : i32 + llvm.store %8, %arg1 : i32, !llvm.ptr + omp.terminator + } + llvm.return +} +// CHECK: define internal void @__omp_offloading_fd00 +// CHECK-NOT: define {{.*}} +// CHECK-DAG: %[[PRIV_ALLOC:.*]] = alloca i32, i64 1, align 4 +// CHECK-DAG: %[[ADD:.*]] = add i32 {{.*}}, 10 +// CHECK-DAG: store i32 %[[ADD]], ptr %[[PRIV_ALLOC]], align 4 + +omp.private {type = private} @n.privatizer : !llvm.ptr alloc { +^bb0(%arg0: !llvm.ptr): + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x f32 {bindc_name = "n", pinned} : (i64) -> !llvm.ptr + omp.yield(%1 : !llvm.ptr) +} +llvm.func @target_map_2_privates() attributes {fir.internal_name = "_QPtarget_map_2_privates"} { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr + %3 = llvm.alloca %0 x f32 {bindc_name = "n"} : (i64) -> !llvm.ptr + %5 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr + %6 = llvm.mlir.constant(2 : i32) : i32 + llvm.store %6, %5 : i32, !llvm.ptr + %7 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"} + omp.target map_entries(%7 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr, @n.privatizer %3 -> %arg2 : !llvm.ptr) { + ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr): + %8 = llvm.mlir.constant(1.100000e+01 : f32) : f32 + %9 = llvm.mlir.constant(10 : i32) : i32 + %10 = llvm.load %arg0 : !llvm.ptr -> i32 + %11 = llvm.add %10, %9 : i32 + llvm.store %11, %arg1 : i32, !llvm.ptr + %12 = llvm.load %arg1 : !llvm.ptr -> i32 + %13 = llvm.sitofp %12 : i32 to f32 + %14 = llvm.fadd %13, %8 {fastmathFlags = #llvm.fastmath} : f32 + llvm.store %14, %arg2 : f32, !llvm.ptr + omp.terminator + } + llvm.return +} + +// CHECK: define internal void @__omp_offloading_fd00 +// CHECK-DAG: %[[PRIV_I32_ALLOC:.*]] = alloca i32, i64 1, align 4 +// CHECK-DAG: %[[PRIV_FLOAT_ALLOC:.*]] = alloca float, i64 1, align 4 +// CHECK-DAG: %[[ADD_I32:.*]] = add i32 {{.*}}, 10 +// CHECK-DAG: store i32 %[[ADD_I32]], ptr %[[PRIV_I32_ALLOC]], align 4 +// CHECK-DAG: %[[LOAD_I32_AGAIN:.*]] = load i32, ptr %6, align 4 +// CHECK-DAG: %[[CAST_TO_FLOAT:.*]] = sitofp i32 %[[LOAD_I32_AGAIN]] to float +// CHECK-DAG: %[[ADD_FLOAT:.*]] = fadd contract float %[[CAST_TO_FLOAT]], 1.100000e+01 +// CHECK-DAG: store float %[[ADD_FLOAT]], ptr %[[PRIV_FLOAT_ALLOC]], align 4 + diff --git a/offload/test/offloading/fortran/target-private-map.f90 b/offload/test/offloading/fortran/target-private-map.f90 new file mode 100644 index 0000000000000..dfe29f33aae9d --- /dev/null +++ b/offload/test/offloading/fortran/target-private-map.f90 @@ -0,0 +1,41 @@ +! Test target private +! REQUIRES: flang, amdgcn-amd-amdhsa +! UNSUPPORTED: nvptx64-nvidia-cuda +! UNSUPPORTED: nvptx64-nvidia-cuda-LTO +! UNSUPPORTED: aarch64-unknown-linux-gnu +! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +! UNSUPPORTED: x86_64-pc-linux-gnu +! UNSUPPORTED: x86_64-pc-linux-gnu-LTO + +! RUN: %libomptarget-compile-fortran-run-and-check-generic +program main + integer :: a = 0 + call target_private(a) + print*, "======= FORTRAN Test passed! =======" + print*, "foo(a) should not return 20, got " , a + if (a /= 20) then + stop 0 + else + stop 1 + end if + + ! stop 0 +end program main +subroutine target_private(r) + implicit none + integer, dimension(2) :: simple_vars + integer :: a, r + ! set a to 5 + a = 5 + simple_vars(1) = a + simple_vars(2) = a + !$omp target map(tofrom: simple_vars) private(a) + ! Without private(a), a would be firstprivate, meaning it's value would be 5 + ! with private(a), it's value would be uninitialized, which means it'd have + ! a very small chance of being 5. + ! So, simple_vars(1|2) should be 5 + a_private + simple_vars(1) = simple_vars(1) + a + simple_vars(2) = simple_vars(2) + a + !$omp end target + r = simple_vars(1) + simple_vars(2) +end subroutine target_private