diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 722107c7ec6d7..831d260bbc4bb 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -5323,6 +5323,20 @@ convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, if (auto blockArgsIface = dyn_cast<omp::BlockArgOpenMPOpInterface>(oper)) forwardArgs(moduleTranslation, blockArgsIface); + else { + // Here we map entry block arguments of + // non-BlockArgOpenMPOpInterface ops if they can be encountered + // inside of a function and they define any of these arguments. + if (isa<omp::AtomicUpdateOp>(oper)) + for (auto [operand, arg] : + llvm::zip_equal(oper->getOperands(), + oper->getRegion(0).getArguments())) { + moduleTranslation.mapValue( + arg, builder.CreateLoad( + moduleTranslation.convertType(arg.getType()), + moduleTranslation.lookupValue(operand))); + } + } if (auto loopNest = dyn_cast<omp::LoopNestOp>(oper)) { assert(builder.GetInsertBlock() && @@ -5340,9 +5354,10 @@ convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, // translation of the OpenMP construct being converted (e.g. no // OpenMP runtime calls will be generated). We just need this to // prepare the kernel invocation args. 
+ SmallVector<llvm::PHINode *> phis; auto result = convertOmpOpRegions( region, oper->getName().getStringRef().str() + ".fake.region", - builder, moduleTranslation); + builder, moduleTranslation, &phis); if (failed(handleError(result, *oper))) return WalkResult::interrupt(); diff --git a/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir b/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir index 71a4c29eaf0aa..c618b68d52aaf 100644 --- a/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir +++ b/mlir/test/Target/LLVMIR/openmp-target-nesting-in-host-ops.mlir @@ -97,6 +97,20 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, llvm.return } + llvm.func @test_target_and_atomic_update(%x: !llvm.ptr, %expr : i32) { + omp.target { + omp.terminator + } + + omp.atomic.update %x : !llvm.ptr { + ^bb0(%xval: i32): + %newval = llvm.add %xval, %expr : i32 + omp.yield(%newval : i32) + } + + llvm.return + } + // CHECK-LABEL: define void @test_nested_target_in_parallel_with_private({{.*}}) { // CHECK: br label %omp.parallel.fake.region // CHECK: omp.parallel.fake.region: @@ -132,4 +146,11 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, // CHECK: call void @__kmpc_target_deinit() // CHECK: ret void // CHECK: } + +// CHECK-LABEL: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_test_target_and_atomic_update_{{.*}} { +// CHECK: call i32 @__kmpc_target_init +// CHECK: user_code.entry: +// CHECK: call void @__kmpc_target_deinit() +// CHECK: ret void +// CHECK: } }