diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 008f7099f58b4..9d707250d11d9 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -132,6 +132,8 @@ addLLVMOpBundleAttrs(mlir::ConversionPatternRewriter &rewriter, namespace { +// Replaces an existing operation with an AddressOfOp or an AddrSpaceCastOp +// depending on the existing address spaces of the type. mlir::Value replaceWithAddrOfOrASCast(mlir::ConversionPatternRewriter &rewriter, mlir::Location loc, std::uint64_t globalAS, diff --git a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp index c52be5601bbfe..96e3caa481f51 100644 --- a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp +++ b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp @@ -349,7 +349,10 @@ unsigned ConvertFIRToLLVMPattern::getAllocaAddressSpace( mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp(); assert(parentOp != nullptr && "expected insertion block to have parent operation"); - if (auto module = parentOp->getParentOfType()) + auto module = mlir::isa(parentOp) + ? mlir::cast(parentOp) + : parentOp->getParentOfType(); + if (module) if (mlir::Attribute addrSpace = mlir::DataLayout(module).getAllocaMemorySpace()) return llvm::cast(addrSpace).getUInt(); @@ -361,7 +364,10 @@ unsigned ConvertFIRToLLVMPattern::getProgramAddressSpace( mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp(); assert(parentOp != nullptr && "expected insertion block to have parent operation"); - if (auto module = parentOp->getParentOfType()) + auto module = mlir::isa(parentOp) + ? mlir::cast(parentOp) + : parentOp->getParentOfType(); + if (module) if (mlir::Attribute addrSpace = mlir::DataLayout(module).getProgramMemorySpace()) return llvm::cast(addrSpace).getUInt(); @@ -373,8 +379,14 @@ unsigned ConvertFIRToLLVMPattern::getGlobalAddressSpace( mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp(); assert(parentOp != nullptr && "expected insertion block to have parent operation"); - auto dataLayout = mlir::DataLayout::closest(parentOp); - return fir::factory::getGlobalAddressSpace(&dataLayout); + auto module = mlir::isa(parentOp) + ? mlir::cast(parentOp) + : parentOp->getParentOfType(); + if (module) + if (mlir::Attribute addrSpace = + mlir::DataLayout(module).getGlobalMemorySpace()) + return llvm::cast(addrSpace).getUInt(); + return defaultAddressSpace; } } // namespace fir diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir index cd87bf8d28ed5..864368740be02 100644 --- a/flang/test/Fir/convert-to-llvm.fir +++ b/flang/test/Fir/convert-to-llvm.fir @@ -3,8 +3,8 @@ // RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=i386-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC // RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=powerpc64le-unknown-linux-gnu" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC // RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=x86_64-pc-win32" %s | FileCheck %s --check-prefixes=CHECK,CHECK-COMDAT,GENERIC -// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-apple-darwin" %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-COMDAT,GENERIC -// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa, datalayout=e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-P0" %s | FileCheck -check-prefixes=CHECK,AMDGPU %s +// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-apple-darwin" %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-COMDAT,GENERIC +// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa, datalayout=e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" %s | FileCheck -check-prefixes=CHECK,AMDGPU %s //=================================================== // SUMMARY: Tests for FIR --> LLVM MLIR conversion @@ -17,7 +17,10 @@ fir.global @g_i0 : i32 { fir.has_value %1 : i32 } -// CHECK: llvm.mlir.global external @g_i0() {addr_space = 0 : i32} : i32 { +// CHECK: llvm.mlir.global external @g_i0() +// GENERIC-SAME: {addr_space = 0 : i32} +// AMDGPU-SAME: {addr_space = 1 : i32} +// CHECK-SAME: i32 { // CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i32) : i32 // CHECK: llvm.return %[[C0]] : i32 // CHECK: } @@ -29,7 +32,10 @@ fir.global @g_ci5 constant : i32 { fir.has_value %c : i32 } -// CHECK: llvm.mlir.global external constant @g_ci5() {addr_space = 0 : i32} : i32 { +// CHECK: llvm.mlir.global external constant @g_ci5() +// GENERIC-SAME: {addr_space = 0 : i32} +// AMDGPU-SAME: {addr_space = 1 : i32} +// CHECK-SAME: i32 { // CHECK: %[[C5:.*]] = llvm.mlir.constant(5 : i32) : i32 // CHECK: llvm.return %[[C5]] : i32 // CHECK: } @@ -37,17 +43,26 @@ fir.global @g_ci5 constant : i32 { // ----- fir.global internal @i_i515 (515:i32) : i32 -// CHECK: llvm.mlir.global internal @i_i515(515 : i32) {addr_space = 0 : i32} : i32 +// CHECK: llvm.mlir.global internal @i_i515(515 : i32) +// GENERIC-SAME: {addr_space = 0 : i32} +// AMDGPU-SAME: {addr_space = 1 : i32} +// CHECK-SAME: : i32 // ----- fir.global common @C_i511 (0:i32) : i32 -// CHECK: llvm.mlir.global common @C_i511(0 : i32) {addr_space = 0 : i32} : i32 +// CHECK: llvm.mlir.global common @C_i511(0 : i32) +// GENERIC-SAME: {addr_space = 0 : i32} +// AMDGPU-SAME: {addr_space = 1 : i32} +// CHECK-SAME: : i32 // ----- fir.global weak @w_i86 (86:i32) : i32 -// CHECK: llvm.mlir.global weak @w_i86(86 : i32) {addr_space = 0 : i32} : i32 +// CHECK: llvm.mlir.global weak @w_i86(86 : i32) +// GENERIC-SAME: {addr_space = 0 : i32} +// AMDGPU-SAME: {addr_space = 1 : i32} +// CHECK-SAME: : i32 // ----- @@ -69,9 +84,13 @@ fir.global @symbol : i64 { fir.has_value %0 : i64 } -// CHECK: %{{.*}} = llvm.mlir.addressof @[[SYMBOL:.*]] : !llvm.ptr +// CHECK: %[[ADDROF:.*]] = llvm.mlir.addressof @[[SYMBOL:.*]] : !llvm.ptr +// AMDGPU: %{{.*}} = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<1> to !llvm.ptr -// CHECK: llvm.mlir.global external @[[SYMBOL]]() {addr_space = 0 : i32} : i64 { +// CHECK: llvm.mlir.global external @[[SYMBOL]]() +// GENERIC-SAME: {addr_space = 0 : i32} +// AMDGPU-SAME: {addr_space = 1 : i32} +// CHECK-SAME: i64 { // CHECK: %{{.*}} = llvm.mlir.constant(1 : i64) : i64 // CHECK: llvm.return %{{.*}} : i64 // CHECK: } @@ -88,7 +107,10 @@ fir.global internal @_QEmultiarray : !fir.array<32x32xi32> { fir.has_value %2 : !fir.array<32x32xi32> } -// CHECK: llvm.mlir.global internal @_QEmultiarray() {addr_space = 0 : i32} : !llvm.array<32 x array<32 x i32>> { +// CHECK: llvm.mlir.global internal @_QEmultiarray() +// GENERIC-SAME: {addr_space = 0 : i32} +// AMDGPU-SAME: {addr_space = 1 : i32} +// CHECK-SAME: : !llvm.array<32 x array<32 x i32>> { // CHECK: %[[CST:.*]] = llvm.mlir.constant(dense<1> : vector<32x32xi32>) : !llvm.array<32 x array<32 x i32>> // CHECK: llvm.return %[[CST]] : !llvm.array<32 x array<32 x i32>> // CHECK: } @@ -105,7 +127,10 @@ fir.global internal @_QEmultiarray : !fir.array<32xi32> { fir.has_value %2 : !fir.array<32xi32> } -// CHECK: llvm.mlir.global internal @_QEmultiarray() {addr_space = 0 : i32} : !llvm.array<32 x i32> { +// CHECK: llvm.mlir.global internal @_QEmultiarray() +// GENERIC-SAME: {addr_space = 0 : i32} +// AMDGPU-SAME: {addr_space = 1 : i32} +// CHECK-SAME: : !llvm.array<32 x i32> { // CHECK: %[[CST:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: %{{.*}} = llvm.mlir.undef : !llvm.array<32 x i32> // CHECK: %{{.*}} = llvm.insertvalue %[[CST]], %{{.*}}[5] : !llvm.array<32 x i32> @@ -1801,7 +1826,9 @@ func.func @embox1(%arg0: !fir.ref>) { // CHECK: %{{.*}} = llvm.insertvalue %[[VERSION]], %{{.*}}[2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> // CHECK: %[[TYPE_CODE_I8:.*]] = llvm.trunc %[[TYPE_CODE]] : i32 to i8 // CHECK: %{{.*}} = llvm.insertvalue %[[TYPE_CODE_I8]], %{{.*}}[4] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)> -// CHECK: %[[TDESC:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr +// GENERIC: %[[TDESC:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr +// AMDGPU: %[[ADDROF:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr<1> +// AMDGPU: %[[TDESC:.*]] = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<1> to !llvm.ptr // CHECK: %{{.*}} = llvm.insertvalue %[[TDESC]], %{{.*}}[7] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)> // ----- @@ -2824,7 +2851,10 @@ func.func @coordinate_array_unknown_size_1d(%arg0: !fir.ptr> fir.global common @c_(dense<0> : vector<4294967296xi8>) : !fir.array<4294967296xi8> -// CHECK: llvm.mlir.global common @c_(dense<0> : vector<4294967296xi8>) {addr_space = 0 : i32} : !llvm.array<4294967296 x i8> +// CHECK: llvm.mlir.global common @c_(dense<0> : vector<4294967296xi8>) +// GENERIC-SAME: {addr_space = 0 : i32} +// AMDGPU-SAME: {addr_space = 1 : i32} +// CHECK-SAME: !llvm.array<4294967296 x i8> // ----- diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index d1f78c32596ba..220eee3cb8b08 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -7412,6 +7412,12 @@ static void FixupDebugInfoForOutlinedFunction( } } +static Value *removeASCastIfPresent(Value *V) { + if (Operator::getOpcode(V) == Instruction::AddrSpaceCast) + return cast(V)->getOperand(0); + return V; +} + static Expected createOutlinedFunction( OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, @@ -7575,7 +7581,8 @@ static Expected createOutlinedFunction( // preceding mapped arguments that refer to the same global that may be // seperate segments. To prevent this, we defer global processing until all // other processing has been performed. - if (isa(Input)) { + if (llvm::isa( + removeASCastIfPresent(Input))) { DeferredReplacement.push_back(std::make_pair(Input, InputCopy)); continue; }