Reland [flang] In AllocMemOp lowering, convert types for calling malloc on 32-bit #130386

ArcaneNibble · 2025-03-08T03:05:35Z

Previous PR: #129308

Changes: I added REQUIRES: x86-registered-target to the newly-added test so that it won't break the other buildbots.

llvmbot · 2025-03-08T03:06:08Z

@llvm/pr-subscribers-flang-fir-hlfir

Author: R (ArcaneNibble)

Changes

Previous PR: #129308

Changes: I added REQUIRES: x86-registered-target to the newly-added test so that it won't break the other buildbots.

Full diff: https://github.com/llvm/llvm-project/pull/130386.diff

7 Files Affected:

(modified) flang/lib/Optimizer/CodeGen/CodeGen.cpp (+13-7)
(modified) flang/lib/Optimizer/CodeGen/TypeConverter.cpp (+18-1)
(added) flang/test/Fir/alloc-32.fir (+30)
(modified) flang/test/Fir/alloc.fir (+3)
(modified) flang/test/Integration/OpenMP/private-global.f90 (+4-1)
(modified) flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 (+4)
(modified) flang/test/Lower/forall/character-1.f90 (+3)

diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index a2743edd7844a..caae5e6465d3c 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -982,7 +982,8 @@ struct EmboxCharOpConversion : public fir::FIROpConversion<fir::EmboxCharOp> {
 template <typename ModuleOp>
 static mlir::SymbolRefAttr
 getMallocInModule(ModuleOp mod, fir::AllocMemOp op,
-                  mlir::ConversionPatternRewriter &rewriter) {
+                  mlir::ConversionPatternRewriter &rewriter,
+                  mlir::Type indexType) {
   static constexpr char mallocName[] = "malloc";
   if (auto mallocFunc =
           mod.template lookupSymbol<mlir::LLVM::LLVMFuncOp>(mallocName))
@@ -992,7 +993,6 @@ getMallocInModule(ModuleOp mod, fir::AllocMemOp op,
     return mlir::SymbolRefAttr::get(userMalloc);
 
   mlir::OpBuilder moduleBuilder(mod.getBodyRegion());
-  auto indexType = mlir::IntegerType::get(op.getContext(), 64);
   auto mallocDecl = moduleBuilder.create<mlir::LLVM::LLVMFuncOp>(
       op.getLoc(), mallocName,
       mlir::LLVM::LLVMFunctionType::get(getLlvmPtrType(op.getContext()),
@@ -1002,12 +1002,13 @@ getMallocInModule(ModuleOp mod, fir::AllocMemOp op,
 }
 
 /// Return the LLVMFuncOp corresponding to the standard malloc call.
-static mlir::SymbolRefAttr
-getMalloc(fir::AllocMemOp op, mlir::ConversionPatternRewriter &rewriter) {
+static mlir::SymbolRefAttr getMalloc(fir::AllocMemOp op,
+                                     mlir::ConversionPatternRewriter &rewriter,
+                                     mlir::Type indexType) {
   if (auto mod = op->getParentOfType<mlir::gpu::GPUModuleOp>())
-    return getMallocInModule(mod, op, rewriter);
+    return getMallocInModule(mod, op, rewriter, indexType);
   auto mod = op->getParentOfType<mlir::ModuleOp>();
-  return getMallocInModule(mod, op, rewriter);
+  return getMallocInModule(mod, op, rewriter, indexType);
 }
 
 /// Helper function for generating the LLVM IR that computes the distance
@@ -1067,7 +1068,12 @@ struct AllocMemOpConversion : public fir::FIROpConversion<fir::AllocMemOp> {
     for (mlir::Value opnd : adaptor.getOperands())
       size = rewriter.create<mlir::LLVM::MulOp>(
           loc, ity, size, integerCast(loc, rewriter, ity, opnd));
-    heap->setAttr("callee", getMalloc(heap, rewriter));
+    auto mallocTyWidth = lowerTy().getIndexTypeBitwidth();
+    auto mallocTy =
+        mlir::IntegerType::get(rewriter.getContext(), mallocTyWidth);
+    if (mallocTyWidth != ity.getIntOrFloatBitWidth())
+      size = integerCast(loc, rewriter, mallocTy, size);
+    heap->setAttr("callee", getMalloc(heap, rewriter, mallocTy));
     rewriter.replaceOpWithNewOp<mlir::LLVM::CallOp>(
         heap, ::getLlvmPtrType(heap.getContext()), size,
         addLLVMOpBundleAttrs(rewriter, heap->getAttrs(), 1));
diff --git a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
index 89f498433806e..1a1d3a8cfb870 100644
--- a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
+++ b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
@@ -28,10 +28,27 @@
 
 namespace fir {
 
+static mlir::LowerToLLVMOptions MakeLowerOptions(mlir::ModuleOp module) {
+  llvm::StringRef dataLayoutString;
+  auto dataLayoutAttr = module->template getAttrOfType<mlir::StringAttr>(
+      mlir::LLVM::LLVMDialect::getDataLayoutAttrName());
+  if (dataLayoutAttr)
+    dataLayoutString = dataLayoutAttr.getValue();
+
+  auto options = mlir::LowerToLLVMOptions(module.getContext());
+  auto llvmDL = llvm::DataLayout(dataLayoutString);
+  if (llvmDL.getPointerSizeInBits(0) == 32) {
+    // FIXME: Should translateDataLayout in the MLIR layer be doing this?
+    options.overrideIndexBitwidth(32);
+  }
+  options.dataLayout = llvmDL;
+  return options;
+}
+
 LLVMTypeConverter::LLVMTypeConverter(mlir::ModuleOp module, bool applyTBAA,
                                      bool forceUnifiedTBAATree,
                                      const mlir::DataLayout &dl)
-    : mlir::LLVMTypeConverter(module.getContext()),
+    : mlir::LLVMTypeConverter(module.getContext(), MakeLowerOptions(module)),
       kindMapping(getKindMapping(module)),
       specifics(CodeGenSpecifics::get(
           module.getContext(), getTargetTriple(module), getKindMapping(module),
diff --git a/flang/test/Fir/alloc-32.fir b/flang/test/Fir/alloc-32.fir
new file mode 100644
index 0000000000000..3eefc3225fac7
--- /dev/null
+++ b/flang/test/Fir/alloc-32.fir
@@ -0,0 +1,30 @@
+// RUN: %flang_fc1 -triple i686 -emit-llvm  %s -o - | FileCheck %s
+// REQUIRES: x86-registered-target
+
+// This is a check for calling malloc using i32 when on a 32-bit target (only).
+// It doesn't contain the comprehensive tests that alloc.fir has, and
+// that file should be used to exercise most code paths.
+
+module attributes {
+    fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "i686"
+} {
+
+// CHECK-LABEL: define ptr @allocmem_scalar_nonchar(
+// CHECK: call ptr @malloc(i32 4)
+func.func @allocmem_scalar_nonchar() -> !fir.heap<i32> {
+  %1 = fir.allocmem i32
+  return %1 : !fir.heap<i32>
+}
+
+// CHECK-LABEL: define ptr @allocmem_scalar_dynchar(
+// CHECK-SAME: i32 %[[len:.*]])
+// CHECK: %[[mul1:.*]] = sext i32 %[[len]] to i64
+// CHECK: %[[mul2:.*]] = mul i64 1, %[[mul1]]
+// CHECK: %[[trunc:.*]] = trunc i64 %[[mul2]] to i32
+// CHECK: call ptr @malloc(i32 %[[trunc]])
+func.func @allocmem_scalar_dynchar(%l : i32) -> !fir.heap<!fir.char<1,?>> {
+  %1 = fir.allocmem !fir.char<1,?>(%l : i32)
+  return %1 : !fir.heap<!fir.char<1,?>>
+}
+
+}
diff --git a/flang/test/Fir/alloc.fir b/flang/test/Fir/alloc.fir
index ba9b08dad7764..421c06a4780ea 100644
--- a/flang/test/Fir/alloc.fir
+++ b/flang/test/Fir/alloc.fir
@@ -2,6 +2,9 @@
 // RUN: %flang_fc1 -emit-llvm  %s -o - | FileCheck %s
 
 // UNSUPPORTED: system-windows
+// UNSUPPORTED: target-x86
+// UNSUPPORTED: target=sparc-{{.*}}
+// UNSUPPORTED: target=sparcel-{{.*}}
 
 // CHECK-LABEL: define ptr @alloca_scalar_nonchar()
 // CHECK: alloca i32, i64 1
diff --git a/flang/test/Integration/OpenMP/private-global.f90 b/flang/test/Integration/OpenMP/private-global.f90
index 1aacfb4c87198..a177c0e57ae47 100644
--- a/flang/test/Integration/OpenMP/private-global.f90
+++ b/flang/test/Integration/OpenMP/private-global.f90
@@ -1,4 +1,7 @@
 !RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s
+!UNSUPPORTED: target-x86
+!UNSUPPORTED: target=sparc-{{.*}}
+!UNSUPPORTED: target=sparcel-{{.*}}
 
 ! Regression test for https://github.com/llvm/llvm-project/issues/106297
 
@@ -36,7 +39,7 @@ program bug
 ! CHECK:         %[[FIFTY_BOX_VAL:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8 } { ptr undef, i64 4, i32 20240719, i8 0, i8 9, i8 0, i8 0 }, ptr %[[FIFTY]], 0
 ! CHECK:         store { ptr, i64, i32, i8, i8, i8, i8 } %[[FIFTY_BOX_VAL]], ptr %[[BOXED_FIFTY]], align 8
 ! CHECK:         call void @llvm.memcpy.p0.p0.i32(ptr %[[TABLE_BOX_ADDR2]], ptr %[[INTERMEDIATE]], i32 48, i1 false)
-! CHECK:         call void @_FortranAAssign(ptr %[[TABLE_BOX_ADDR2]], ptr %[[BOXED_FIFTY]], ptr @{{.*}}, i32 9)
+! CHECK:         call void @_FortranAAssign(ptr %[[TABLE_BOX_ADDR2]], ptr %[[BOXED_FIFTY]], ptr @{{.*}}, i32 12)
 ! CHECK:         call void @llvm.memcpy.p0.p0.i32(ptr %[[TABLE_BOX_ADDR]], ptr %[[PRIV_BOX_ALLOC]], i32 48, i1 false)
 ! CHECK:         %[[PRIV_TABLE:.*]] = call ptr @malloc(i64 40)
 ! ...
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90
index be25169e7d83e..953cc2cf2b999 100644
--- a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90
+++ b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90
@@ -4,6 +4,10 @@
 ! RUN: %flang_fc1 -emit-llvm -fopenmp -o - %s 2>&1 \
 ! RUN: | FileCheck %s
 
+! UNSUPPORTED: target-x86
+! UNSUPPORTED: target=sparc-{{.*}}
+! UNSUPPORTED: target=sparcel-{{.*}}
+
 subroutine proc
   implicit none
   real(8),allocatable :: F(:)
diff --git a/flang/test/Lower/forall/character-1.f90 b/flang/test/Lower/forall/character-1.f90
index d5f968ba93450..ef15f5b0cc3c2 100644
--- a/flang/test/Lower/forall/character-1.f90
+++ b/flang/test/Lower/forall/character-1.f90
@@ -2,6 +2,9 @@
 ! RUN: %flang -emit-llvm -flang-deprecated-no-hlfir -S -mmlir -disable-external-name-interop %s -o - | FileCheck %s
 ! Test from Fortran source through to LLVM IR.
 ! UNSUPPORTED: system-windows
+! UNSUPPORTED: target-x86
+! UNSUPPORTED: target=sparc-{{.*}}
+! UNSUPPORTED: target=sparcel-{{.*}}
 
 ! Assumed size array of assumed length character.
 program test

llvmbot · 2025-03-08T03:06:09Z

@llvm/pr-subscribers-flang-codegen

Author: R (ArcaneNibble)

Changes

Previous PR: #129308

Changes: I added REQUIRES: x86-registered-target to the newly-added test so that it won't break the other buildbots.

Full diff: https://github.com/llvm/llvm-project/pull/130386.diff

7 Files Affected:

(modified) flang/lib/Optimizer/CodeGen/CodeGen.cpp (+13-7)
(modified) flang/lib/Optimizer/CodeGen/TypeConverter.cpp (+18-1)
(added) flang/test/Fir/alloc-32.fir (+30)
(modified) flang/test/Fir/alloc.fir (+3)
(modified) flang/test/Integration/OpenMP/private-global.f90 (+4-1)
(modified) flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 (+4)
(modified) flang/test/Lower/forall/character-1.f90 (+3)

diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index a2743edd7844a..caae5e6465d3c 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -982,7 +982,8 @@ struct EmboxCharOpConversion : public fir::FIROpConversion<fir::EmboxCharOp> {
 template <typename ModuleOp>
 static mlir::SymbolRefAttr
 getMallocInModule(ModuleOp mod, fir::AllocMemOp op,
-                  mlir::ConversionPatternRewriter &rewriter) {
+                  mlir::ConversionPatternRewriter &rewriter,
+                  mlir::Type indexType) {
   static constexpr char mallocName[] = "malloc";
   if (auto mallocFunc =
           mod.template lookupSymbol<mlir::LLVM::LLVMFuncOp>(mallocName))
@@ -992,7 +993,6 @@ getMallocInModule(ModuleOp mod, fir::AllocMemOp op,
     return mlir::SymbolRefAttr::get(userMalloc);
 
   mlir::OpBuilder moduleBuilder(mod.getBodyRegion());
-  auto indexType = mlir::IntegerType::get(op.getContext(), 64);
   auto mallocDecl = moduleBuilder.create<mlir::LLVM::LLVMFuncOp>(
       op.getLoc(), mallocName,
       mlir::LLVM::LLVMFunctionType::get(getLlvmPtrType(op.getContext()),
@@ -1002,12 +1002,13 @@ getMallocInModule(ModuleOp mod, fir::AllocMemOp op,
 }
 
 /// Return the LLVMFuncOp corresponding to the standard malloc call.
-static mlir::SymbolRefAttr
-getMalloc(fir::AllocMemOp op, mlir::ConversionPatternRewriter &rewriter) {
+static mlir::SymbolRefAttr getMalloc(fir::AllocMemOp op,
+                                     mlir::ConversionPatternRewriter &rewriter,
+                                     mlir::Type indexType) {
   if (auto mod = op->getParentOfType<mlir::gpu::GPUModuleOp>())
-    return getMallocInModule(mod, op, rewriter);
+    return getMallocInModule(mod, op, rewriter, indexType);
   auto mod = op->getParentOfType<mlir::ModuleOp>();
-  return getMallocInModule(mod, op, rewriter);
+  return getMallocInModule(mod, op, rewriter, indexType);
 }
 
 /// Helper function for generating the LLVM IR that computes the distance
@@ -1067,7 +1068,12 @@ struct AllocMemOpConversion : public fir::FIROpConversion<fir::AllocMemOp> {
     for (mlir::Value opnd : adaptor.getOperands())
       size = rewriter.create<mlir::LLVM::MulOp>(
           loc, ity, size, integerCast(loc, rewriter, ity, opnd));
-    heap->setAttr("callee", getMalloc(heap, rewriter));
+    auto mallocTyWidth = lowerTy().getIndexTypeBitwidth();
+    auto mallocTy =
+        mlir::IntegerType::get(rewriter.getContext(), mallocTyWidth);
+    if (mallocTyWidth != ity.getIntOrFloatBitWidth())
+      size = integerCast(loc, rewriter, mallocTy, size);
+    heap->setAttr("callee", getMalloc(heap, rewriter, mallocTy));
     rewriter.replaceOpWithNewOp<mlir::LLVM::CallOp>(
         heap, ::getLlvmPtrType(heap.getContext()), size,
         addLLVMOpBundleAttrs(rewriter, heap->getAttrs(), 1));
diff --git a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
index 89f498433806e..1a1d3a8cfb870 100644
--- a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
+++ b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
@@ -28,10 +28,27 @@
 
 namespace fir {
 
+static mlir::LowerToLLVMOptions MakeLowerOptions(mlir::ModuleOp module) {
+  llvm::StringRef dataLayoutString;
+  auto dataLayoutAttr = module->template getAttrOfType<mlir::StringAttr>(
+      mlir::LLVM::LLVMDialect::getDataLayoutAttrName());
+  if (dataLayoutAttr)
+    dataLayoutString = dataLayoutAttr.getValue();
+
+  auto options = mlir::LowerToLLVMOptions(module.getContext());
+  auto llvmDL = llvm::DataLayout(dataLayoutString);
+  if (llvmDL.getPointerSizeInBits(0) == 32) {
+    // FIXME: Should translateDataLayout in the MLIR layer be doing this?
+    options.overrideIndexBitwidth(32);
+  }
+  options.dataLayout = llvmDL;
+  return options;
+}
+
 LLVMTypeConverter::LLVMTypeConverter(mlir::ModuleOp module, bool applyTBAA,
                                      bool forceUnifiedTBAATree,
                                      const mlir::DataLayout &dl)
-    : mlir::LLVMTypeConverter(module.getContext()),
+    : mlir::LLVMTypeConverter(module.getContext(), MakeLowerOptions(module)),
       kindMapping(getKindMapping(module)),
       specifics(CodeGenSpecifics::get(
           module.getContext(), getTargetTriple(module), getKindMapping(module),
diff --git a/flang/test/Fir/alloc-32.fir b/flang/test/Fir/alloc-32.fir
new file mode 100644
index 0000000000000..3eefc3225fac7
--- /dev/null
+++ b/flang/test/Fir/alloc-32.fir
@@ -0,0 +1,30 @@
+// RUN: %flang_fc1 -triple i686 -emit-llvm  %s -o - | FileCheck %s
+// REQUIRES: x86-registered-target
+
+// This is a check for calling malloc using i32 when on a 32-bit target (only).
+// It doesn't contain the comprehensive tests that alloc.fir has, and
+// that file should be used to exercise most code paths.
+
+module attributes {
+    fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "i686"
+} {
+
+// CHECK-LABEL: define ptr @allocmem_scalar_nonchar(
+// CHECK: call ptr @malloc(i32 4)
+func.func @allocmem_scalar_nonchar() -> !fir.heap<i32> {
+  %1 = fir.allocmem i32
+  return %1 : !fir.heap<i32>
+}
+
+// CHECK-LABEL: define ptr @allocmem_scalar_dynchar(
+// CHECK-SAME: i32 %[[len:.*]])
+// CHECK: %[[mul1:.*]] = sext i32 %[[len]] to i64
+// CHECK: %[[mul2:.*]] = mul i64 1, %[[mul1]]
+// CHECK: %[[trunc:.*]] = trunc i64 %[[mul2]] to i32
+// CHECK: call ptr @malloc(i32 %[[trunc]])
+func.func @allocmem_scalar_dynchar(%l : i32) -> !fir.heap<!fir.char<1,?>> {
+  %1 = fir.allocmem !fir.char<1,?>(%l : i32)
+  return %1 : !fir.heap<!fir.char<1,?>>
+}
+
+}
diff --git a/flang/test/Fir/alloc.fir b/flang/test/Fir/alloc.fir
index ba9b08dad7764..421c06a4780ea 100644
--- a/flang/test/Fir/alloc.fir
+++ b/flang/test/Fir/alloc.fir
@@ -2,6 +2,9 @@
 // RUN: %flang_fc1 -emit-llvm  %s -o - | FileCheck %s
 
 // UNSUPPORTED: system-windows
+// UNSUPPORTED: target-x86
+// UNSUPPORTED: target=sparc-{{.*}}
+// UNSUPPORTED: target=sparcel-{{.*}}
 
 // CHECK-LABEL: define ptr @alloca_scalar_nonchar()
 // CHECK: alloca i32, i64 1
diff --git a/flang/test/Integration/OpenMP/private-global.f90 b/flang/test/Integration/OpenMP/private-global.f90
index 1aacfb4c87198..a177c0e57ae47 100644
--- a/flang/test/Integration/OpenMP/private-global.f90
+++ b/flang/test/Integration/OpenMP/private-global.f90
@@ -1,4 +1,7 @@
 !RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s
+!UNSUPPORTED: target-x86
+!UNSUPPORTED: target=sparc-{{.*}}
+!UNSUPPORTED: target=sparcel-{{.*}}
 
 ! Regression test for https://github.com/llvm/llvm-project/issues/106297
 
@@ -36,7 +39,7 @@ program bug
 ! CHECK:         %[[FIFTY_BOX_VAL:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8 } { ptr undef, i64 4, i32 20240719, i8 0, i8 9, i8 0, i8 0 }, ptr %[[FIFTY]], 0
 ! CHECK:         store { ptr, i64, i32, i8, i8, i8, i8 } %[[FIFTY_BOX_VAL]], ptr %[[BOXED_FIFTY]], align 8
 ! CHECK:         call void @llvm.memcpy.p0.p0.i32(ptr %[[TABLE_BOX_ADDR2]], ptr %[[INTERMEDIATE]], i32 48, i1 false)
-! CHECK:         call void @_FortranAAssign(ptr %[[TABLE_BOX_ADDR2]], ptr %[[BOXED_FIFTY]], ptr @{{.*}}, i32 9)
+! CHECK:         call void @_FortranAAssign(ptr %[[TABLE_BOX_ADDR2]], ptr %[[BOXED_FIFTY]], ptr @{{.*}}, i32 12)
 ! CHECK:         call void @llvm.memcpy.p0.p0.i32(ptr %[[TABLE_BOX_ADDR]], ptr %[[PRIV_BOX_ALLOC]], i32 48, i1 false)
 ! CHECK:         %[[PRIV_TABLE:.*]] = call ptr @malloc(i64 40)
 ! ...
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90
index be25169e7d83e..953cc2cf2b999 100644
--- a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90
+++ b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90
@@ -4,6 +4,10 @@
 ! RUN: %flang_fc1 -emit-llvm -fopenmp -o - %s 2>&1 \
 ! RUN: | FileCheck %s
 
+! UNSUPPORTED: target-x86
+! UNSUPPORTED: target=sparc-{{.*}}
+! UNSUPPORTED: target=sparcel-{{.*}}
+
 subroutine proc
   implicit none
   real(8),allocatable :: F(:)
diff --git a/flang/test/Lower/forall/character-1.f90 b/flang/test/Lower/forall/character-1.f90
index d5f968ba93450..ef15f5b0cc3c2 100644
--- a/flang/test/Lower/forall/character-1.f90
+++ b/flang/test/Lower/forall/character-1.f90
@@ -2,6 +2,9 @@
 ! RUN: %flang -emit-llvm -flang-deprecated-no-hlfir -S -mmlir -disable-external-name-interop %s -o - | FileCheck %s
 ! Test from Fortran source through to LLVM IR.
 ! UNSUPPORTED: system-windows
+! UNSUPPORTED: target-x86
+! UNSUPPORTED: target=sparc-{{.*}}
+! UNSUPPORTED: target=sparcel-{{.*}}
 
 ! Assumed size array of assumed length character.
 program test

llvmbot · 2025-03-08T03:06:09Z

@llvm/pr-subscribers-flang-openmp

Author: R (ArcaneNibble)

Changes

Previous PR: #129308

Changes: I added REQUIRES: x86-registered-target to the newly-added test so that it won't break the other buildbots.

Full diff: https://github.com/llvm/llvm-project/pull/130386.diff

7 Files Affected:

(modified) flang/lib/Optimizer/CodeGen/CodeGen.cpp (+13-7)
(modified) flang/lib/Optimizer/CodeGen/TypeConverter.cpp (+18-1)
(added) flang/test/Fir/alloc-32.fir (+30)
(modified) flang/test/Fir/alloc.fir (+3)
(modified) flang/test/Integration/OpenMP/private-global.f90 (+4-1)
(modified) flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 (+4)
(modified) flang/test/Lower/forall/character-1.f90 (+3)

diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index a2743edd7844a..caae5e6465d3c 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -982,7 +982,8 @@ struct EmboxCharOpConversion : public fir::FIROpConversion<fir::EmboxCharOp> {
 template <typename ModuleOp>
 static mlir::SymbolRefAttr
 getMallocInModule(ModuleOp mod, fir::AllocMemOp op,
-                  mlir::ConversionPatternRewriter &rewriter) {
+                  mlir::ConversionPatternRewriter &rewriter,
+                  mlir::Type indexType) {
   static constexpr char mallocName[] = "malloc";
   if (auto mallocFunc =
           mod.template lookupSymbol<mlir::LLVM::LLVMFuncOp>(mallocName))
@@ -992,7 +993,6 @@ getMallocInModule(ModuleOp mod, fir::AllocMemOp op,
     return mlir::SymbolRefAttr::get(userMalloc);
 
   mlir::OpBuilder moduleBuilder(mod.getBodyRegion());
-  auto indexType = mlir::IntegerType::get(op.getContext(), 64);
   auto mallocDecl = moduleBuilder.create<mlir::LLVM::LLVMFuncOp>(
       op.getLoc(), mallocName,
       mlir::LLVM::LLVMFunctionType::get(getLlvmPtrType(op.getContext()),
@@ -1002,12 +1002,13 @@ getMallocInModule(ModuleOp mod, fir::AllocMemOp op,
 }
 
 /// Return the LLVMFuncOp corresponding to the standard malloc call.
-static mlir::SymbolRefAttr
-getMalloc(fir::AllocMemOp op, mlir::ConversionPatternRewriter &rewriter) {
+static mlir::SymbolRefAttr getMalloc(fir::AllocMemOp op,
+                                     mlir::ConversionPatternRewriter &rewriter,
+                                     mlir::Type indexType) {
   if (auto mod = op->getParentOfType<mlir::gpu::GPUModuleOp>())
-    return getMallocInModule(mod, op, rewriter);
+    return getMallocInModule(mod, op, rewriter, indexType);
   auto mod = op->getParentOfType<mlir::ModuleOp>();
-  return getMallocInModule(mod, op, rewriter);
+  return getMallocInModule(mod, op, rewriter, indexType);
 }
 
 /// Helper function for generating the LLVM IR that computes the distance
@@ -1067,7 +1068,12 @@ struct AllocMemOpConversion : public fir::FIROpConversion<fir::AllocMemOp> {
     for (mlir::Value opnd : adaptor.getOperands())
       size = rewriter.create<mlir::LLVM::MulOp>(
           loc, ity, size, integerCast(loc, rewriter, ity, opnd));
-    heap->setAttr("callee", getMalloc(heap, rewriter));
+    auto mallocTyWidth = lowerTy().getIndexTypeBitwidth();
+    auto mallocTy =
+        mlir::IntegerType::get(rewriter.getContext(), mallocTyWidth);
+    if (mallocTyWidth != ity.getIntOrFloatBitWidth())
+      size = integerCast(loc, rewriter, mallocTy, size);
+    heap->setAttr("callee", getMalloc(heap, rewriter, mallocTy));
     rewriter.replaceOpWithNewOp<mlir::LLVM::CallOp>(
         heap, ::getLlvmPtrType(heap.getContext()), size,
         addLLVMOpBundleAttrs(rewriter, heap->getAttrs(), 1));
diff --git a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
index 89f498433806e..1a1d3a8cfb870 100644
--- a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
+++ b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
@@ -28,10 +28,27 @@
 
 namespace fir {
 
+static mlir::LowerToLLVMOptions MakeLowerOptions(mlir::ModuleOp module) {
+  llvm::StringRef dataLayoutString;
+  auto dataLayoutAttr = module->template getAttrOfType<mlir::StringAttr>(
+      mlir::LLVM::LLVMDialect::getDataLayoutAttrName());
+  if (dataLayoutAttr)
+    dataLayoutString = dataLayoutAttr.getValue();
+
+  auto options = mlir::LowerToLLVMOptions(module.getContext());
+  auto llvmDL = llvm::DataLayout(dataLayoutString);
+  if (llvmDL.getPointerSizeInBits(0) == 32) {
+    // FIXME: Should translateDataLayout in the MLIR layer be doing this?
+    options.overrideIndexBitwidth(32);
+  }
+  options.dataLayout = llvmDL;
+  return options;
+}
+
 LLVMTypeConverter::LLVMTypeConverter(mlir::ModuleOp module, bool applyTBAA,
                                      bool forceUnifiedTBAATree,
                                      const mlir::DataLayout &dl)
-    : mlir::LLVMTypeConverter(module.getContext()),
+    : mlir::LLVMTypeConverter(module.getContext(), MakeLowerOptions(module)),
       kindMapping(getKindMapping(module)),
       specifics(CodeGenSpecifics::get(
           module.getContext(), getTargetTriple(module), getKindMapping(module),
diff --git a/flang/test/Fir/alloc-32.fir b/flang/test/Fir/alloc-32.fir
new file mode 100644
index 0000000000000..3eefc3225fac7
--- /dev/null
+++ b/flang/test/Fir/alloc-32.fir
@@ -0,0 +1,30 @@
+// RUN: %flang_fc1 -triple i686 -emit-llvm  %s -o - | FileCheck %s
+// REQUIRES: x86-registered-target
+
+// This is a check for calling malloc using i32 when on a 32-bit target (only).
+// It doesn't contain the comprehensive tests that alloc.fir has, and
+// that file should be used to exercise most code paths.
+
+module attributes {
+    fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "i686"
+} {
+
+// CHECK-LABEL: define ptr @allocmem_scalar_nonchar(
+// CHECK: call ptr @malloc(i32 4)
+func.func @allocmem_scalar_nonchar() -> !fir.heap<i32> {
+  %1 = fir.allocmem i32
+  return %1 : !fir.heap<i32>
+}
+
+// CHECK-LABEL: define ptr @allocmem_scalar_dynchar(
+// CHECK-SAME: i32 %[[len:.*]])
+// CHECK: %[[mul1:.*]] = sext i32 %[[len]] to i64
+// CHECK: %[[mul2:.*]] = mul i64 1, %[[mul1]]
+// CHECK: %[[trunc:.*]] = trunc i64 %[[mul2]] to i32
+// CHECK: call ptr @malloc(i32 %[[trunc]])
+func.func @allocmem_scalar_dynchar(%l : i32) -> !fir.heap<!fir.char<1,?>> {
+  %1 = fir.allocmem !fir.char<1,?>(%l : i32)
+  return %1 : !fir.heap<!fir.char<1,?>>
+}
+
+}
diff --git a/flang/test/Fir/alloc.fir b/flang/test/Fir/alloc.fir
index ba9b08dad7764..421c06a4780ea 100644
--- a/flang/test/Fir/alloc.fir
+++ b/flang/test/Fir/alloc.fir
@@ -2,6 +2,9 @@
 // RUN: %flang_fc1 -emit-llvm  %s -o - | FileCheck %s
 
 // UNSUPPORTED: system-windows
+// UNSUPPORTED: target-x86
+// UNSUPPORTED: target=sparc-{{.*}}
+// UNSUPPORTED: target=sparcel-{{.*}}
 
 // CHECK-LABEL: define ptr @alloca_scalar_nonchar()
 // CHECK: alloca i32, i64 1
diff --git a/flang/test/Integration/OpenMP/private-global.f90 b/flang/test/Integration/OpenMP/private-global.f90
index 1aacfb4c87198..a177c0e57ae47 100644
--- a/flang/test/Integration/OpenMP/private-global.f90
+++ b/flang/test/Integration/OpenMP/private-global.f90
@@ -1,4 +1,7 @@
 !RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s
+!UNSUPPORTED: target-x86
+!UNSUPPORTED: target=sparc-{{.*}}
+!UNSUPPORTED: target=sparcel-{{.*}}
 
 ! Regression test for https://github.com/llvm/llvm-project/issues/106297
 
@@ -36,7 +39,7 @@ program bug
 ! CHECK:         %[[FIFTY_BOX_VAL:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8 } { ptr undef, i64 4, i32 20240719, i8 0, i8 9, i8 0, i8 0 }, ptr %[[FIFTY]], 0
 ! CHECK:         store { ptr, i64, i32, i8, i8, i8, i8 } %[[FIFTY_BOX_VAL]], ptr %[[BOXED_FIFTY]], align 8
 ! CHECK:         call void @llvm.memcpy.p0.p0.i32(ptr %[[TABLE_BOX_ADDR2]], ptr %[[INTERMEDIATE]], i32 48, i1 false)
-! CHECK:         call void @_FortranAAssign(ptr %[[TABLE_BOX_ADDR2]], ptr %[[BOXED_FIFTY]], ptr @{{.*}}, i32 9)
+! CHECK:         call void @_FortranAAssign(ptr %[[TABLE_BOX_ADDR2]], ptr %[[BOXED_FIFTY]], ptr @{{.*}}, i32 12)
 ! CHECK:         call void @llvm.memcpy.p0.p0.i32(ptr %[[TABLE_BOX_ADDR]], ptr %[[PRIV_BOX_ALLOC]], i32 48, i1 false)
 ! CHECK:         %[[PRIV_TABLE:.*]] = call ptr @malloc(i64 40)
 ! ...
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90
index be25169e7d83e..953cc2cf2b999 100644
--- a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90
+++ b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90
@@ -4,6 +4,10 @@
 ! RUN: %flang_fc1 -emit-llvm -fopenmp -o - %s 2>&1 \
 ! RUN: | FileCheck %s
 
+! UNSUPPORTED: target-x86
+! UNSUPPORTED: target=sparc-{{.*}}
+! UNSUPPORTED: target=sparcel-{{.*}}
+
 subroutine proc
   implicit none
   real(8),allocatable :: F(:)
diff --git a/flang/test/Lower/forall/character-1.f90 b/flang/test/Lower/forall/character-1.f90
index d5f968ba93450..ef15f5b0cc3c2 100644
--- a/flang/test/Lower/forall/character-1.f90
+++ b/flang/test/Lower/forall/character-1.f90
@@ -2,6 +2,9 @@
 ! RUN: %flang -emit-llvm -flang-deprecated-no-hlfir -S -mmlir -disable-external-name-interop %s -o - | FileCheck %s
 ! Test from Fortran source through to LLVM IR.
 ! UNSUPPORTED: system-windows
+! UNSUPPORTED: target-x86
+! UNSUPPORTED: target=sparc-{{.*}}
+! UNSUPPORTED: target=sparcel-{{.*}}
 
 ! Assumed size array of assumed length character.
 program test

ArcaneNibble · 2025-03-08T03:09:10Z

Apologies for all the noise, this is the correct PR from the correct branch with the buildbot issue fixed

tblah · 2025-03-10T10:27:04Z

flang/test/Lower/OpenMP/parallel-reduction-mixed.f90

+! UNSUPPORTED: target-x86
+! UNSUPPORTED: target=sparc-{{.*}}
+! UNSUPPORTED: target=sparcel-{{.*}}


Why is this test unsupported on those platforms?

I disabled them because they just so happen to invoke malloc and thus no longer match exactly on these 32-bit platforms. This is because malloc now gets called with an i32. I figured it was okay since 32-bit wasn't officially supported or being tested by the public builders.

Thanks for your quick response. If possible I would like to keep the test coverage. For these tests the test doesn't really care what type is used with malloc, so long as the right number is used. So you could do something like

!CHECK: call ptr @malloc(i{{[0-9]+}} 80)

I just fixed the OpenMP-related tests. character-1.f90 remains disabled as it really does dynamically allocate memory, and the resulting additional trunc instructions cannot easily be matched.

As a result, this PR now includes some drive-by fixes in XReboxOpConversion as well as inside OpenMPIRBuilder::createReductions because, ironically, parallel-reduction-mixed.f90 seems as if it never worked correctly on 32-bit (calling __kmpc_reduce with the wrong type).

tblah

Thanks for all of the updates. LGTM (with one nit)

nit: please could you add a comment to the tests which are still disabled on some platforms explaining why

…lloc on 32-bit (llvm#129308)" Changes: * The alloc-32.fir test is now marked as requiring the X86 target. * Drive-by fixes uncovered when fixing tests involving malloc

ArcaneNibble requested a review from clementval March 8, 2025 03:05

llvmbot added flang Flang issues not falling into any other category flang:fir-hlfir flang:openmp flang:codegen labels Mar 8, 2025

tblah reviewed Mar 10, 2025

View reviewed changes

ArcaneNibble force-pushed the wasm-flang-fixes-maybe-pr branch from 15fba5e to b7093a9 Compare March 10, 2025 21:49

llvmbot added the clang:openmp OpenMP related changes to Clang label Mar 10, 2025

tblah approved these changes Mar 10, 2025

View reviewed changes

Reapply "[flang] In AllocMemOp lowering, convert types for calling ma…

85aa012

…lloc on 32-bit (llvm#129308)" Changes: * The alloc-32.fir test is now marked as requiring the X86 target. * Drive-by fixes uncovered when fixing tests involving malloc

ArcaneNibble force-pushed the wasm-flang-fixes-maybe-pr branch from b7093a9 to 85aa012 Compare March 10, 2025 22:13

ArcaneNibble merged commit 1dffe8f into llvm:main Mar 11, 2025
9 of 11 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Reland [flang] In AllocMemOp lowering, convert types for calling malloc on 32-bit #130386

Reland [flang] In AllocMemOp lowering, convert types for calling malloc on 32-bit #130386

Uh oh!

ArcaneNibble commented Mar 8, 2025

Uh oh!

llvmbot commented Mar 8, 2025

Uh oh!

llvmbot commented Mar 8, 2025

Uh oh!

llvmbot commented Mar 8, 2025

Uh oh!

ArcaneNibble commented Mar 8, 2025

Uh oh!

tblah Mar 10, 2025

Uh oh!

ArcaneNibble Mar 10, 2025

Uh oh!

tblah Mar 10, 2025

Uh oh!

ArcaneNibble Mar 10, 2025

Uh oh!

tblah left a comment

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants

Reland [flang] In AllocMemOp lowering, convert types for calling malloc on 32-bit #130386

Reland [flang] In AllocMemOp lowering, convert types for calling malloc on 32-bit #130386

Uh oh!

Conversation

ArcaneNibble commented Mar 8, 2025

Uh oh!

llvmbot commented Mar 8, 2025

Uh oh!

llvmbot commented Mar 8, 2025

Uh oh!

llvmbot commented Mar 8, 2025

Uh oh!

ArcaneNibble commented Mar 8, 2025

Uh oh!

tblah Mar 10, 2025

Choose a reason for hiding this comment

Uh oh!

ArcaneNibble Mar 10, 2025

Choose a reason for hiding this comment

Uh oh!

tblah Mar 10, 2025

Choose a reason for hiding this comment

Uh oh!

ArcaneNibble Mar 10, 2025

Choose a reason for hiding this comment

Uh oh!

tblah left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants