Skip to content

Conversation

@vzakhari
Copy link
Contributor

This allows LLVM to place the most probably cold blocks
that do the repacking out of the line of the potentially hot code.

@vzakhari vzakhari requested review from jeanPerier and tblah June 19, 2025 03:53
@llvmbot llvmbot added flang Flang issues not falling into any other category flang:fir-hlfir flang:codegen labels Jun 19, 2025
@llvmbot
Copy link
Member

llvmbot commented Jun 19, 2025

@llvm/pr-subscribers-flang-codegen

Author: Slava Zakharin (vzakhari)

Changes

This allows LLVM to place the most probably cold blocks
that do the repacking out of the line of the potentially hot code.


Patch is 34.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/144830.diff

3 Files Affected:

  • (modified) flang/include/flang/Optimizer/Dialect/FIROps.td (+10)
  • (modified) flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp (+20-14)
  • (modified) flang/test/Transforms/lower-repack-arrays.fir (+32-32)
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 27a6ca4ebdb4e..8ac847dd7dd0a 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -2381,6 +2381,16 @@ def fir_IfOp
     static constexpr llvm::StringRef getWeightsAttrAssemblyName() {
       return "weights";
     }
+
+    /// Sets WeightedRegionBranchOpInterface weights to indicate
+    /// that either THEN or ELSE branch is unlikely.
+    /// By default, THEN branch is set to be unlikely.
+    void setUnlikelyIfWeights(bool unlikelyElse = false) {
+      if (unlikelyElse)
+        setWeights({1, 0});
+      else
+        setWeights({0, 1});
+    }
   }];
 }
 
diff --git a/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp b/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp
index de97a0bbc184a..2774382c22bf7 100644
--- a/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp
+++ b/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp
@@ -250,6 +250,8 @@ PackArrayConversion::genRepackedBox(fir::FirOpBuilder &builder,
 
   fir::IfOp ifOp =
       builder.create<fir::IfOp>(loc, boxType, doPack, /*withElseRegion=*/true);
+  // Assume that the repacking is unlikely.
+  ifOp.setUnlikelyIfWeights();
 
   // Return original box.
   builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
@@ -322,20 +324,24 @@ UnpackArrayConversion::matchAndRewrite(fir::UnpackArrayOp op,
 
     auto isNotSame = builder.genPtrCompare(loc, mlir::arith::CmpIPredicate::ne,
                                            tempAddr, originalAddr);
-    builder.genIfThen(loc, isNotSame).genThen([&]() {});
-    // Copy from temporary to the original.
-    if (!op.getNoCopy())
-      fir::runtime::genShallowCopy(builder, loc, originalBox, tempBox,
-                                   /*resultIsAllocated=*/true);
-
-    // Deallocate, if it was allocated in heap.
-    // Note that the stack attribute does not always mean
-    // that the allocation was actually done in stack memory.
-    // There are currently cases where we delegate the allocation
-    // to the runtime that uses heap memory, even when the stack
-    // attribute is set on fir.pack_array.
-    if (!op.getStack() || !canAllocateTempOnStack(originalBox))
-      builder.create<fir::FreeMemOp>(loc, tempAddr);
+    builder.genIfThen(loc, isNotSame)
+        .genThen([&]() {
+          // Copy from temporary to the original.
+          if (!op.getNoCopy())
+            fir::runtime::genShallowCopy(builder, loc, originalBox, tempBox,
+                                         /*resultIsAllocated=*/true);
+
+          // Deallocate, if it was allocated in heap.
+          // Note that the stack attribute does not always mean
+          // that the allocation was actually done in stack memory.
+          // There are currently cases where we delegate the allocation
+          // to the runtime that uses heap memory, even when the stack
+          // attribute is set on fir.pack_array.
+          if (!op.getStack() || !canAllocateTempOnStack(originalBox))
+            builder.create<fir::FreeMemOp>(loc, tempAddr);
+        })
+        .getIfOp()
+        .setUnlikelyIfWeights();
   });
   rewriter.eraseOp(op);
   return mlir::success();
diff --git a/flang/test/Transforms/lower-repack-arrays.fir b/flang/test/Transforms/lower-repack-arrays.fir
index 012e957173ac4..458869cce45fd 100644
--- a/flang/test/Transforms/lower-repack-arrays.fir
+++ b/flang/test/Transforms/lower-repack-arrays.fir
@@ -22,7 +22,7 @@ func.func @_QPtest1(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "x"})
 // CHECK:             %[[VAL_11:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.ref<!fir.box<!fir.array<?x?xf32>>>
 // CHECK:             %[[VAL_12:.*]] = fir.is_present %[[VAL_11]] : (!fir.ref<!fir.box<!fir.array<?x?xf32>>>) -> i1
 // CHECK:             %[[VAL_13:.*]] = arith.andi %[[VAL_10]], %[[VAL_12]] : i1
-// CHECK:             %[[VAL_14:.*]] = fir.if %[[VAL_13]] -> (!fir.box<!fir.array<?x?xf32>>) {
+// CHECK:             %[[VAL_14:.*]] = fir.if %[[VAL_13]] weights([0, 1]) -> (!fir.box<!fir.array<?x?xf32>>) {
 // CHECK:               %[[VAL_15:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_17:.*]] = fir.shape %[[VAL_15]]#1, %[[VAL_16]]#1 : (index, index) -> !fir.shape<2>
@@ -52,7 +52,7 @@ func.func @_QPtest1(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "x"})
 // CHECK:             %[[VAL_30:.*]] = fir.convert %[[VAL_29]] : (!fir.heap<!fir.array<?x?xf32>>) -> index
 // CHECK:             %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (!fir.heap<!fir.array<?x?xf32>>) -> index
 // CHECK:             %[[VAL_33:.*]] = arith.cmpi ne, %[[VAL_30]], %[[VAL_32]] : index
-// CHECK:             fir.if %[[VAL_33]] {
+// CHECK:             fir.if %[[VAL_33]] weights([0, 1]) {
 // CHECK:               %[[VAL_34:.*]] = fir.address_of(@{{_QQcl.*}}
 // CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<none>
 // CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_8]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<none>
@@ -87,7 +87,7 @@ func.func @_QPtest1_whole(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name =
 // CHECK:             %[[VAL_11:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.ref<!fir.box<!fir.array<?x?xf32>>>
 // CHECK:             %[[VAL_12:.*]] = fir.is_present %[[VAL_11]] : (!fir.ref<!fir.box<!fir.array<?x?xf32>>>) -> i1
 // CHECK:             %[[VAL_13:.*]] = arith.andi %[[VAL_10]], %[[VAL_12]] : i1
-// CHECK:             %[[VAL_14:.*]] = fir.if %[[VAL_13]] -> (!fir.box<!fir.array<?x?xf32>>) {
+// CHECK:             %[[VAL_14:.*]] = fir.if %[[VAL_13]] weights([0, 1]) -> (!fir.box<!fir.array<?x?xf32>>) {
 // CHECK:               %[[VAL_15:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_17:.*]] = fir.shape %[[VAL_15]]#1, %[[VAL_16]]#1 : (index, index) -> !fir.shape<2>
@@ -117,7 +117,7 @@ func.func @_QPtest1_whole(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name =
 // CHECK:             %[[VAL_30:.*]] = fir.convert %[[VAL_29]] : (!fir.heap<!fir.array<?x?xf32>>) -> index
 // CHECK:             %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (!fir.heap<!fir.array<?x?xf32>>) -> index
 // CHECK:             %[[VAL_33:.*]] = arith.cmpi ne, %[[VAL_30]], %[[VAL_32]] : index
-// CHECK:             fir.if %[[VAL_33]] {
+// CHECK:             fir.if %[[VAL_33]] weights([0, 1]) {
 // CHECK:               %[[VAL_34:.*]] = fir.address_of(@{{_QQcl.*}}
 // CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<none>
 // CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_8]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<none>
@@ -150,7 +150,7 @@ func.func @_QPtest1_in(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "x
 // CHECK:             %[[VAL_10:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.ref<!fir.box<!fir.array<?x?xf32>>>
 // CHECK:             %[[VAL_11:.*]] = fir.is_present %[[VAL_10]] : (!fir.ref<!fir.box<!fir.array<?x?xf32>>>) -> i1
 // CHECK:             %[[VAL_12:.*]] = arith.andi %[[VAL_9]], %[[VAL_11]] : i1
-// CHECK:             %[[VAL_13:.*]] = fir.if %[[VAL_12]] -> (!fir.box<!fir.array<?x?xf32>>) {
+// CHECK:             %[[VAL_13:.*]] = fir.if %[[VAL_12]] weights([0, 1]) -> (!fir.box<!fir.array<?x?xf32>>) {
 // CHECK:               %[[VAL_14:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_15:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_16:.*]] = fir.shape %[[VAL_14]]#1, %[[VAL_15]]#1 : (index, index) -> !fir.shape<2>
@@ -180,7 +180,7 @@ func.func @_QPtest1_in(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "x
 // CHECK:             %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (!fir.heap<!fir.array<?x?xf32>>) -> index
 // CHECK:             %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (!fir.heap<!fir.array<?x?xf32>>) -> index
 // CHECK:             %[[VAL_32:.*]] = arith.cmpi ne, %[[VAL_29]], %[[VAL_31]] : index
-// CHECK:             fir.if %[[VAL_32]] {
+// CHECK:             fir.if %[[VAL_32]] weights([0, 1]) {
 // CHECK:               fir.freemem %[[VAL_28]] : !fir.heap<!fir.array<?x?xf32>>
 // CHECK:             }
 // CHECK:           }
@@ -209,7 +209,7 @@ func.func @_QPtest1_out(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "
 // CHECK:             %[[VAL_10:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.ref<!fir.box<!fir.array<?x?xf32>>>
 // CHECK:             %[[VAL_11:.*]] = fir.is_present %[[VAL_10]] : (!fir.ref<!fir.box<!fir.array<?x?xf32>>>) -> i1
 // CHECK:             %[[VAL_12:.*]] = arith.andi %[[VAL_9]], %[[VAL_11]] : i1
-// CHECK:             %[[VAL_13:.*]] = fir.if %[[VAL_12]] -> (!fir.box<!fir.array<?x?xf32>>) {
+// CHECK:             %[[VAL_13:.*]] = fir.if %[[VAL_12]] weights([0, 1]) -> (!fir.box<!fir.array<?x?xf32>>) {
 // CHECK:               %[[VAL_14:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_15:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_16:.*]] = fir.shape %[[VAL_14]]#1, %[[VAL_15]]#1 : (index, index) -> !fir.shape<2>
@@ -234,7 +234,7 @@ func.func @_QPtest1_out(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "
 // CHECK:             %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (!fir.heap<!fir.array<?x?xf32>>) -> index
 // CHECK:             %[[VAL_27:.*]] = fir.convert %[[VAL_26]] : (!fir.heap<!fir.array<?x?xf32>>) -> index
 // CHECK:             %[[VAL_28:.*]] = arith.cmpi ne, %[[VAL_25]], %[[VAL_27]] : index
-// CHECK:             fir.if %[[VAL_28]] {
+// CHECK:             fir.if %[[VAL_28]] weights([0, 1]) {
 // CHECK:               %[[VAL_29:.*]] = fir.address_of(@{{_QQcl.*}}
 // CHECK:               %[[VAL_30:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<none>
 // CHECK:               %[[VAL_31:.*]] = fir.convert %[[VAL_7]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<none>
@@ -280,7 +280,7 @@ func.func @_QPtest2(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.box
 // CHECK:             %[[VAL_17:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.ref<!fir.box<!fir.array<?x?x!fir.char<1,?>>>>
 // CHECK:             %[[VAL_18:.*]] = fir.is_present %[[VAL_17]] : (!fir.ref<!fir.box<!fir.array<?x?x!fir.char<1,?>>>>) -> i1
 // CHECK:             %[[VAL_19:.*]] = arith.andi %[[VAL_16]], %[[VAL_18]] : i1
-// CHECK:             %[[VAL_20:.*]] = fir.if %[[VAL_19]] -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) {
+// CHECK:             %[[VAL_20:.*]] = fir.if %[[VAL_19]] weights([0, 1]) -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) {
 // CHECK:               %[[VAL_21:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_5]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_22:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_23:.*]] = fir.shape %[[VAL_21]]#1, %[[VAL_22]]#1 : (index, index) -> !fir.shape<2>
@@ -310,7 +310,7 @@ func.func @_QPtest2(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.box
 // CHECK:             %[[VAL_36:.*]] = fir.convert %[[VAL_35]] : (!fir.heap<!fir.array<?x?x!fir.char<1,?>>>) -> index
 // CHECK:             %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (!fir.heap<!fir.array<?x?x!fir.char<1,?>>>) -> index
 // CHECK:             %[[VAL_39:.*]] = arith.cmpi ne, %[[VAL_36]], %[[VAL_38]] : index
-// CHECK:             fir.if %[[VAL_39]] {
+// CHECK:             fir.if %[[VAL_39]] weights([0, 1]) {
 // CHECK:               %[[VAL_40:.*]] = fir.address_of(@{{_QQcl.*}}
 // CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_1]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.box<none>
 // CHECK:               %[[VAL_42:.*]] = fir.convert %[[VAL_14]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.box<none>
@@ -356,7 +356,7 @@ func.func @_QPtest2_stack(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !f
 // CHECK:             %[[VAL_17:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.ref<!fir.box<!fir.array<?x?x!fir.char<1,?>>>>
 // CHECK:             %[[VAL_18:.*]] = fir.is_present %[[VAL_17]] : (!fir.ref<!fir.box<!fir.array<?x?x!fir.char<1,?>>>>) -> i1
 // CHECK:             %[[VAL_19:.*]] = arith.andi %[[VAL_16]], %[[VAL_18]] : i1
-// CHECK:             %[[VAL_20:.*]] = fir.if %[[VAL_19]] -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) {
+// CHECK:             %[[VAL_20:.*]] = fir.if %[[VAL_19]] weights([0, 1]) -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) {
 // CHECK:               %[[VAL_21:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_5]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_22:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_23:.*]] = fir.shape %[[VAL_21]]#1, %[[VAL_22]]#1 : (index, index) -> !fir.shape<2>
@@ -386,7 +386,7 @@ func.func @_QPtest2_stack(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !f
 // CHECK:             %[[VAL_36:.*]] = fir.convert %[[VAL_35]] : (!fir.heap<!fir.array<?x?x!fir.char<1,?>>>) -> index
 // CHECK:             %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (!fir.heap<!fir.array<?x?x!fir.char<1,?>>>) -> index
 // CHECK:             %[[VAL_39:.*]] = arith.cmpi ne, %[[VAL_36]], %[[VAL_38]] : index
-// CHECK:             fir.if %[[VAL_39]] {
+// CHECK:             fir.if %[[VAL_39]] weights([0, 1]) {
 // CHECK:               %[[VAL_40:.*]] = fir.address_of(@{{_QQcl.*}}
 // CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_1]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.box<none>
 // CHECK:               %[[VAL_42:.*]] = fir.convert %[[VAL_14]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.box<none>
@@ -420,7 +420,7 @@ func.func @_QPtest3(%arg0: !fir.box<!fir.array<?x?x!fir.char<1,?>>> {fir.bindc_n
 // CHECK:             %[[VAL_11:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.ref<!fir.box<!fir.array<?x?x!fir.char<1,?>>>>
 // CHECK:             %[[VAL_12:.*]] = fir.is_present %[[VAL_11]] : (!fir.ref<!fir.box<!fir.array<?x?x!fir.char<1,?>>>>) -> i1
 // CHECK:             %[[VAL_13:.*]] = arith.andi %[[VAL_10]], %[[VAL_12]] : i1
-// CHECK:             %[[VAL_14:.*]] = fir.if %[[VAL_13]] -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) {
+// CHECK:             %[[VAL_14:.*]] = fir.if %[[VAL_13]] weights([0, 1]) -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) {
 // CHECK:               %[[VAL_15:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_17:.*]] = fir.box_elesize %[[VAL_0]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> index
@@ -451,7 +451,7 @@ func.func @_QPtest3(%arg0: !fir.box<!fir.array<?x?x!fir.char<1,?>>> {fir.bindc_n
 // CHECK:             %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (!fir.heap<!fir.array<?x?x!fir.char<1,?>>>) -> index
 // CHECK:             %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (!fir.heap<!fir.array<?x?x!fir.char<1,?>>>) -> index
 // CHECK:             %[[VAL_34:.*]] = arith.cmpi ne, %[[VAL_31]], %[[VAL_33]] : index
-// CHECK:             fir.if %[[VAL_34]] {
+// CHECK:             fir.if %[[VAL_34]] weights([0, 1]) {
 // CHECK:               %[[VAL_35:.*]] = fir.address_of(@{{_QQcl.*}}
 // CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.box<none>
 // CHECK:               %[[VAL_37:.*]] = fir.convert %[[VAL_8]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.box<none>
@@ -486,7 +486,7 @@ func.func @_QPtest3_stack(%arg0: !fir.box<!fir.array<?x?x!fir.char<1,?>>> {fir.b
 // CHECK:             %[[VAL_11:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.ref<!fir.box<!fir.array<?x?x!fir.char<1,?>>>>
 // CHECK:             %[[VAL_12:.*]] = fir.is_present %[[VAL_11]] : (!fir.ref<!fir.box<!fir.array<?x?x!fir.char<1,?>>>>) -> i1
 // CHECK:             %[[VAL_13:.*]] = arith.andi %[[VAL_10]], %[[VAL_12]] : i1
-// CHECK:             %[[VAL_14:.*]] = fir.if %[[VAL_13]] -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) {
+// CHECK:             %[[VAL_14:.*]] = fir.if %[[VAL_13]] weights([0, 1]) -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) {
 // CHECK:               %[[VAL_15:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_17:.*]] = fir.box_elesize %[[VAL_0]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> index
@@ -517,7 +517,7 @@ func.func @_QPtest3_stack(%arg0: !fir.box<!fir.array<?x?x!fir.char<1,?>>> {fir.b
 // CHECK:             %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (!fir.heap<!fir.array<?x?x!fir.char<1,?>>>) -> index
 // CHECK:             %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (!fir.heap<!fir.array<?x?x!fir.char<1,?>>>) -> index
 // CHECK:             %[[VAL_34:.*]] = arith.cmpi ne, %[[VAL_31]], %[[VAL_33]] : index
-// CHECK:             fir.if %[[VAL_34]] {
+// CHECK:             fir.if %[[VAL_34]] weights([0, 1]) {
 // CHECK:               %[[VAL_35:.*]] = fir.address_of(@{{_QQcl.*}}
 // CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.box<none>
 // CHECK:               %[[VAL_37:.*]] = fir.convert %[[VAL_8]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.box<none>
@@ -553,7 +553,7 @@ func.func @_QPtest4(%arg0: !fir.box<!fir.array<?x?x!fir.char<1,10>>> {fir.bindc_
 // CHECK:             %[[VAL_12:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box<!fir.array<?x?x!fir.char<1,10>>>) -> !fir.ref<!fir.box<!fir.array<?x?x!fir.char<1,10>>>>
 // CHECK:             %[[VAL_13:.*]] = fir.is_present %[[VAL_12]] : (!fir.ref<!fir.box<!fir.array<?x?x!fir.char<1,10>>>>) -> i1
 // CHECK:             %[[VAL_14:.*]] = arith.andi %[[VAL_11]], %[[VAL_13]] : i1
-// CHECK:             %[[VAL_15:.*]] = fir.if %[[VAL_14]] -> (!fir.box<!fir.array<?x?x!fir.char<1,10>>>) {
+// CHECK:             %[[VAL_15:.*]] = fir.if %[[VAL_14]] weights([0, 1]) -> (!fir.box<!fir.array<?x?x!fir.char<1,10>>>) {
 // CHECK:               %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x!fir.char<1,10>>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x!fir.char<1,10>>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_18:.*]] = fir.shape %[[VAL_16]]#1, %[[VAL_17]]#1 : (index, index) -> !fir.shape<2>
@@ -583,7 +583,7 @@ func.func @_QPtest4(%arg0: !fir.box<!fir.array<?x?x!fir.char<1,10>>> {fir.bindc_
 // CHECK:             %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (!fir.heap<!fir.array<?x?x!fir.char<1,10>>>) -> index
 // CHECK:             %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (!fir.heap<!fir.array<?x?x!fir.char<1,10>>>) -> index
 // CHECK:             %[[VAL_34:.*]] = arith.cmpi ne, %[[VAL_31]], %[[VAL_33]] : index
-// CHECK:             fir.if %[[VAL_34]] {
+// CHECK:             fir.if %[[VAL_34]] weights([0, 1]) {
 ...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Jun 19, 2025

@llvm/pr-subscribers-flang-fir-hlfir

Author: Slava Zakharin (vzakhari)

Changes

This allows LLVM to place the most probably cold blocks
that do the repacking out of the line of the potentially hot code.


Patch is 34.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/144830.diff

3 Files Affected:

  • (modified) flang/include/flang/Optimizer/Dialect/FIROps.td (+10)
  • (modified) flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp (+20-14)
  • (modified) flang/test/Transforms/lower-repack-arrays.fir (+32-32)
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 27a6ca4ebdb4e..8ac847dd7dd0a 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -2381,6 +2381,16 @@ def fir_IfOp
     static constexpr llvm::StringRef getWeightsAttrAssemblyName() {
       return "weights";
     }
+
+    /// Sets WeightedRegionBranchOpInterface weights to indicate
+    /// that either THEN or ELSE branch is unlikely.
+    /// By default, THEN branch is set to be unlikely.
+    void setUnlikelyIfWeights(bool unlikelyElse = false) {
+      if (unlikelyElse)
+        setWeights({1, 0});
+      else
+        setWeights({0, 1});
+    }
   }];
 }
 
diff --git a/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp b/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp
index de97a0bbc184a..2774382c22bf7 100644
--- a/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp
+++ b/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp
@@ -250,6 +250,8 @@ PackArrayConversion::genRepackedBox(fir::FirOpBuilder &builder,
 
   fir::IfOp ifOp =
       builder.create<fir::IfOp>(loc, boxType, doPack, /*withElseRegion=*/true);
+  // Assume that the repacking is unlikely.
+  ifOp.setUnlikelyIfWeights();
 
   // Return original box.
   builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
@@ -322,20 +324,24 @@ UnpackArrayConversion::matchAndRewrite(fir::UnpackArrayOp op,
 
     auto isNotSame = builder.genPtrCompare(loc, mlir::arith::CmpIPredicate::ne,
                                            tempAddr, originalAddr);
-    builder.genIfThen(loc, isNotSame).genThen([&]() {});
-    // Copy from temporary to the original.
-    if (!op.getNoCopy())
-      fir::runtime::genShallowCopy(builder, loc, originalBox, tempBox,
-                                   /*resultIsAllocated=*/true);
-
-    // Deallocate, if it was allocated in heap.
-    // Note that the stack attribute does not always mean
-    // that the allocation was actually done in stack memory.
-    // There are currently cases where we delegate the allocation
-    // to the runtime that uses heap memory, even when the stack
-    // attribute is set on fir.pack_array.
-    if (!op.getStack() || !canAllocateTempOnStack(originalBox))
-      builder.create<fir::FreeMemOp>(loc, tempAddr);
+    builder.genIfThen(loc, isNotSame)
+        .genThen([&]() {
+          // Copy from temporary to the original.
+          if (!op.getNoCopy())
+            fir::runtime::genShallowCopy(builder, loc, originalBox, tempBox,
+                                         /*resultIsAllocated=*/true);
+
+          // Deallocate, if it was allocated in heap.
+          // Note that the stack attribute does not always mean
+          // that the allocation was actually done in stack memory.
+          // There are currently cases where we delegate the allocation
+          // to the runtime that uses heap memory, even when the stack
+          // attribute is set on fir.pack_array.
+          if (!op.getStack() || !canAllocateTempOnStack(originalBox))
+            builder.create<fir::FreeMemOp>(loc, tempAddr);
+        })
+        .getIfOp()
+        .setUnlikelyIfWeights();
   });
   rewriter.eraseOp(op);
   return mlir::success();
diff --git a/flang/test/Transforms/lower-repack-arrays.fir b/flang/test/Transforms/lower-repack-arrays.fir
index 012e957173ac4..458869cce45fd 100644
--- a/flang/test/Transforms/lower-repack-arrays.fir
+++ b/flang/test/Transforms/lower-repack-arrays.fir
@@ -22,7 +22,7 @@ func.func @_QPtest1(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "x"})
 // CHECK:             %[[VAL_11:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.ref<!fir.box<!fir.array<?x?xf32>>>
 // CHECK:             %[[VAL_12:.*]] = fir.is_present %[[VAL_11]] : (!fir.ref<!fir.box<!fir.array<?x?xf32>>>) -> i1
 // CHECK:             %[[VAL_13:.*]] = arith.andi %[[VAL_10]], %[[VAL_12]] : i1
-// CHECK:             %[[VAL_14:.*]] = fir.if %[[VAL_13]] -> (!fir.box<!fir.array<?x?xf32>>) {
+// CHECK:             %[[VAL_14:.*]] = fir.if %[[VAL_13]] weights([0, 1]) -> (!fir.box<!fir.array<?x?xf32>>) {
 // CHECK:               %[[VAL_15:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_17:.*]] = fir.shape %[[VAL_15]]#1, %[[VAL_16]]#1 : (index, index) -> !fir.shape<2>
@@ -52,7 +52,7 @@ func.func @_QPtest1(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "x"})
 // CHECK:             %[[VAL_30:.*]] = fir.convert %[[VAL_29]] : (!fir.heap<!fir.array<?x?xf32>>) -> index
 // CHECK:             %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (!fir.heap<!fir.array<?x?xf32>>) -> index
 // CHECK:             %[[VAL_33:.*]] = arith.cmpi ne, %[[VAL_30]], %[[VAL_32]] : index
-// CHECK:             fir.if %[[VAL_33]] {
+// CHECK:             fir.if %[[VAL_33]] weights([0, 1]) {
 // CHECK:               %[[VAL_34:.*]] = fir.address_of(@{{_QQcl.*}}
 // CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<none>
 // CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_8]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<none>
@@ -87,7 +87,7 @@ func.func @_QPtest1_whole(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name =
 // CHECK:             %[[VAL_11:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.ref<!fir.box<!fir.array<?x?xf32>>>
 // CHECK:             %[[VAL_12:.*]] = fir.is_present %[[VAL_11]] : (!fir.ref<!fir.box<!fir.array<?x?xf32>>>) -> i1
 // CHECK:             %[[VAL_13:.*]] = arith.andi %[[VAL_10]], %[[VAL_12]] : i1
-// CHECK:             %[[VAL_14:.*]] = fir.if %[[VAL_13]] -> (!fir.box<!fir.array<?x?xf32>>) {
+// CHECK:             %[[VAL_14:.*]] = fir.if %[[VAL_13]] weights([0, 1]) -> (!fir.box<!fir.array<?x?xf32>>) {
 // CHECK:               %[[VAL_15:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_17:.*]] = fir.shape %[[VAL_15]]#1, %[[VAL_16]]#1 : (index, index) -> !fir.shape<2>
@@ -117,7 +117,7 @@ func.func @_QPtest1_whole(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name =
 // CHECK:             %[[VAL_30:.*]] = fir.convert %[[VAL_29]] : (!fir.heap<!fir.array<?x?xf32>>) -> index
 // CHECK:             %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (!fir.heap<!fir.array<?x?xf32>>) -> index
 // CHECK:             %[[VAL_33:.*]] = arith.cmpi ne, %[[VAL_30]], %[[VAL_32]] : index
-// CHECK:             fir.if %[[VAL_33]] {
+// CHECK:             fir.if %[[VAL_33]] weights([0, 1]) {
 // CHECK:               %[[VAL_34:.*]] = fir.address_of(@{{_QQcl.*}}
 // CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<none>
 // CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_8]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<none>
@@ -150,7 +150,7 @@ func.func @_QPtest1_in(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "x
 // CHECK:             %[[VAL_10:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.ref<!fir.box<!fir.array<?x?xf32>>>
 // CHECK:             %[[VAL_11:.*]] = fir.is_present %[[VAL_10]] : (!fir.ref<!fir.box<!fir.array<?x?xf32>>>) -> i1
 // CHECK:             %[[VAL_12:.*]] = arith.andi %[[VAL_9]], %[[VAL_11]] : i1
-// CHECK:             %[[VAL_13:.*]] = fir.if %[[VAL_12]] -> (!fir.box<!fir.array<?x?xf32>>) {
+// CHECK:             %[[VAL_13:.*]] = fir.if %[[VAL_12]] weights([0, 1]) -> (!fir.box<!fir.array<?x?xf32>>) {
 // CHECK:               %[[VAL_14:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_15:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_16:.*]] = fir.shape %[[VAL_14]]#1, %[[VAL_15]]#1 : (index, index) -> !fir.shape<2>
@@ -180,7 +180,7 @@ func.func @_QPtest1_in(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "x
 // CHECK:             %[[VAL_29:.*]] = fir.convert %[[VAL_28]] : (!fir.heap<!fir.array<?x?xf32>>) -> index
 // CHECK:             %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (!fir.heap<!fir.array<?x?xf32>>) -> index
 // CHECK:             %[[VAL_32:.*]] = arith.cmpi ne, %[[VAL_29]], %[[VAL_31]] : index
-// CHECK:             fir.if %[[VAL_32]] {
+// CHECK:             fir.if %[[VAL_32]] weights([0, 1]) {
 // CHECK:               fir.freemem %[[VAL_28]] : !fir.heap<!fir.array<?x?xf32>>
 // CHECK:             }
 // CHECK:           }
@@ -209,7 +209,7 @@ func.func @_QPtest1_out(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "
 // CHECK:             %[[VAL_10:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.ref<!fir.box<!fir.array<?x?xf32>>>
 // CHECK:             %[[VAL_11:.*]] = fir.is_present %[[VAL_10]] : (!fir.ref<!fir.box<!fir.array<?x?xf32>>>) -> i1
 // CHECK:             %[[VAL_12:.*]] = arith.andi %[[VAL_9]], %[[VAL_11]] : i1
-// CHECK:             %[[VAL_13:.*]] = fir.if %[[VAL_12]] -> (!fir.box<!fir.array<?x?xf32>>) {
+// CHECK:             %[[VAL_13:.*]] = fir.if %[[VAL_12]] weights([0, 1]) -> (!fir.box<!fir.array<?x?xf32>>) {
 // CHECK:               %[[VAL_14:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_15:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_16:.*]] = fir.shape %[[VAL_14]]#1, %[[VAL_15]]#1 : (index, index) -> !fir.shape<2>
@@ -234,7 +234,7 @@ func.func @_QPtest1_out(%arg0: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "
 // CHECK:             %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (!fir.heap<!fir.array<?x?xf32>>) -> index
 // CHECK:             %[[VAL_27:.*]] = fir.convert %[[VAL_26]] : (!fir.heap<!fir.array<?x?xf32>>) -> index
 // CHECK:             %[[VAL_28:.*]] = arith.cmpi ne, %[[VAL_25]], %[[VAL_27]] : index
-// CHECK:             fir.if %[[VAL_28]] {
+// CHECK:             fir.if %[[VAL_28]] weights([0, 1]) {
 // CHECK:               %[[VAL_29:.*]] = fir.address_of(@{{_QQcl.*}}
 // CHECK:               %[[VAL_30:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<none>
 // CHECK:               %[[VAL_31:.*]] = fir.convert %[[VAL_7]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.box<none>
@@ -280,7 +280,7 @@ func.func @_QPtest2(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.box
 // CHECK:             %[[VAL_17:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.ref<!fir.box<!fir.array<?x?x!fir.char<1,?>>>>
 // CHECK:             %[[VAL_18:.*]] = fir.is_present %[[VAL_17]] : (!fir.ref<!fir.box<!fir.array<?x?x!fir.char<1,?>>>>) -> i1
 // CHECK:             %[[VAL_19:.*]] = arith.andi %[[VAL_16]], %[[VAL_18]] : i1
-// CHECK:             %[[VAL_20:.*]] = fir.if %[[VAL_19]] -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) {
+// CHECK:             %[[VAL_20:.*]] = fir.if %[[VAL_19]] weights([0, 1]) -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) {
 // CHECK:               %[[VAL_21:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_5]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_22:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_23:.*]] = fir.shape %[[VAL_21]]#1, %[[VAL_22]]#1 : (index, index) -> !fir.shape<2>
@@ -310,7 +310,7 @@ func.func @_QPtest2(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.box
 // CHECK:             %[[VAL_36:.*]] = fir.convert %[[VAL_35]] : (!fir.heap<!fir.array<?x?x!fir.char<1,?>>>) -> index
 // CHECK:             %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (!fir.heap<!fir.array<?x?x!fir.char<1,?>>>) -> index
 // CHECK:             %[[VAL_39:.*]] = arith.cmpi ne, %[[VAL_36]], %[[VAL_38]] : index
-// CHECK:             fir.if %[[VAL_39]] {
+// CHECK:             fir.if %[[VAL_39]] weights([0, 1]) {
 // CHECK:               %[[VAL_40:.*]] = fir.address_of(@{{_QQcl.*}}
 // CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_1]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.box<none>
 // CHECK:               %[[VAL_42:.*]] = fir.convert %[[VAL_14]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.box<none>
@@ -356,7 +356,7 @@ func.func @_QPtest2_stack(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !f
 // CHECK:             %[[VAL_17:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.ref<!fir.box<!fir.array<?x?x!fir.char<1,?>>>>
 // CHECK:             %[[VAL_18:.*]] = fir.is_present %[[VAL_17]] : (!fir.ref<!fir.box<!fir.array<?x?x!fir.char<1,?>>>>) -> i1
 // CHECK:             %[[VAL_19:.*]] = arith.andi %[[VAL_16]], %[[VAL_18]] : i1
-// CHECK:             %[[VAL_20:.*]] = fir.if %[[VAL_19]] -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) {
+// CHECK:             %[[VAL_20:.*]] = fir.if %[[VAL_19]] weights([0, 1]) -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) {
 // CHECK:               %[[VAL_21:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_5]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_22:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_23:.*]] = fir.shape %[[VAL_21]]#1, %[[VAL_22]]#1 : (index, index) -> !fir.shape<2>
@@ -386,7 +386,7 @@ func.func @_QPtest2_stack(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !f
 // CHECK:             %[[VAL_36:.*]] = fir.convert %[[VAL_35]] : (!fir.heap<!fir.array<?x?x!fir.char<1,?>>>) -> index
 // CHECK:             %[[VAL_38:.*]] = fir.convert %[[VAL_37]] : (!fir.heap<!fir.array<?x?x!fir.char<1,?>>>) -> index
 // CHECK:             %[[VAL_39:.*]] = arith.cmpi ne, %[[VAL_36]], %[[VAL_38]] : index
-// CHECK:             fir.if %[[VAL_39]] {
+// CHECK:             fir.if %[[VAL_39]] weights([0, 1]) {
 // CHECK:               %[[VAL_40:.*]] = fir.address_of(@{{_QQcl.*}}
 // CHECK:               %[[VAL_41:.*]] = fir.convert %[[VAL_1]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.box<none>
 // CHECK:               %[[VAL_42:.*]] = fir.convert %[[VAL_14]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.box<none>
@@ -420,7 +420,7 @@ func.func @_QPtest3(%arg0: !fir.box<!fir.array<?x?x!fir.char<1,?>>> {fir.bindc_n
 // CHECK:             %[[VAL_11:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.ref<!fir.box<!fir.array<?x?x!fir.char<1,?>>>>
 // CHECK:             %[[VAL_12:.*]] = fir.is_present %[[VAL_11]] : (!fir.ref<!fir.box<!fir.array<?x?x!fir.char<1,?>>>>) -> i1
 // CHECK:             %[[VAL_13:.*]] = arith.andi %[[VAL_10]], %[[VAL_12]] : i1
-// CHECK:             %[[VAL_14:.*]] = fir.if %[[VAL_13]] -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) {
+// CHECK:             %[[VAL_14:.*]] = fir.if %[[VAL_13]] weights([0, 1]) -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) {
 // CHECK:               %[[VAL_15:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_17:.*]] = fir.box_elesize %[[VAL_0]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> index
@@ -451,7 +451,7 @@ func.func @_QPtest3(%arg0: !fir.box<!fir.array<?x?x!fir.char<1,?>>> {fir.bindc_n
 // CHECK:             %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (!fir.heap<!fir.array<?x?x!fir.char<1,?>>>) -> index
 // CHECK:             %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (!fir.heap<!fir.array<?x?x!fir.char<1,?>>>) -> index
 // CHECK:             %[[VAL_34:.*]] = arith.cmpi ne, %[[VAL_31]], %[[VAL_33]] : index
-// CHECK:             fir.if %[[VAL_34]] {
+// CHECK:             fir.if %[[VAL_34]] weights([0, 1]) {
 // CHECK:               %[[VAL_35:.*]] = fir.address_of(@{{_QQcl.*}}
 // CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.box<none>
 // CHECK:               %[[VAL_37:.*]] = fir.convert %[[VAL_8]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.box<none>
@@ -486,7 +486,7 @@ func.func @_QPtest3_stack(%arg0: !fir.box<!fir.array<?x?x!fir.char<1,?>>> {fir.b
 // CHECK:             %[[VAL_11:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.ref<!fir.box<!fir.array<?x?x!fir.char<1,?>>>>
 // CHECK:             %[[VAL_12:.*]] = fir.is_present %[[VAL_11]] : (!fir.ref<!fir.box<!fir.array<?x?x!fir.char<1,?>>>>) -> i1
 // CHECK:             %[[VAL_13:.*]] = arith.andi %[[VAL_10]], %[[VAL_12]] : i1
-// CHECK:             %[[VAL_14:.*]] = fir.if %[[VAL_13]] -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) {
+// CHECK:             %[[VAL_14:.*]] = fir.if %[[VAL_13]] weights([0, 1]) -> (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) {
 // CHECK:               %[[VAL_15:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_17:.*]] = fir.box_elesize %[[VAL_0]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> index
@@ -517,7 +517,7 @@ func.func @_QPtest3_stack(%arg0: !fir.box<!fir.array<?x?x!fir.char<1,?>>> {fir.b
 // CHECK:             %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (!fir.heap<!fir.array<?x?x!fir.char<1,?>>>) -> index
 // CHECK:             %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (!fir.heap<!fir.array<?x?x!fir.char<1,?>>>) -> index
 // CHECK:             %[[VAL_34:.*]] = arith.cmpi ne, %[[VAL_31]], %[[VAL_33]] : index
-// CHECK:             fir.if %[[VAL_34]] {
+// CHECK:             fir.if %[[VAL_34]] weights([0, 1]) {
 // CHECK:               %[[VAL_35:.*]] = fir.address_of(@{{_QQcl.*}}
 // CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_0]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.box<none>
 // CHECK:               %[[VAL_37:.*]] = fir.convert %[[VAL_8]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !fir.box<none>
@@ -553,7 +553,7 @@ func.func @_QPtest4(%arg0: !fir.box<!fir.array<?x?x!fir.char<1,10>>> {fir.bindc_
 // CHECK:             %[[VAL_12:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box<!fir.array<?x?x!fir.char<1,10>>>) -> !fir.ref<!fir.box<!fir.array<?x?x!fir.char<1,10>>>>
 // CHECK:             %[[VAL_13:.*]] = fir.is_present %[[VAL_12]] : (!fir.ref<!fir.box<!fir.array<?x?x!fir.char<1,10>>>>) -> i1
 // CHECK:             %[[VAL_14:.*]] = arith.andi %[[VAL_11]], %[[VAL_13]] : i1
-// CHECK:             %[[VAL_15:.*]] = fir.if %[[VAL_14]] -> (!fir.box<!fir.array<?x?x!fir.char<1,10>>>) {
+// CHECK:             %[[VAL_15:.*]] = fir.if %[[VAL_14]] weights([0, 1]) -> (!fir.box<!fir.array<?x?x!fir.char<1,10>>>) {
 // CHECK:               %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x!fir.char<1,10>>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x!fir.char<1,10>>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_18:.*]] = fir.shape %[[VAL_16]]#1, %[[VAL_17]]#1 : (index, index) -> !fir.shape<2>
@@ -583,7 +583,7 @@ func.func @_QPtest4(%arg0: !fir.box<!fir.array<?x?x!fir.char<1,10>>> {fir.bindc_
 // CHECK:             %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (!fir.heap<!fir.array<?x?x!fir.char<1,10>>>) -> index
 // CHECK:             %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (!fir.heap<!fir.array<?x?x!fir.char<1,10>>>) -> index
 // CHECK:             %[[VAL_34:.*]] = arith.cmpi ne, %[[VAL_31]], %[[VAL_33]] : index
-// CHECK:             fir.if %[[VAL_34]] {
+// CHECK:             fir.if %[[VAL_34]] weights([0, 1]) {
 ...
[truncated]

Copy link
Contributor

@tblah tblah left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Code changes look good. Have you seen improvements in benchmarks from this?

Do we need tests to guard that the attribute makes it to the branch after conversion through cf to llvm dialect?

Copy link
Contributor

@jeanPerier jeanPerier left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No comments on top of Tom's questions. Thanks.

@vzakhari
Copy link
Contributor Author

Code changes look good. Have you seen improvements in benchmarks from this?

Do we need tests to guard that the attribute makes it to the branch after conversion through cf to llvm dialect?

I did not measure it yet, and I do not expect much. This change is mostly for more convenient comparison of the perf profiles with and w/o repacking. I will measure perf after merging this and will present a summary on one of the Flang meetings.

I guess such an end-to-end test is worhwile. We only have individual steps being tested, e.g. in test/Fir/cfg-conversion-if.fir and tests like #144822

I will add a test before merging this.

@vzakhari
Copy link
Contributor Author

Thank you for the reviews!

vzakhari added 2 commits June 19, 2025 10:29
This allows LLVM to place the most probably cold blocks
that do the repacking out of the line of the potentially hot code.
@vzakhari vzakhari force-pushed the flang_cold_repacking branch from b0e23ba to 5b413a2 Compare June 19, 2025 18:15
@vzakhari vzakhari merged commit 8631b4f into llvm:main Jun 19, 2025
8 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Jun 19, 2025

LLVM Buildbot has detected a new failure on builder ppc64le-flang-rhel-clang running on ppc64le-flang-rhel-test while building flang at step 6 "test-build-unified-tree-check-flang".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/157/builds/31266

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-flang) failure: test (failure)
******************** TEST 'Flang :: Integration/cold_array_repacking.f90' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
/home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/build/bin/flang -fc1 -frepack-arrays -emit-llvm /home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/llvm-project/flang/test/Integration/cold_array_repacking.f90 -o - | /home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/build/bin/FileCheck /home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/llvm-project/flang/test/Integration/cold_array_repacking.f90 # RUN: at line 3
+ /home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/build/bin/flang -fc1 -frepack-arrays -emit-llvm /home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/llvm-project/flang/test/Integration/cold_array_repacking.f90 -o -
+ /home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/build/bin/FileCheck /home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/llvm-project/flang/test/Integration/cold_array_repacking.f90
/home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/llvm-project/flang/test/Integration/cold_array_repacking.f90:6:15: error: CHECK-SAME: expected string not found in input
! CHECK-SAME: ptr [[TMP0:%.*]]) {
              ^
<stdin>:14:20: note: scanning from here
define void @test_(ptr %0) #0 {
                   ^

Input file: <stdin>
Check file: /home/buildbots/llvm-external-buildbots/workers/ppc64le-flang-rhel-test/ppc64le-flang-rhel-clang-build/llvm-project/flang/test/Integration/cold_array_repacking.f90

-dump-input=help explains the following input dump.

Input was:
<<<<<<
        .
        .
        .
        9:  
       10: declare void @free(ptr) 
       11:  
       12: declare ptr @malloc(i64) 
       13:  
       14: define void @test_(ptr %0) #0 { 
same:6                        X~~~~~~~~~~~~ error: no match found
       15:  %2 = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 
same:6     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       16:  %3 = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 
same:6     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       17:  %4 = ptrtoint ptr %0 to i64 
same:6     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       18:  %5 = icmp ne i64 %4, 0 
same:6     ~~~~~~~~~~~~~~~~~~~~~~~~
       19:  br i1 %5, label %6, label %46 
same:6     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        .
        .
        .
>>>>>>

--

********************


vzakhari added a commit that referenced this pull request Jun 19, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

flang:codegen flang:fir-hlfir flang Flang issues not falling into any other category

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants