Fixed handling of dynamically optional PAD.

vzakhari · vzakhari · commit 30e193f90582 · 2025-01-28T12:25:04.000-08:00
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
@@ -1002,18 +1002,10 @@ class ReshapeAsElementalConversion
     llvm::SmallVector<mlir::Value, Fortran::common::maxRank> arrayExtents =
         hlfir::genExtentsVector(loc, builder, array);
 
-    mlir::Value arraySize, padSize;
-    llvm::SmallVector<mlir::Value, Fortran::common::maxRank> padExtents;
-    if (pad) {
-      // If PAD is present, we have to use array size to start taking
-      // elements from the PAD array.
-      arraySize = computeArraySize(loc, builder, arrayExtents);
-
-      padExtents = hlfir::genExtentsVector(loc, builder, hlfir::Entity{pad});
-      // PAD size is needed to wrap around the linear index addressing
-      // the PAD array.
-      padSize = computeArraySize(loc, builder, padExtents);
-    }
+    // If PAD is present, we need the ARRAY size to know when to start
+    // taking elements from the PAD array.
+    mlir::Value arraySize =
+        pad ? computeArraySize(loc, builder, arrayExtents) : nullptr;
     hlfir::Entity shape = hlfir::Entity{reshape.getShape()};
     llvm::SmallVector<mlir::Value, Fortran::common::maxRank> resultExtents;
     mlir::Type indexType = builder.getIndexType();
@@ -1037,15 +1029,18 @@ class ReshapeAsElementalConversion
 
         // In the 'else' block, return an element from the PAD.
         builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+        // PAD is dynamically optional, but we can unconditionally access it
+        // in the 'else' block. If we have to start taking elements from it,
+        // then it must be present in a valid program.
+        llvm::SmallVector<mlir::Value, Fortran::common::maxRank> padExtents =
+            hlfir::genExtentsVector(loc, builder, hlfir::Entity{pad});
         // Subtract the ARRAY size from the zero-based linear index
         // to get the zero-based linear index into PAD.
         mlir::Value padLinearIndex =
             builder.create<mlir::arith::SubIOp>(loc, linearIndex, arraySize);
-        // PAD wraps around, when additional elements are needed.
-        padLinearIndex =
-            builder.create<mlir::arith::RemUIOp>(loc, padLinearIndex, padSize);
         llvm::SmallVector<mlir::Value, Fortran::common::maxRank> padIndices =
-            delinearizeIndex(loc, builder, padExtents, padLinearIndex);
+            delinearizeIndex(loc, builder, padExtents, padLinearIndex,
+                             /*wrapAround=*/true);
         mlir::Value padElement =
             hlfir::loadElementAt(loc, builder, hlfir::Entity{pad}, padIndices);
         builder.create<fir::ResultOp>(loc, padElement);
@@ -1055,7 +1050,8 @@ class ReshapeAsElementalConversion
       }
 
       llvm::SmallVector<mlir::Value, Fortran::common::maxRank> arrayIndices =
-          delinearizeIndex(loc, builder, arrayExtents, linearIndex);
+          delinearizeIndex(loc, builder, arrayExtents, linearIndex,
+                           /*wrapAround=*/false);
       mlir::Value arrayElement =
           hlfir::loadElementAt(loc, builder, array, arrayIndices);
 
@@ -1119,33 +1115,39 @@ class ReshapeAsElementalConversion
   ///   ...
   ///   i(n-1) := linearIndex % e(n-1) + 1
   ///   linearIndex := linearIndex / e(n-1)
-  ///   in := linearIndex + 1
+  ///   if (wrapAround) {
+  ///     // If the index is allowed to wrap around, then
+  ///     // take it modulo the last dimension's extent.
+  ///     in := linearIndex % en + 1
+  ///   } else {
+  ///     in := linearIndex + 1
+  ///   }
   static llvm::SmallVector<mlir::Value, Fortran::common::maxRank>
   delinearizeIndex(mlir::Location loc, fir::FirOpBuilder &builder,
-                   mlir::ValueRange extents, mlir::Value linearIndex) {
+                   mlir::ValueRange extents, mlir::Value linearIndex,
+                   bool wrapAround) {
     llvm::SmallVector<mlir::Value, Fortran::common::maxRank> indices;
     mlir::Type indexType = builder.getIndexType();
     mlir::Value one = builder.createIntegerConstant(loc, indexType, 1);
     linearIndex = builder.createConvert(loc, indexType, linearIndex);
 
     for (std::size_t dim = 0; dim < extents.size(); ++dim) {
-      mlir::Value currentIndex;
-      if (dim == extents.size() - 1) {
-        currentIndex = linearIndex;
-      } else {
-        mlir::Value extent =
-            builder.createConvert(loc, indexType, extents[dim]);
+      mlir::Value extent = builder.createConvert(loc, indexType, extents[dim]);
+      // Avoid the modulo for the last index, unless wrap around is allowed.
+      mlir::Value currentIndex = linearIndex;
+      if (dim != extents.size() - 1 || wrapAround)
         currentIndex =
             builder.create<mlir::arith::RemUIOp>(loc, linearIndex, extent);
-        linearIndex =
-            builder.create<mlir::arith::DivUIOp>(loc, linearIndex, extent);
-      }
+      // The result of the last division is unused, so it will be DCEd.
+      linearIndex =
+          builder.create<mlir::arith::DivUIOp>(loc, linearIndex, extent);
       indices.push_back(
           builder.create<mlir::arith::AddIOp>(loc, currentIndex, one));
     }
     return indices;
   }
 
+  /// Return size of an array given its extents.
   static mlir::Value computeArraySize(mlir::Location loc,
                                       fir::FirOpBuilder &builder,
                                       mlir::ValueRange extents) {
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-reshape.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-reshape.fir
@@ -41,11 +41,6 @@ func.func @reshape_with_pad(%arg0: !fir.box<!fir.array<?x?x?xf32>>, %arg1: !fir.
 // CHECK:           %[[ARRAY_DIM2:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
 // CHECK:           %[[VAL_9:.*]] = arith.muli %[[ARRAY_DIM0]]#1, %[[ARRAY_DIM1]]#1 overflow<nuw> : index
 // CHECK:           %[[ARRAY_SIZE:.*]] = arith.muli %[[VAL_9]], %[[ARRAY_DIM2]]#1 overflow<nuw> : index
-// CHECK:           %[[PAD_DIM0:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_5]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
-// CHECK:           %[[PAD_DIM1:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
-// CHECK:           %[[PAD_DIM2:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
-// CHECK:           %[[VAL_14:.*]] = arith.muli %[[PAD_DIM0]]#1, %[[PAD_DIM1]]#1 overflow<nuw> : index
-// CHECK:           %[[PAD_SIZE:.*]] = arith.muli %[[VAL_14]], %[[PAD_DIM2]]#1 overflow<nuw> : index
 // CHECK:           %[[VAL_16:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_4]])  : (!fir.ref<!fir.array<2xi32>>, index) -> !fir.ref<i32>
 // CHECK:           %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<i32>
 // CHECK:           %[[VAL_18:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_3]])  : (!fir.ref<!fir.array<2xi32>>, index) -> !fir.ref<i32>
@@ -80,15 +75,18 @@ func.func @reshape_with_pad(%arg0: !fir.box<!fir.array<?x?x?xf32>>, %arg1: !fir.
 // CHECK:               %[[VAL_48:.*]] = fir.load %[[VAL_47]] : !fir.ref<f32>
 // CHECK:               fir.result %[[VAL_48]] : f32
 // CHECK:             } else {
+// CHECK:               %[[PAD_DIM0:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_5]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[PAD_DIM1:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[PAD_DIM2:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
 // CHECK:               %[[PAD_LINEAR_INDEX:.*]] = arith.subi %[[LINEAR_INDEX]], %[[ARRAY_SIZE]] overflow<nuw> : index
-// CHECK:               %[[PAD_LINEAR_INDEX_MOD:.*]] = arith.remui %[[PAD_LINEAR_INDEX]], %[[PAD_SIZE]] : index
-// CHECK:               %[[VAL_51:.*]] = arith.remui %[[PAD_LINEAR_INDEX_MOD]], %[[PAD_DIM0]]#1 : index
-// CHECK:               %[[VAL_52:.*]] = arith.divui %[[PAD_LINEAR_INDEX_MOD]], %[[PAD_DIM0]]#1 : index
+// CHECK:               %[[VAL_51:.*]] = arith.remui %[[PAD_LINEAR_INDEX]], %[[PAD_DIM0]]#1 : index
+// CHECK:               %[[VAL_52:.*]] = arith.divui %[[PAD_LINEAR_INDEX]], %[[PAD_DIM0]]#1 : index
 // CHECK:               %[[PAD_IDX0:.*]] = arith.addi %[[VAL_51]], %[[VAL_4]] overflow<nuw> : index
 // CHECK:               %[[VAL_54:.*]] = arith.remui %[[VAL_52]], %[[PAD_DIM1]]#1 : index
 // CHECK:               %[[VAL_55:.*]] = arith.divui %[[VAL_52]], %[[PAD_DIM1]]#1 : index
 // CHECK:               %[[PAD_IDX1:.*]] = arith.addi %[[VAL_54]], %[[VAL_4]] overflow<nuw> : index
-// CHECK:               %[[PAD_IDX2:.*]] = arith.addi %[[VAL_55]], %[[VAL_4]] overflow<nuw> : index
+// CHECK:               %[[VAL_56:.*]] = arith.remui %[[VAL_55]], %[[PAD_DIM2]]#1 : index
+// CHECK:               %[[PAD_IDX2:.*]] = arith.addi %[[VAL_56]], %[[VAL_4]] overflow<nuw> : index
 // CHECK:               %[[VAL_58:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_5]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_59:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
 // CHECK:               %[[VAL_60:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)