llvm
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
Lines changed: 47 additions & 27 deletions
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir
Lines changed: 50 additions & 0 deletions
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir
Lines changed: 31 additions & 0 deletions
@@ -469,7 +469,7 @@ class MinMaxlocAsElementalConverter : public ReductionAsElementalConverter {
   //   * 1 boolean indicating whether it is the first time
   //     the mask is true.
   //
-  // If precomputeFirst() returns true, then the boolean loop-carried
+  // If useIsFirst() returns false, then the boolean loop-carried
   // value is not used.
   static constexpr unsigned maxNumReductions = Fortran::common::maxRank + 2;
   static constexpr bool isMax = std::is_same_v<T, hlfir::MaxlocOp>;
@@ -523,7 +523,7 @@ class MinMaxlocAsElementalConverter : public ReductionAsElementalConverter {
 
   void
   checkReductions(const llvm::SmallVectorImpl<mlir::Value> &reductions) const {
-    if (precomputeFirst())
+    if (!useIsFirst())
       assert(reductions.size() == getNumCoors() + 1 &&
              "invalid number of reductions for MINLOC/MAXLOC");
     else
@@ -540,15 +540,24 @@ class MinMaxlocAsElementalConverter : public ReductionAsElementalConverter {
   mlir::Value
   getIsFirst(const llvm::SmallVectorImpl<mlir::Value> &reductions) const {
     checkReductions(reductions);
-    assert(!precomputeFirst() && "IsFirst predicate must not be used");
+    assert(useIsFirst() && "IsFirst predicate must not be used");
     return reductions[getNumCoors() + 1];
   }
 
-  // Return true iff the reductions can be initialized
-  // by reading the first element of the array (or its section).
-  // If it returns false, then we use an auxiliary boolean
-  // to identify the very first reduction update.
-  bool precomputeFirst() const { return !getMask(); }
+  // Return true iff the 'nnan' fast-math flag is not set, i.e. the input
+  // can contain NaNs that must be honored, such that all-NaNs input must
+  // produce the location of the first unmasked NaN.
+  bool honorNans() const {
+    return !static_cast<bool>(getFastMath() & mlir::arith::FastMathFlags::nnan);
+  }
+
+  // Return true iff the loop-carried IsFirst predicate is required,
+  // i.e. a mask is present and NaNs must be honored. Without a mask,
+  // the reductions can be initialized from the first input element.
+  // If NaNs are not honored, the initial MIN/MAX value can be +/-LARGEST
+  // and coordinates still update for non-empty input. NOTE(review):
+  // confirm this when every element equals the init value or is infinite.
+  bool useIsFirst() const { return getMask() && honorNans(); }
 };
 
 template <typename T>
@@ -557,9 +566,10 @@ MinMaxlocAsElementalConverter<T>::genReductionInitValues(
     mlir::ValueRange oneBasedIndices,
     const llvm::SmallVectorImpl<mlir::Value> &extents) {
   fir::IfOp ifOp;
-  if (precomputeFirst()) {
+  if (!useIsFirst() && honorNans()) {
     // Check if we can load the value of the first element in the array
     // or its section (for partial reduction).
+    assert(!getMask() && "cannot fetch first element when mask is present");
     assert(extents.size() == getNumCoors() &&
            "wrong number of extents for MINLOC/MAXLOC reduction");
     mlir::Value isNotEmpty = genIsNotEmptyArrayExtents(loc, builder, extents);
@@ -600,7 +610,7 @@ MinMaxlocAsElementalConverter<T>::genReductionInitValues(
     builder.create<fir::ResultOp>(loc, result);
     builder.setInsertionPointAfter(ifOp);
     result = ifOp.getResults();
-  } else {
+  } else if (useIsFirst()) {
     // Initial value for isFirst predicate. It is switched to false,
     // when the reduction update dynamically happens inside the reduction
     // loop.
@@ -621,7 +631,7 @@ MinMaxlocAsElementalConverter<T>::reduceOneElement(
       hlfir::loadElementAt(loc, builder, array, oneBasedIndices);
   mlir::Value cmp = genMinMaxComparison<isMax>(loc, builder, elementValue,
                                                getCurrentMinMax(currentValue));
-  if (!precomputeFirst()) {
+  if (useIsFirst()) {
     // If isFirst is true, then do the reduction update regardless
     // of the FP comparison.
     cmp =
@@ -652,7 +662,7 @@ MinMaxlocAsElementalConverter<T>::reduceOneElement(
       loc, cmp, elementValue, getCurrentMinMax(currentValue));
   newIndices.push_back(newMinMax);
 
-  if (!precomputeFirst()) {
+  if (useIsFirst()) {
     mlir::Value newIsFirst = builder.createBool(loc, false);
     newIndices.push_back(newIsFirst);
   }
@@ -746,7 +756,7 @@ class MinMaxvalAsElementalConverter
   //
   // The boolean flag is used to replace the initial value
   // with the first input element even if it is NaN.
-  // If precomputeFirst() returns true, then the boolean loop-carried
+  // If useIsFirst() returns false, then the boolean loop-carried
   // value is not used.
   static constexpr bool isMax = std::is_same_v<T, hlfir::MaxvalOp>;
   using Base = NumericReductionAsElementalConverterBase<T>;
@@ -781,13 +791,13 @@ class MinMaxvalAsElementalConverter
     mlir::Value currentMinMax = getCurrentMinMax(currentValue);
     mlir::Value cmp =
         genMinMaxComparison<isMax>(loc, builder, elementValue, currentMinMax);
-    if (!precomputeFirst())
+    if (useIsFirst())
       cmp = builder.create<mlir::arith::OrIOp>(loc, cmp,
                                                getIsFirst(currentValue));
     mlir::Value newMinMax = builder.create<mlir::arith::SelectOp>(
         loc, cmp, elementValue, currentMinMax);
     result.push_back(newMinMax);
-    if (!precomputeFirst())
+    if (useIsFirst())
       result.push_back(builder.createBool(loc, false));
     return result;
   }
@@ -813,17 +823,25 @@ class MinMaxvalAsElementalConverter
   mlir::Value
   getIsFirst(const llvm::SmallVectorImpl<mlir::Value> &reductions) const {
     this->checkReductions(reductions);
-    assert(!precomputeFirst() && "IsFirst predicate must not be used");
+    assert(useIsFirst() && "IsFirst predicate must not be used");
     return reductions[1];
   }
 
-  // Return true iff the reductions can be initialized
-  // by reading the first element of the array (or its section).
-  // If it returns false, then we use an auxiliary boolean
-  // to identify the very first reduction update.
-  bool precomputeFirst() const { return !this->getMask(); }
+  // Return true iff the 'nnan' fast-math flag is not set, i.e. NaNs in
+  // the input must be honored and all-NaNs input must produce NaN result.
+  bool honorNans() const {
+    return !static_cast<bool>(this->getFastMath() &
+                              mlir::arith::FastMathFlags::nnan);
+  }
+
+  // Return true iff the loop-carried IsFirst predicate is required,
+  // i.e. a mask is present and NaNs must be honored. Without a mask,
+  // the reduction can be initialized from the first input element.
+  // If NaNs are not honored, the initial MIN/MAX value can be +/-LARGEST.
+  // NOTE(review): confirm the result when all elements are infinite.
+  bool useIsFirst() const { return this->getMask() && honorNans(); }
 
-  std::size_t getNumReductions() const { return precomputeFirst() ? 1 : 2; }
+  std::size_t getNumReductions() const { return useIsFirst() ? 2 : 1; }
 };
 
 template <typename T>
@@ -836,12 +854,14 @@ MinMaxvalAsElementalConverter<T>::genReductionInitValues(
   mlir::Location loc = this->loc;
 
   fir::IfOp ifOp;
-  if (precomputeFirst()) {
+  if (!useIsFirst() && honorNans()) {
     // Check if we can load the value of the first element in the array
     // or its section (for partial reduction).
-    assert(extents.size() == this->isTotalReduction()
-               ? this->getSourceRank()
-               : 1u && "wrong number of extents for MINVAL/MAXVAL reduction");
+    assert(!this->getMask() &&
+           "cannot fetch first element when mask is present");
+    assert(extents.size() ==
+               (this->isTotalReduction() ? this->getSourceRank() : 1u) &&
+           "wrong number of extents for MINVAL/MAXVAL reduction");
     mlir::Value isNotEmpty = genIsNotEmptyArrayExtents(loc, builder, extents);
     llvm::SmallVector<mlir::Value> indices = genFirstElementIndicesForReduction(
         loc, builder, this->isTotalReduction(), this->getConstDim(),
@@ -867,7 +887,7 @@ MinMaxvalAsElementalConverter<T>::genReductionInitValues(
     builder.create<fir::ResultOp>(loc, result);
     builder.setInsertionPointAfter(ifOp);
     result = ifOp.getResults();
-  } else {
+  } else if (useIsFirst()) {
     // Initial value for isFirst predicate. It is switched to false,
     // when the reduction update dynamically happens inside the reduction
     // loop.
 
@@ -417,6 +417,56 @@ func.func @test_partial_var_nomask(%input: !fir.box<!fir.array<?x?x?xf32>>) -> !
 // CHECK:           return %[[VAL_11]] : !hlfir.expr<?x?xi32>
 // CHECK:         }
 
+// Test that 'nnan' allows using the -LARGEST value as the MAXLOC reduction init.
+func.func @test_total_expr_nnan(%input: !hlfir.expr<?x?x?xf32>) -> !hlfir.expr<3xi32> {
+  %0 = hlfir.maxloc %input {fastmath = #arith.fastmath<nnan>} : (!hlfir.expr<?x?x?xf32>) -> !hlfir.expr<3xi32>
+  return %0 : !hlfir.expr<3xi32>
+}
+// CHECK-LABEL:   func.func @test_total_expr_nnan(
+// CHECK-SAME:                                    %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x?xf32>) -> !hlfir.expr<3xi32> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant false
+// CHECK:           %[[VAL_2:.*]] = arith.constant 3 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_5:.*]] = arith.constant -3.40282347E+38 : f32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_7:.*]] = fir.alloca !fir.array<3xi32>
+// CHECK:           %[[VAL_8:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x?xf32>) -> !fir.shape<3>
+// CHECK:           %[[VAL_9:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 0 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_10:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 1 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_11:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 2 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_12:.*]]:4 = fir.do_loop %[[VAL_13:.*]] = %[[VAL_4]] to %[[VAL_11]] step %[[VAL_4]] iter_args(%[[VAL_14:.*]] = %[[VAL_6]], %[[VAL_15:.*]] = %[[VAL_6]], %[[VAL_16:.*]] = %[[VAL_6]], %[[VAL_17:.*]] = %[[VAL_5]]) -> (i32, i32, i32, f32) {
+// CHECK:             %[[VAL_18:.*]]:4 = fir.do_loop %[[VAL_19:.*]] = %[[VAL_4]] to %[[VAL_10]] step %[[VAL_4]] iter_args(%[[VAL_20:.*]] = %[[VAL_14]], %[[VAL_21:.*]] = %[[VAL_15]], %[[VAL_22:.*]] = %[[VAL_16]], %[[VAL_23:.*]] = %[[VAL_17]]) -> (i32, i32, i32, f32) {
+// CHECK:               %[[VAL_24:.*]]:4 = fir.do_loop %[[VAL_25:.*]] = %[[VAL_4]] to %[[VAL_9]] step %[[VAL_4]] iter_args(%[[VAL_26:.*]] = %[[VAL_20]], %[[VAL_27:.*]] = %[[VAL_21]], %[[VAL_28:.*]] = %[[VAL_22]], %[[VAL_29:.*]] = %[[VAL_23]]) -> (i32, i32, i32, f32) {
+// CHECK:                 %[[VAL_30:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_25]], %[[VAL_19]], %[[VAL_13]] : (!hlfir.expr<?x?x?xf32>, index, index, index) -> f32
+// CHECK:                 %[[VAL_31:.*]] = arith.cmpf ogt, %[[VAL_30]], %[[VAL_29]] fastmath<nnan> : f32
+// CHECK:                 %[[VAL_32:.*]] = arith.cmpf une, %[[VAL_29]], %[[VAL_29]] fastmath<nnan> : f32
+// CHECK:                 %[[VAL_33:.*]] = arith.cmpf oeq, %[[VAL_30]], %[[VAL_30]] fastmath<nnan> : f32
+// CHECK:                 %[[VAL_34:.*]] = arith.andi %[[VAL_32]], %[[VAL_33]] : i1
+// CHECK:                 %[[VAL_35:.*]] = arith.ori %[[VAL_31]], %[[VAL_34]] : i1
+// CHECK:                 %[[VAL_36:.*]] = fir.convert %[[VAL_25]] : (index) -> i32
+// CHECK:                 %[[VAL_37:.*]] = arith.select %[[VAL_35]], %[[VAL_36]], %[[VAL_26]] : i32
+// CHECK:                 %[[VAL_38:.*]] = fir.convert %[[VAL_19]] : (index) -> i32
+// CHECK:                 %[[VAL_39:.*]] = arith.select %[[VAL_35]], %[[VAL_38]], %[[VAL_27]] : i32
+// CHECK:                 %[[VAL_40:.*]] = fir.convert %[[VAL_13]] : (index) -> i32
+// CHECK:                 %[[VAL_41:.*]] = arith.select %[[VAL_35]], %[[VAL_40]], %[[VAL_28]] : i32
+// CHECK:                 %[[VAL_42:.*]] = arith.select %[[VAL_35]], %[[VAL_30]], %[[VAL_29]] : f32
+// CHECK:                 fir.result %[[VAL_37]], %[[VAL_39]], %[[VAL_41]], %[[VAL_42]] : i32, i32, i32, f32
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_43:.*]]#0, %[[VAL_43]]#1, %[[VAL_43]]#2, %[[VAL_43]]#3 : i32, i32, i32, f32
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_44:.*]]#0, %[[VAL_44]]#1, %[[VAL_44]]#2, %[[VAL_44]]#3 : i32, i32, i32, f32
+// CHECK:           }
+// CHECK:           %[[VAL_45:.*]] = hlfir.designate %[[VAL_7]] (%[[VAL_4]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_46:.*]]#0 to %[[VAL_45]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_47:.*]] = hlfir.designate %[[VAL_7]] (%[[VAL_3]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_46]]#1 to %[[VAL_47]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_48:.*]] = hlfir.designate %[[VAL_7]] (%[[VAL_2]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_46]]#2 to %[[VAL_48]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_49:.*]] = hlfir.as_expr %[[VAL_7]] move %[[VAL_1]] : (!fir.ref<!fir.array<3xi32>>, i1) -> !hlfir.expr<3xi32>
+// CHECK:           return %[[VAL_49]] : !hlfir.expr<3xi32>
+// CHECK:         }
+
 // Character comparisons are not supported yet.
 func.func @test_character(%input: !fir.box<!fir.array<?x!fir.char<1>>>) -> !hlfir.expr<1xi32> {
   %0 = hlfir.maxloc %input : (!fir.box<!fir.array<?x!fir.char<1>>>) -> !hlfir.expr<1xi32>
 
@@ -268,3 +268,34 @@ func.func @test_total_var_nomask(%input: !fir.box<!fir.array<?x?xf16>>) -> f16 {
 // CHECK:           }
 // CHECK:           return %[[VAL_14]] : f16
 // CHECK:         }
+
+// Test that 'nnan' allows using the -LARGEST value as the MAXVAL reduction init.
+func.func @test_partial_expr_nnan(%input: !hlfir.expr<?x?xf64>) -> !hlfir.expr<?xf64> {
+  %dim = arith.constant 1 : i32
+  %0 = hlfir.maxval %input dim %dim {fastmath = #arith.fastmath<nnan>} : (!hlfir.expr<?x?xf64>, i32) -> !hlfir.expr<?xf64>
+  return %0 : !hlfir.expr<?xf64>
+}
+// CHECK-LABEL:   func.func @test_partial_expr_nnan(
+// CHECK-SAME:                                      %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?xf64>) -> !hlfir.expr<?xf64> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant -1.7976931348623157E+308 : f64
+// CHECK:           %[[VAL_3:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xf64>) -> !fir.shape<2>
+// CHECK:           %[[VAL_4:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_7:.*]] = hlfir.elemental %[[VAL_6]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf64> {
+// CHECK:           ^bb0(%[[VAL_8:.*]]: index):
+// CHECK:             %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_1]] to %[[VAL_4]] step %[[VAL_1]] iter_args(%[[VAL_11:.*]] = %[[VAL_2]]) -> (f64) {
+// CHECK:               %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]], %[[VAL_8]] : (!hlfir.expr<?x?xf64>, index, index) -> f64
+// CHECK:               %[[VAL_13:.*]] = arith.cmpf ogt, %[[VAL_12]], %[[VAL_11]] fastmath<nnan> : f64
+// CHECK:               %[[VAL_14:.*]] = arith.cmpf une, %[[VAL_11]], %[[VAL_11]] fastmath<nnan> : f64
+// CHECK:               %[[VAL_15:.*]] = arith.cmpf oeq, %[[VAL_12]], %[[VAL_12]] fastmath<nnan> : f64
+// CHECK:               %[[VAL_16:.*]] = arith.andi %[[VAL_14]], %[[VAL_15]] : i1
+// CHECK:               %[[VAL_17:.*]] = arith.ori %[[VAL_13]], %[[VAL_16]] : i1
+// CHECK:               %[[VAL_18:.*]] = arith.select %[[VAL_17]], %[[VAL_12]], %[[VAL_11]] : f64
+// CHECK:               fir.result %[[VAL_18]] : f64
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_9]] : f64
+// CHECK:           }
+// CHECK:           return %[[VAL_7]] : !hlfir.expr<?xf64>
+// CHECK:         }