From 397178e3f0573fba810a180ae8c1a6899d017994 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen@sifive.com>
Date: Thu, 9 Oct 2025 01:12:00 -0700
Subject: [PATCH 1/3] [Function] Add getFMFFromFnAttribute() to derive
 FastMathFlags from function attributes. NFC

---
 llvm/include/llvm/IR/Function.h     | 3 +++
 llvm/lib/Analysis/IVDescriptors.cpp | 6 +-----
 llvm/lib/IR/Function.cpp            | 8 ++++++++
 3 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index d3497716ca844..21e9d5ea9d066 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -470,6 +470,9 @@ class LLVM_ABI Function : public GlobalObject, public ilist_node<Function> {
     return AttributeSets.getFnStackAlignment();
   }
 
+  /// Derive nnan/nsz from "no-nans-fp-math"/"no-signed-zeros-fp-math" attrs.
+  FastMathFlags getFMFFromFnAttribute() const;
+
   /// Returns true if the function has ssp, sspstrong, or sspreq fn attrs.
   bool hasStackProtectorFnAttr() const;
 
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index b8c540ce4b99d..9b842d6ab37c5 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -998,11 +998,7 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
                                           ScalarEvolution *SE) {
   BasicBlock *Header = TheLoop->getHeader();
   Function &F = *Header->getParent();
-  FastMathFlags FMF;
-  FMF.setNoNaNs(
-      F.getFnAttribute("no-nans-fp-math").getValueAsBool());
-  FMF.setNoSignedZeros(
-      F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool());
+  FastMathFlags FMF = F.getFMFFromFnAttribute();
 
   if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT,
                       SE)) {
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index fc067459dcba3..d2b41c1c9c2ab 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -784,6 +784,14 @@ uint64_t Function::getFnAttributeAsParsedInteger(StringRef Name,
   return Result;
 }
 
+FastMathFlags Function::getFMFFromFnAttribute() const {
+  FastMathFlags FuncFMF; // Only nnan and nsz are populated from attributes.
+  FuncFMF.setNoNaNs(getFnAttribute("no-nans-fp-math").getValueAsBool());
+  FuncFMF.setNoSignedZeros(
+      getFnAttribute("no-signed-zeros-fp-math").getValueAsBool());
+  return FuncFMF;
+}
+
 /// gets the specified attribute from the list of attributes.
 Attribute Function::getParamAttribute(unsigned ArgNo,
                                       Attribute::AttrKind Kind) const {

From 9f341215d252cd736b64b37f26cd3c01a3e2b528 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen@sifive.com>
Date: Thu, 9 Oct 2025 02:45:44 -0700
Subject: [PATCH 2/3] pre-commit test

---
 .../Transforms/LoopVectorize/reduction.ll     | 59 +++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll
index 65d57015b0140..c9609686ea520 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction.ll
@@ -1335,3 +1335,62 @@ exit:
   %lcssa.exit = phi i64 [ %phi.sum.next, %loop.latch ]
   ret i64 %lcssa.exit
 }
+
+define float @reduction_sum_float(i64 %n, ptr %a) #0 {
+; CHECK-LABEL: define float @reduction_sum_float(
+; CHECK-SAME: i64 [[N:%.*]], ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -4
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ <float 0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1]] = fadd <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[TMP3:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP1]])
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[SUM]] = fadd float [[RDX]], [[TMP4]]
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP28:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi float [ [[SUM]], [[LOOP]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    ret float [[SUM_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %rdx = phi float [ 0.0, %entry ], [ %sum, %loop ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
+  %0 = load float, ptr %arrayidx, align 4
+  %sum = fadd float %rdx, %0
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %n
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  %sum.lcssa = phi float [ %sum, %loop ]
+  ret float %sum.lcssa
+}
+
+attributes #0 = { "no-signed-zeros-fp-math"="true" }

From b437fe95ecdbdc5206d4d8a280fbbe9aa41b8fc9 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen@sifive.com>
Date: Thu, 9 Oct 2025 02:54:44 -0700
Subject: [PATCH 3/3] [LoopUtils] Consider fast-math flags derived from
 function attribute when getting recurrence identity

---
 llvm/lib/Transforms/Utils/LoopUtils.cpp         | 8 ++++++--
 llvm/test/Transforms/LoopVectorize/reduction.ll | 2 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index b6ba82288aeb4..c001de4f1d2b5 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1398,8 +1398,10 @@ Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src,
                                    RecurKind RdxKind) {
   auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
   auto getIdentity = [&]() {
+    Function *Fn = Builder.GetInsertBlock()->getParent();
     return getRecurrenceIdentity(RdxKind, SrcVecEltTy,
-                                 Builder.getFastMathFlags());
+                                 Builder.getFastMathFlags() |
+                                     Fn->getFMFFromFnAttribute());
   };
   switch (RdxKind) {
   case RecurKind::AddChainWithSubs:
@@ -1442,7 +1444,9 @@ Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src,
   assert(VPReductionIntrinsic::isVPReduction(VPID) &&
          "No VPIntrinsic for this reduction");
   auto *EltTy = cast<VectorType>(Src->getType())->getElementType();
-  Value *Iden = getRecurrenceIdentity(Kind, EltTy, Builder.getFastMathFlags());
+  Function *Fn = Builder.GetInsertBlock()->getParent();
+  Value *Iden = getRecurrenceIdentity(
+      Kind, EltTy, Builder.getFastMathFlags() | Fn->getFMFFromFnAttribute());
   Value *Ops[] = {Iden, Src, Mask, EVL};
   return Builder.CreateIntrinsic(EltTy, VPID, Ops);
 }
diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll
index c9609686ea520..dffcca89f9813 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction.ll
@@ -1355,7 +1355,7 @@ define float @reduction_sum_float(i64 %n, ptr %a) #0 {
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP3:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP1]])
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph: