Changes from 3 commits
18 changes: 18 additions & 0 deletions llvm/docs/LangRef.rst
@@ -7593,6 +7593,24 @@
Note that setting ``llvm.loop.interleave.count`` to 1 disables interleaving
multiple iterations of the loop. If ``llvm.loop.interleave.count`` is set to 0
then the interleave count will be determined automatically.

'``llvm.loop.vectorize.reassociate_fpreductions.enable``' Metadata
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

This metadata selectively allows or disallows reassociating floating-point
reductions, which may otherwise be unsafe to reassociate, during loop
vectorization. For example, a floating-point ``ADD`` reduction without the
``reassoc`` fast-math flag may be vectorized provided that this metadata
allows it. The first operand is the string
``llvm.loop.vectorize.reassociate_fpreductions.enable``
and the second operand is a bit. If the bit operand value is 1, unsafe
reduction reassociations are enabled. A value of 0 disables unsafe
reduction reassociations.

.. code-block:: llvm

!0 = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 0}
!1 = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 1}

'``llvm.loop.vectorize.enable``' Metadata
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Contributor:
Just a quick thought - what would you expect to happen for nested loops where the reduction variable is used at all levels of the loop? For example,

  float v = 0;
  for (...) {
    for (...) {
      for (...) {
        v += ...;
      }
      v += ...;
    }
    v += ...;
  }

Suppose the metadata is only added to the outer loop, but not the inner loops. It's possible that the inner loops get fully unrolled such that only the outer loop remains by the time we run the loop vectoriser. Is it valid to still reassociate? If so, that implies all inner loops must inherit the property from the outer loop. Would you consider it a bug to add it to the outer loop, but not the inner loops? Alternatively, if the inner loops do not get unrolled, would it be legal for the vectoriser to walk up to the outermost loop and use the metadata on the outermost loop to reassociate reductions on the innermost, etc?

Contributor Author:

Thank you for the example!

Since the user allowed reassociation for the reduction computation in the outer loop (e.g. via an option), we may think of the reduction computations as "inaccurate" already. So we are free to reassociate even the code in the inner loops (if they are unrolled) or leave it alone (if they are not unrolled).

By the same logic, it should be legal for the vectorizer to walk up to the outermost loop and use the metadata to reassociate reductions in the inner loops.

So far I am planning to inject the metadata based on the command-line option, so a module will have the metadata consistently attached to all loops. The situation you described may occur due to LTO, and I think it is hard to provide fine-grained controls such as "compute this part of the reduction without reassociation, and this part with reassociation". So, basically, the outer loop "wins", and the only way to prevent this is to use noinline (effectively disabling all optimizations in the outer loop).

Contributor:

That seems reasonable and thanks for explaining. I think it's worth explicitly stating this in the LangRef because once the metadata exists in LLVM it could be used by other frontends. For example, I can imagine in future someone may add a C level pragma that maps to this.

Contributor:

However, I think the reverse is also true. Suppose in your outer loop you set llvm.loop.vectorize.reassociate_fpreductions.enable to 0, that should override any inner loop that sets it to 1 for consistency.

Contributor Author:

> However, I think the reverse is also true. Suppose in your outer loop you set llvm.loop.vectorize.reassociate_fpreductions.enable to 0, that should override any inner loop that sets it to 1 for consistency.

Hmm, that does not sound right to me. If the inner loop computes a different reduction than the outer loop, then the metadata should probably not apply to the inner loop, e.g.:

double s1 = 0.0;
for (...) {
  double s2 = 0.0;
  for (...) {
    s2 += ...;
  }
  s1 += ...;
}

Do you think it will be more consistent to propagate the metadata's "enable" effect to the whole loop-nest regardless of which loop it is set on?

P.S. I am on vacation for 1.5 weeks, and I won't be able to reply to the comments during my absence. Sorry for the inconvenience.

Contributor:

Well, I guess that depends upon how you want this metadata to behave and what you want to achieve. My point really was that it should be consistent in my opinion - it would seem odd to permit llvm.loop.vectorize.reassociate_fpreductions.enable=1 to override inner loops, but not permit llvm.loop.vectorize.reassociate_fpreductions.enable=0 given something has gone to the effort of explicitly adding it. Of course if the metadata is completely missing from the outer loop (surely the common case?), then it cannot override any metadata on inner loops anyway. I think whatever behaviour we decide upon should be documented explicitly in the LangRef to avoid confusion.

Contributor Author:

Sorry for the long delay. I finally found time to get back to this. I promised to show how the NVHPC compiler works, and I have some details now.

Nvfortran has an option -Mvect=assoc/noassoc that allows/disallows vectorizing FP reductions. Nvfortran may not be the best example of how a mix of different options interacts with cross-module inlining, because it looks like it simply relies on whatever options are in effect for the compilation that runs after the cross-module function inlining.

I tried the following example:

callee.f90:

subroutine inner(y,s)
  real :: y(*), s
  do j=1,100
     s=s+y(j)
  end do
end subroutine inner

caller.f90:

subroutine test(x,y,s)
  interface
     subroutine inner(y,s)
       real :: y(*), s
     end subroutine inner
  end interface
  real :: x(*), y(*), s
  do i=1,100
     call inner(y,s)
     s=s+x(i)
  end do
end subroutine test

The first step is to create an inlining "library" for the callee.f90: nvfortran -cpp -O3 callee.f90 -Minfo=all -Mvect=assoc/noassoc -c -Mextract=lib:reductions

The second step is to use the inlining "library" during the compilation of the caller.f90: nvfortran -cpp -O3 caller.f90 -Minfo=all -Mvect=assoc/noassoc -Minline=lib:reductions -c

Regardless of the -Mvect=assoc/noassoc option used during the first step, the vectorization decision is based on the option value used during the second step. I.e. -Mvect=assoc results in the inner loop being vectorized, and -Mvect=noassoc disables vectorization.

Besides the reordering of the reduction computations, nvfortran does not apply any other FP math reassociations.

The most common use-case I anticipate for NVHPC users is that most of the code is compiled with FP reduction reassociation allowed, but some accuracy-critical loops with reductions need to be compiled without it. One way to do this is to extract such loops into separate functions/modules and compile them without reduction reassociation. Then, after the cross-module inlining, the reduction computations within these loops are not supposed to be reassociated (even if they are loops with constant trip counts that may be completely unrolled and end up inside outer loops of a caller compiled with the more relaxed reduction behavior).

In this usage model, it is expected that the metadata is set to either 1 or 0 for all the loops, but how can we define the metadata merging rules?

For correctness, it sounds like the inner loops should maintain their 0 value even when completely unrolled, so 0 (or the absence of metadata) should propagate outwards and override any 1 on the outer loops. Conversely, a 1 cannot propagate outwards to override an outer 0 (or absence of metadata).

I am not sure where such metadata propagation can be done reliably, given that different passes may perform function inlining. It does not seem feasible to require the metadata propagation to run after every pass that may change the loop nesting. Can this be done in the vectorizer itself, by querying the whole loop nest containing the loop being vectorized?

You brought up a great point, and I do not know how to address it properly.

I am wondering now if the approach suggested during the vectorizer meeting is more viable: someone (sorry, I do not remember the name) suggested a FastMathFlag attached to FP operations that would allow their reassociation only when it is required for vectorizing reductions. It sounds more consistent, but maybe someone can find drawbacks in it as well.

I think I need to collect more performance and correctness data before pushing this forward, and the LTO aspect is not something I am concerned about right now. Would it be acceptable to add an engineering option that allows reduction reassociation, so that I can experiment with multiple benchmarks and bring back some factual data? (This was one of the suggestions during the vectorizer meeting as well.)

@@ -64,7 +64,8 @@ class LoopVectorizeHints {
HK_FORCE,
HK_ISVECTORIZED,
HK_PREDICATE,
HK_SCALABLE
HK_SCALABLE,
HK_REASSOCIATE_FP_REDUCTIONS,
};

/// Hint - associates name and validation with the hint value.
@@ -97,6 +98,10 @@ class LoopVectorizeHints {
/// Says whether we should use fixed width or scalable vectorization.
Hint Scalable;

/// Says whether unsafe reassociation of reductions is allowed
/// during loop vectorization.
Hint ReassociateFPReductions;

/// Return the loop metadata prefix.
static StringRef Prefix() { return "llvm.loop."; }

@@ -162,6 +167,13 @@ class LoopVectorizeHints {
return (ScalableForceKind)Scalable.Value == SK_FixedWidthOnly;
}

enum ForceKind getReassociateFPReductions() const {
if ((ForceKind)ReassociateFPReductions.Value == FK_Undefined &&
hasDisableAllTransformsHint(TheLoop))
return FK_Disabled;
return (ForceKind)ReassociateFPReductions.Value;
}

/// If hints are provided that force vectorization, use the AlwaysPrint
/// pass name to force the frontend to print the diagnostic.
const char *vectorizeAnalysisPassName() const;
@@ -173,6 +185,10 @@ class LoopVectorizeHints {
/// error accumulates in the loop.
bool allowReordering() const;

/// Returns true iff the loop hints allow reassociating floating-point
/// reductions for the purpose of vectorization.
bool allowFPReductionReassociation() const;

bool isPotentiallyUnsafe() const {
// Avoid FP vectorization if the target is unsure about proper support.
// This may be related to the SIMD unit in the target not handling
42 changes: 29 additions & 13 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -97,6 +97,7 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) {
case HK_ISVECTORIZED:
case HK_PREDICATE:
case HK_SCALABLE:
case HK_REASSOCIATE_FP_REDUCTIONS:
return (Val == 0 || Val == 1);
}
return false;
@@ -112,6 +113,8 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
ReassociateFPReductions("vectorize.reassociate_fpreductions.enable",
FK_Undefined, HK_REASSOCIATE_FP_REDUCTIONS),
TheLoop(L), ORE(ORE) {
// Populate values with existing loop metadata.
getHintsFromMetadata();
@@ -254,6 +257,11 @@ bool LoopVectorizeHints::allowReordering() const {
EC.getKnownMinValue() > 1);
}

bool LoopVectorizeHints::allowFPReductionReassociation() const {
return HintsAllowReordering &&
getReassociateFPReductions() == LoopVectorizeHints::FK_Enabled;
}

void LoopVectorizeHints::getHintsFromMetadata() {
MDNode *LoopID = TheLoop->getLoopID();
if (!LoopID)
@@ -300,8 +308,13 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
return;
unsigned Val = C->getZExtValue();

Hint *Hints[] = {&Width, &Interleave, &Force,
&IsVectorized, &Predicate, &Scalable};
Hint *Hints[] = {&Width,
&Interleave,
&Force,
&IsVectorized,
&Predicate,
&Scalable,
&ReassociateFPReductions};
for (auto *H : Hints) {
if (Name == H->Name) {
if (H->validate(Val))
@@ -1311,22 +1324,25 @@ bool LoopVectorizationLegality::canVectorizeFPMath(
return true;

// If the above is false, we have ExactFPMath & do not allow reordering.
// If the EnableStrictReductions flag is set, first check if we have any
// Exact FP induction vars, which we cannot vectorize.
if (!EnableStrictReductions ||
any_of(getInductionVars(), [&](auto &Induction) -> bool {
// First check if we have any Exact FP induction vars, which we cannot
// vectorize.
if (any_of(getInductionVars(), [&](auto &Induction) -> bool {
InductionDescriptor IndDesc = Induction.second;
return IndDesc.getExactFPMathInst();
}))
return false;

// We can now only vectorize if all reductions with Exact FP math also
// have the isOrdered flag set, which indicates that we can move the
// reduction operations in-loop.
return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
const RecurrenceDescriptor &RdxDesc = Reduction.second;
return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
}));
// We can now only vectorize if EnableStrictReductions flag is set and
// all reductions with Exact FP math also have the isOrdered flag set,
// which indicates that we can move the reduction operations in-loop.
// If the hints allow reassociating FP reductions, then skip
// all the checks.
return (Hints->allowFPReductionReassociation() ||
all_of(getReductionVars(), [&](auto &Reduction) -> bool {
const RecurrenceDescriptor &RdxDesc = Reduction.second;
return !RdxDesc.hasExactFPMath() ||
(EnableStrictReductions && RdxDesc.isOrdered());
}));
}

bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) {
5 changes: 3 additions & 2 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1000,9 +1000,10 @@ class LoopVectorizationCostModel {
/// Returns true if we should use strict in-order reductions for the given
/// RdxDesc. This is true if the -enable-strict-reductions flag is passed,
/// the IsOrdered flag of RdxDesc is set and we do not allow reordering
/// of FP operations.
/// of FP operations or FP reductions.
bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const {
return !Hints->allowReordering() && RdxDesc.isOrdered();
return !Hints->allowReordering() &&
!Hints->allowFPReductionReassociation() && RdxDesc.isOrdered();
}

/// \returns The smallest bitwidth each instruction can be represented with.
47 changes: 47 additions & 0 deletions llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll
@@ -0,0 +1,47 @@
; Check that the loop with a floating-point reduction is vectorized
; due to llvm.loop.vectorize.reassociate_fpreductions.enable metadata.
; RUN: opt -passes=loop-vectorize -S < %s 2>&1 | FileCheck %s

source_filename = "FIRModule"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite)
define void @test_(ptr captures(none) %0, ptr readonly captures(none) %1) local_unnamed_addr #0 {
; CHECK-LABEL: define void @test_(
; CHECK: fadd contract <4 x float> {{.*}}
; CHECK: call contract float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> {{.*}})
;
%invariant.gep = getelementptr i8, ptr %1, i64 -4
%.promoted = load float, ptr %0, align 4
br label %3

3: ; preds = %2, %3
%indvars.iv = phi i64 [ 1, %2 ], [ %indvars.iv.next, %3 ]
%4 = phi float [ %.promoted, %2 ], [ %6, %3 ]
%gep = getelementptr float, ptr %invariant.gep, i64 %indvars.iv
%5 = load float, ptr %gep, align 4
%6 = fadd contract float %4, %5
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, 1001
br i1 %exitcond.not, label %7, label %3, !llvm.loop !2

7: ; preds = %3
%.lcssa = phi float [ %6, %3 ]
store float %.lcssa, ptr %0, align 4
ret void
}

attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) "target-cpu"="x86-64" }

!llvm.ident = !{!0}
!llvm.module.flags = !{!1}

!0 = !{!"flang version 21.0.0"}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = distinct !{!2, !3}
!3 = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 true}

; CHECK-NOT: llvm.loop.vectorize.reassociate_fpreductions.enable
; CHECK: !{!"llvm.loop.isvectorized", i32 1}
; CHECK: !{!"llvm.loop.unroll.runtime.disable"}