From 1619ad67557dc0495fdcd2e5b8be84b51b80df91 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Tue, 27 May 2025 15:58:17 -0700 Subject: [PATCH 1/5] [RFC][llvm] Added llvm.loop.vectorize.reassociation.enable metadata. This metadata allows unsafe reassociations of computations during the loop vectorization. For example, it allows vectorizing loops with floating-point reductions without the need to compile the whole function/program with -fassociative-math. --- llvm/docs/LangRef.rst | 16 +++++++ .../Vectorize/LoopVectorizationLegality.h | 14 +++++- .../Vectorize/LoopVectorizationLegality.cpp | 8 +++- .../LoopVectorize/reduction-reassociate.ll | 47 +++++++++++++++++++ 4 files changed, 82 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 6a4bf6e594d14..b0f42bafd85c1 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -7593,6 +7593,22 @@ Note that setting ``llvm.loop.interleave.count`` to 1 disables interleaving multiple iterations of the loop. If ``llvm.loop.interleave.count`` is set to 0 then the interleave count will be determined automatically. +'``llvm.loop.vectorize.reassociation.enable``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This metadata selectively allows or disallows reassociating computations, +which otherwise may be unsafe to reassociate, during the loop vectorization. +For example, a floating point ``ADD`` reduction without ``reassoc`` fast-math +flags may be vectorized provided that this metadata allows it. The first +operand is the string ``llvm.loop.vectorize.reassociation.enable`` +and the second operand is a bit. If the bit operand value is 1 unsafe +reassociations aqre enabled. A value of 0 disables unsafe reassociations. + +.. 
code-block:: llvm + + !0 = !{!"llvm.loop.vectorize.reassociation.enable", i1 0} + !1 = !{!"llvm.loop.vectorize.reassociation.enable", i1 1} + '``llvm.loop.vectorize.enable``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index d654ac3ec9273..fb91eb022daf6 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -64,7 +64,8 @@ class LoopVectorizeHints { HK_FORCE, HK_ISVECTORIZED, HK_PREDICATE, - HK_SCALABLE + HK_SCALABLE, + HK_REASSOCIATE, }; /// Hint - associates name and validation with the hint value. @@ -97,6 +98,10 @@ class LoopVectorizeHints { /// Says whether we should use fixed width or scalable vectorization. Hint Scalable; + /// Says whether unsafe reassociation of computations is allowed + /// during the loop vectorization. + Hint Reassociate; + /// Return the loop metadata prefix. static StringRef Prefix() { return "llvm.loop."; } @@ -162,6 +167,13 @@ class LoopVectorizeHints { return (ScalableForceKind)Scalable.Value == SK_FixedWidthOnly; } + enum ForceKind getReassociate() const { + if ((ForceKind)Reassociate.Value == FK_Undefined && + hasDisableAllTransformsHint(TheLoop)) + return FK_Disabled; + return (ForceKind)Reassociate.Value; + } + /// If hints are provided that force vectorization, use the AlwaysPrint /// pass name to force the frontend to print the diagnostic. 
const char *vectorizeAnalysisPassName() const; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 8e09e6f8d4935..ec3194f754664 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -97,6 +97,7 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) { case HK_ISVECTORIZED: case HK_PREDICATE: case HK_SCALABLE: + case HK_REASSOCIATE: return (Val == 0 || Val == 1); } return false; @@ -112,6 +113,8 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L, IsVectorized("isvectorized", 0, HK_ISVECTORIZED), Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE), + Reassociate("vectorize.reassociation.enable", FK_Undefined, + HK_REASSOCIATE), TheLoop(L), ORE(ORE) { // Populate values with existing loop metadata. getHintsFromMetadata(); @@ -251,6 +254,7 @@ bool LoopVectorizeHints::allowReordering() const { ElementCount EC = getWidth(); return HintsAllowReordering && (getForce() == LoopVectorizeHints::FK_Enabled || + getReassociate() == LoopVectorizeHints::FK_Enabled || EC.getKnownMinValue() > 1); } @@ -300,8 +304,8 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { return; unsigned Val = C->getZExtValue(); - Hint *Hints[] = {&Width, &Interleave, &Force, - &IsVectorized, &Predicate, &Scalable}; + Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, + &Predicate, &Scalable, &Reassociate}; for (auto *H : Hints) { if (Name == H->Name) { if (H->validate(Val)) diff --git a/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll b/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll new file mode 100644 index 0000000000000..ffe69596545a9 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll @@ -0,0 +1,47 @@ +; Check that the loop with a floating-point reduction is 
vectorized +; due to llvm.loop.vectorize.reassociation.enable metadata. +; RUN: opt -passes=loop-vectorize -S < %s 2>&1 | FileCheck %s + +source_filename = "FIRModule" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) +define void @test_(ptr captures(none) %0, ptr readonly captures(none) %1) local_unnamed_addr #0 { +; CHECK-LABEL: define void @test_( +; CHECK-NEXT: fadd contract <4 x float> {{.*}} +; CHECK-NEXT: call contract float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> {{.*}}) +; + %invariant.gep = getelementptr i8, ptr %1, i64 -4 + %.promoted = load float, ptr %0, align 4 + br label %3 + +3: ; preds = %2, %3 + %indvars.iv = phi i64 [ 1, %2 ], [ %indvars.iv.next, %3 ] + %4 = phi float [ %.promoted, %2 ], [ %6, %3 ] + %gep = getelementptr float, ptr %invariant.gep, i64 %indvars.iv + %5 = load float, ptr %gep, align 4 + %6 = fadd contract float %4, %5 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 1001 + br i1 %exitcond.not, label %7, label %3, !llvm.loop !2 + +7: ; preds = %3 + %.lcssa = phi float [ %6, %3 ] + store float %.lcssa, ptr %0, align 4 + ret void +} + +attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) "target-cpu"="x86-64" } + +!llvm.ident = !{!0} +!llvm.module.flags = !{!1} + +!0 = !{!"flang version 21.0.0"} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = distinct !{!2, !3} +!3 = !{!"llvm.loop.vectorize.reassociation.enable", i1 true} + +; CHECK-NOT: llvm.loop.vectorize.reassociation.enable +; CHECK: [[META3]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"} From 9511b6e2e10ce539519e9a7f446ccd0f7dd39d84 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Tue, 27 May 2025 17:41:20 -0700 Subject: [PATCH 2/5] Fixed test. 
--- .../Transforms/LoopVectorize/reduction-reassociate.ll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll b/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll index ffe69596545a9..e35ad858b8d89 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll @@ -9,8 +9,8 @@ target triple = "x86_64-unknown-linux-gnu" ; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) define void @test_(ptr captures(none) %0, ptr readonly captures(none) %1) local_unnamed_addr #0 { ; CHECK-LABEL: define void @test_( -; CHECK-NEXT: fadd contract <4 x float> {{.*}} -; CHECK-NEXT: call contract float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> {{.*}}) +; CHECK: fadd contract <4 x float> {{.*}} +; CHECK: call contract float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> {{.*}}) ; %invariant.gep = getelementptr i8, ptr %1, i64 -4 %.promoted = load float, ptr %0, align 4 @@ -43,5 +43,5 @@ attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) "ta !3 = !{!"llvm.loop.vectorize.reassociation.enable", i1 true} ; CHECK-NOT: llvm.loop.vectorize.reassociation.enable -; CHECK: [[META3]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: !{!"llvm.loop.isvectorized", i32 1} +; CHECK: !{!"llvm.loop.unroll.runtime.disable"} From 5ba9cbd40cbb8dcd2129060cf171655d9efd1c58 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Mon, 2 Jun 2025 20:02:32 -0700 Subject: [PATCH 3/5] Made metadata specific to FP reductions. 
--- llvm/docs/LangRef.rst | 22 +++++---- .../Vectorize/LoopVectorizationLegality.h | 16 ++++--- .../Vectorize/LoopVectorizationLegality.cpp | 46 ++++++++++++------- .../Transforms/Vectorize/LoopVectorize.cpp | 5 +- .../LoopVectorize/reduction-reassociate.ll | 6 +-- 5 files changed, 57 insertions(+), 38 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index b0f42bafd85c1..ed5fc5b6c5769 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -7593,21 +7593,23 @@ Note that setting ``llvm.loop.interleave.count`` to 1 disables interleaving multiple iterations of the loop. If ``llvm.loop.interleave.count`` is set to 0 then the interleave count will be determined automatically. -'``llvm.loop.vectorize.reassociation.enable``' Metadata -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.loop.vectorize.reassociate_fpreductions.enable``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -This metadata selectively allows or disallows reassociating computations, -which otherwise may be unsafe to reassociate, during the loop vectorization. -For example, a floating point ``ADD`` reduction without ``reassoc`` fast-math -flags may be vectorized provided that this metadata allows it. The first -operand is the string ``llvm.loop.vectorize.reassociation.enable`` +This metadata selectively allows or disallows reassociating floating-point +reductions, which otherwise may be unsafe to reassociate, during the loop +vectorization. For example, a floating point ``ADD`` reduction without +``reassoc`` fast-math flags may be vectorized provided that this metadata +allows it. The first operand is the string +``llvm.loop.vectorize.reassociate_fpreductions.enable`` and the second operand is a bit. If the bit operand value is 1 unsafe -reassociations aqre enabled. A value of 0 disables unsafe reassociations. +reduction reassociations are enabled. A value of 0 disables unsafe +reduction reassociations. .. 
code-block:: llvm - !0 = !{!"llvm.loop.vectorize.reassociation.enable", i1 0} - !1 = !{!"llvm.loop.vectorize.reassociation.enable", i1 1} + !0 = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 0} + !1 = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 1} '``llvm.loop.vectorize.enable``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index fb91eb022daf6..5911501ca2d3e 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -65,7 +65,7 @@ class LoopVectorizeHints { HK_ISVECTORIZED, HK_PREDICATE, HK_SCALABLE, - HK_REASSOCIATE, + HK_REASSOCIATE_FP_REDUCTIONS, }; /// Hint - associates name and validation with the hint value. @@ -98,9 +98,9 @@ class LoopVectorizeHints { /// Says whether we should use fixed width or scalable vectorization. Hint Scalable; - /// Says whether unsafe reassociation of computations is allowed + /// Says whether unsafe reassociation of reductions is allowed /// during the loop vectorization. - Hint Reassociate; + Hint ReassociateFPReductions; /// Return the loop metadata prefix. static StringRef Prefix() { return "llvm.loop."; } @@ -167,11 +167,11 @@ class LoopVectorizeHints { return (ScalableForceKind)Scalable.Value == SK_FixedWidthOnly; } - enum ForceKind getReassociate() const { - if ((ForceKind)Reassociate.Value == FK_Undefined && + enum ForceKind getReassociateFPReductions() const { + if ((ForceKind)ReassociateFPReductions.Value == FK_Undefined && hasDisableAllTransformsHint(TheLoop)) return FK_Disabled; - return (ForceKind)Reassociate.Value; + return (ForceKind)ReassociateFPReductions.Value; } /// If hints are provided that force vectorization, use the AlwaysPrint @@ -185,6 +185,10 @@ class LoopVectorizeHints { /// error accumulates in the loop. 
bool allowReordering() const; + /// Returns true iff the loop hints allow reassociating floating-point + /// reductions for the purpose of vectorization. + bool allowFPReductionReassociation() const; + bool isPotentiallyUnsafe() const { // Avoid FP vectorization if the target is unsure about proper support. // This may be related to the SIMD unit in the target not handling diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index ec3194f754664..dffff6f7278a1 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -97,7 +97,7 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) { case HK_ISVECTORIZED: case HK_PREDICATE: case HK_SCALABLE: - case HK_REASSOCIATE: + case HK_REASSOCIATE_FP_REDUCTIONS: return (Val == 0 || Val == 1); } return false; @@ -113,8 +113,8 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L, IsVectorized("isvectorized", 0, HK_ISVECTORIZED), Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE), - Reassociate("vectorize.reassociation.enable", FK_Undefined, - HK_REASSOCIATE), + ReassociateFPReductions("vectorize.reassociate_fpreductions.enable", + FK_Undefined, HK_REASSOCIATE_FP_REDUCTIONS), TheLoop(L), ORE(ORE) { // Populate values with existing loop metadata. 
getHintsFromMetadata(); @@ -254,10 +254,14 @@ bool LoopVectorizeHints::allowReordering() const { ElementCount EC = getWidth(); return HintsAllowReordering && (getForce() == LoopVectorizeHints::FK_Enabled || - getReassociate() == LoopVectorizeHints::FK_Enabled || EC.getKnownMinValue() > 1); } +bool LoopVectorizeHints::allowFPReductionReassociation() const { + return HintsAllowReordering && + getReassociateFPReductions() == LoopVectorizeHints::FK_Enabled; +} + void LoopVectorizeHints::getHintsFromMetadata() { MDNode *LoopID = TheLoop->getLoopID(); if (!LoopID) @@ -304,8 +308,13 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { return; unsigned Val = C->getZExtValue(); - Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, - &Predicate, &Scalable, &Reassociate}; + Hint *Hints[] = {&Width, + &Interleave, + &Force, + &IsVectorized, + &Predicate, + &Scalable, + &ReassociateFPReductions}; for (auto *H : Hints) { if (Name == H->Name) { if (H->validate(Val)) @@ -1315,22 +1324,25 @@ bool LoopVectorizationLegality::canVectorizeFPMath( return true; // If the above is false, we have ExactFPMath & do not allow reordering. - // If the EnableStrictReductions flag is set, first check if we have any - // Exact FP induction vars, which we cannot vectorize. - if (!EnableStrictReductions || - any_of(getInductionVars(), [&](auto &Induction) -> bool { + // First check if we have any Exact FP induction vars, which we cannot + // vectorize. + if (any_of(getInductionVars(), [&](auto &Induction) -> bool { InductionDescriptor IndDesc = Induction.second; return IndDesc.getExactFPMathInst(); })) return false; - // We can now only vectorize if all reductions with Exact FP math also - // have the isOrdered flag set, which indicates that we can move the - // reduction operations in-loop. 
- return (all_of(getReductionVars(), [&](auto &Reduction) -> bool { - const RecurrenceDescriptor &RdxDesc = Reduction.second; - return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered(); - })); + // We can now only vectorize if EnableStrictReductions flag is set and + // all reductions with Exact FP math also have the isOrdered flag set, + // which indicates that we can move the reduction operations in-loop. + // If the hints allow reassociating FP reductions, then skip + // all the checks. + return (Hints->allowFPReductionReassociation() || + all_of(getReductionVars(), [&](auto &Reduction) -> bool { + const RecurrenceDescriptor &RdxDesc = Reduction.second; + return !RdxDesc.hasExactFPMath() || + (EnableStrictReductions && RdxDesc.isOrdered()); + })); } bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index fc8ebebcf21b7..608715453e40d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1000,9 +1000,10 @@ class LoopVectorizationCostModel { /// Returns true if we should use strict in-order reductions for the given /// RdxDesc. This is true if the -enable-strict-reductions flag is passed, /// the IsOrdered flag of RdxDesc is set and we do not allow reordering - /// of FP operations. + /// of FP operations or FP reductions. bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const { - return !Hints->allowReordering() && RdxDesc.isOrdered(); + return !Hints->allowReordering() && + !Hints->allowFPReductionReassociation() && RdxDesc.isOrdered(); } /// \returns The smallest bitwidth each instruction can be represented with. 
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll b/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll index e35ad858b8d89..08b08d2d405b6 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll @@ -1,5 +1,5 @@ ; Check that the loop with a floating-point reduction is vectorized -; due to llvm.loop.vectorize.reassociation.enable metadata. +; due to llvm.loop.vectorize.reassociate_fpreductions.enable metadata. ; RUN: opt -passes=loop-vectorize -S < %s 2>&1 | FileCheck %s source_filename = "FIRModule" @@ -40,8 +40,8 @@ attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) "ta !0 = !{!"flang version 21.0.0"} !1 = !{i32 2, !"Debug Info Version", i32 3} !2 = distinct !{!2, !3} -!3 = !{!"llvm.loop.vectorize.reassociation.enable", i1 true} +!3 = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 true} -; CHECK-NOT: llvm.loop.vectorize.reassociation.enable +; CHECK-NOT: llvm.loop.vectorize.reassociate_fpreductions.enable ; CHECK: !{!"llvm.loop.isvectorized", i32 1} ; CHECK: !{!"llvm.loop.unroll.runtime.disable"} From 91f390e8d82e4e5b8c6667c54f621c7d6c842637 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Tue, 10 Jun 2025 17:44:22 -0700 Subject: [PATCH 4/5] Updated LangRef and the test. --- llvm/docs/LangRef.rst | 14 +- .../LoopVectorize/reduction-reassociate.ll | 151 ++++++++++++++---- 2 files changed, 130 insertions(+), 35 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index ed5fc5b6c5769..6cd7321d0c4e0 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -7597,7 +7597,7 @@ then the interleave count will be determined automatically. 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This metadata selectively allows or disallows reassociating floating-point -reductions, which otherwise may be unsafe to reassociate, during the loop +reductions, which otherwise may be unsafe to reassociate, during loop vectorization. For example, a floating point ``ADD`` reduction without ``reassoc`` fast-math flags may be vectorized provided that this metadata allows it. The first operand is the string @@ -7606,6 +7606,18 @@ and the second operand is a bit. If the bit operand value is 1 unsafe reduction reassociations are enabled. A value of 0 disables unsafe reduction reassociations. +Note that the reassociation of floating point reductions that is allowed +by other means is considered safe, so this metadata is a no-op +in such cases. + +For example, reassociation of a floating point reduction +in a loop with ``!{!"llvm.loop.vectorize.enable", i1 1}`` metadata is allowed +regardless of the value of +``llvm.loop.vectorize.reassociate_fpreductions.enable``. + +Similarly, reassociation is always allowed for reduction operations +that have the ``reassoc`` fast-math flag. + .. code-block:: llvm !0 = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 0} diff --git a/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll b/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll index 08b08d2d405b6..1e760c841f3dd 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll @@ -1,47 +1,130 @@ -; Check that the loop with a floating-point reduction is vectorized -; due to llvm.loop.vectorize.reassociate_fpreductions.enable metadata. -; RUN: opt -passes=loop-vectorize -S < %s 2>&1 | FileCheck %s +; Check that the loops with a floating-point reduction are vectorized +; according to llvm.loop.vectorize.reassociate_fpreductions.enable metadata. 
+; RUN: opt -passes=loop-vectorize -S < %s | FileCheck %s -source_filename = "FIRModule" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) -define void @test_(ptr captures(none) %0, ptr readonly captures(none) %1) local_unnamed_addr #0 { -; CHECK-LABEL: define void @test_( +define float @test_enable(ptr readonly captures(none) %array, float %init) { +; CHECK-LABEL: define float @test_enable( ; CHECK: fadd contract <4 x float> {{.*}} +; CHECK: br i1 %{{.*}}, !llvm.loop ![[MD0:[0-9]+]] ; CHECK: call contract float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> {{.*}}) +; CHECK: br i1 %{{.*}}, !llvm.loop ![[MD3:[0-9]+]] ; - %invariant.gep = getelementptr i8, ptr %1, i64 -4 - %.promoted = load float, ptr %0, align 4 - br label %3 - -3: ; preds = %2, %3 - %indvars.iv = phi i64 [ 1, %2 ], [ %indvars.iv.next, %3 ] - %4 = phi float [ %.promoted, %2 ], [ %6, %3 ] - %gep = getelementptr float, ptr %invariant.gep, i64 %indvars.iv - %5 = load float, ptr %gep, align 4 - %6 = fadd contract float %4, %5 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond.not = icmp eq i64 %indvars.iv.next, 1001 - br i1 %exitcond.not, label %7, label %3, !llvm.loop !2 - -7: ; preds = %3 - %.lcssa = phi float [ %6, %3 ] - store float %.lcssa, ptr %0, align 4 - ret void +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %red = phi float [ %init, %entry ], [ %red.next, %loop ] + %gep = getelementptr float, ptr %array, i64 %iv + %element = load float, ptr %gep, align 4 + %red.next = fadd contract float %red, %element + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1000 + br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0 + +exit: + %result = phi float [ %red.next, %loop ] + ret float %result } -attributes #0 = { nofree norecurse 
nosync nounwind memory(argmem: readwrite) "target-cpu"="x86-64" } +; The reduction is unsafe, and the metadata does not allow +; vectorizing it: +define float @test_disable(ptr readonly captures(none) %array, float %init) { +; CHECK-LABEL: define float @test_disable( +; CHECK-NOT: <4 x float> +; CHECK: br i1 %{{.*}}, !llvm.loop ![[MD4:[0-9]+]] +; +entry: + br label %loop -!llvm.ident = !{!0} -!llvm.module.flags = !{!1} +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %red = phi float [ %init, %entry ], [ %red.next, %loop ] + %gep = getelementptr float, ptr %array, i64 %iv + %element = load float, ptr %gep, align 4 + %red.next = fadd contract float %red, %element + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1000 + br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !2 + +exit: + %result = phi float [ %red.next, %loop ] + ret float %result +} + +; Forced vectorization "makes" the reduction reassociation safe, +; so setting llvm.loop.vectorize.reassociate_fpreductions.enable +; to false does not have effect: +define float @test_disable_with_forced_vectorization(ptr readonly captures(none) %array, float %init) { +; CHECK-LABEL: define float @test_disable_with_forced_vectorization( +; CHECK: fadd contract <4 x float> {{.*}} +; CHECK: br i1 %{{.*}}, !llvm.loop ![[MD6:[0-9]+]] +; CHECK: call contract float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> {{.*}}) +; CHECK: br i1 %{{.*}}, !llvm.loop ![[MD7:[0-9]+]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %red = phi float [ %init, %entry ], [ %red.next, %loop ] + %gep = getelementptr float, ptr %array, i64 %iv + %element = load float, ptr %gep, align 4 + %red.next = fadd contract float %red, %element + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1000 + br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !4 + +exit: + %result = phi float [ %red.next, %loop ] + ret float %result 
+} + +; 'fast' math makes reduction reassociation safe, +; so setting llvm.loop.vectorize.reassociate_fpreductions.enable +; to false does not have effect: +define float @test_disable_with_fast_math(ptr readonly captures(none) %array, float %init) { +; CHECK-LABEL: define float @test_disable_with_fast_math( +; CHECK: fadd fast <4 x float> {{.*}} +; CHECK: br i1 %{{.*}}, !llvm.loop ![[MD8:[0-9]+]] +; CHECK: call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> {{.*}}) +; CHECK: br i1 %{{.*}}, !llvm.loop ![[MD9:[0-9]+]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %red = phi float [ %init, %entry ], [ %red.next, %loop ] + %gep = getelementptr float, ptr %array, i64 %iv + %element = load float, ptr %gep, align 4 + %red.next = fadd fast float %red, %element + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1000 + br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !2 + +exit: + %result = phi float [ %red.next, %loop ] + ret float %result +} -!0 = !{!"flang version 21.0.0"} -!1 = !{i32 2, !"Debug Info Version", i32 3} +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 true} !2 = distinct !{!2, !3} -!3 = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 true} +!3 = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 false} +!4 = distinct !{!4, !3, !5} +!5 = !{!"llvm.loop.vectorize.enable", i1 true} ; CHECK-NOT: llvm.loop.vectorize.reassociate_fpreductions.enable -; CHECK: !{!"llvm.loop.isvectorized", i32 1} -; CHECK: !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: ![[MD0]] = distinct !{![[MD0]], ![[MD1:[0-9]+]], ![[MD2:[0-9]+]]} +; CHECK: ![[MD1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: ![[MD2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: ![[MD3]] = distinct !{![[MD3]], ![[MD2]], ![[MD1]]} +; CHECK: ![[MD4]] = distinct !{![[MD4]], ![[MD5:[0-9]+]]} +; CHECK: ![[MD5]] = 
!{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 false} +; CHECK: ![[MD6]] = distinct !{![[MD6]], ![[MD1]], ![[MD2]]} +; CHECK: ![[MD7]] = distinct !{![[MD7]], ![[MD2]], ![[MD1]]} +; CHECK: ![[MD8]] = distinct !{![[MD8]], ![[MD1]], ![[MD2]]} +; CHECK: ![[MD9]] = distinct !{![[MD9]], ![[MD2]], ![[MD1]]} From 676dedebdb336661b44b25fce2ba3f587c7eb04d Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Wed, 11 Jun 2025 11:47:55 -0700 Subject: [PATCH 5/5] Moved test to X86 dir. --- .../Transforms/LoopVectorize/{ => X86}/reduction-reassociate.ll | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename llvm/test/Transforms/LoopVectorize/{ => X86}/reduction-reassociate.ll (100%) diff --git a/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-reassociate.ll similarity index 100% rename from llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll rename to llvm/test/Transforms/LoopVectorize/X86/reduction-reassociate.ll