Skip to content

Conversation

@paperchalice
Copy link
Contributor

Remove UnsafeFPMath in X86 part, it blocks some bugfixes related to clang and the ultimate goal is to remove resetTargetOptions method in TargetMachine, see FIXME in resetTargetOptions.
See also https://discourse.llvm.org/t/rfc-honor-pragmas-with-ffp-contract-fast
https://discourse.llvm.org/t/allowfpopfusion-vs-sdnodeflags-hasallowcontract

@paperchalice paperchalice force-pushed the unsafe-fp-math/x86 branch 4 times, most recently from 2fd5f5a to 5991c77 Compare August 2, 2025 05:15
@paperchalice paperchalice marked this pull request as ready for review August 3, 2025 11:58
@paperchalice paperchalice requested a review from phoebewang August 3, 2025 11:58
@paperchalice paperchalice requested a review from RKSimon August 3, 2025 11:58
@llvmbot
Copy link
Member

llvmbot commented Aug 3, 2025

@llvm/pr-subscribers-backend-x86

Author: None (paperchalice)

Changes

Remove UnsafeFPMath in X86 part, it blocks some bugfixes related to clang and the ultimate goal is to remove resetTargetOptions method in TargetMachine, see FIXME in resetTargetOptions.
See also https://discourse.llvm.org/t/rfc-honor-pragmas-with-ffp-contract-fast
https://discourse.llvm.org/t/allowfpopfusion-vs-sdnodeflags-hasallowcontract


Patch is 41.54 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151667.diff

4 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+23-12)
  • (modified) llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll (+1-1)
  • (modified) llvm/test/CodeGen/X86/fmaddsub-combine.ll (+130-132)
  • (modified) llvm/test/CodeGen/X86/fmsubadd-combine.ll (+26-28)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bbbb1d9057a72..750511a37f86d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8279,8 +8279,8 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
 static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
                              const X86Subtarget &Subtarget, SelectionDAG &DAG,
                              SDValue &Opnd0, SDValue &Opnd1,
-                             unsigned &NumExtracts,
-                             bool &IsSubAdd) {
+                             unsigned &NumExtracts, bool &IsSubAdd,
+                             bool &HasAllowContract) {
   using namespace SDPatternMatch;
 
   MVT VT = BV->getSimpleValueType(0);
@@ -8292,6 +8292,7 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
   SDValue InVec1 = DAG.getUNDEF(VT);
 
   NumExtracts = 0;
+  HasAllowContract = NumElts != 0;
 
   // Odd-numbered elements in the input build vector are obtained from
   // adding/subtracting two integer/float elements.
@@ -8350,6 +8351,7 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
 
     // Increment the number of extractions done.
     ++NumExtracts;
+    HasAllowContract &= Op->getFlags().hasAllowContract();
   }
 
   // Ensure we have found an opcode for both parities and that they are
@@ -8393,9 +8395,10 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
 /// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit
 /// FMADDSUB is.
 static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget,
-                                 SelectionDAG &DAG,
-                                 SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2,
-                                 unsigned ExpectedUses) {
+                                 SelectionDAG &DAG, SDValue &Opnd0,
+                                 SDValue &Opnd1, SDValue &Opnd2,
+                                 unsigned ExpectedUses,
+                                 bool AllowSubAddOrAddSubContract) {
   if (Opnd0.getOpcode() != ISD::FMUL ||
       !Opnd0->hasNUsesOfValue(ExpectedUses, 0) || !Subtarget.hasAnyFMA())
     return false;
@@ -8406,7 +8409,8 @@ static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget,
   // or MUL + ADDSUB to FMADDSUB.
   const TargetOptions &Options = DAG.getTarget().Options;
   bool AllowFusion =
-      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
+      Options.AllowFPOpFusion == FPOpFusion::Fast ||
+      (AllowSubAddOrAddSubContract && Opnd0->getFlags().hasAllowContract());
   if (!AllowFusion)
     return false;
 
@@ -8427,15 +8431,17 @@ static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
   SDValue Opnd0, Opnd1;
   unsigned NumExtracts;
   bool IsSubAdd;
-  if (!isAddSubOrSubAdd(BV, Subtarget, DAG, Opnd0, Opnd1, NumExtracts,
-                        IsSubAdd))
+  bool HasAllowContract;
+  if (!isAddSubOrSubAdd(BV, Subtarget, DAG, Opnd0, Opnd1, NumExtracts, IsSubAdd,
+                        HasAllowContract))
     return SDValue();
 
   MVT VT = BV->getSimpleValueType(0);
 
   // Try to generate X86ISD::FMADDSUB node here.
   SDValue Opnd2;
-  if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts)) {
+  if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts,
+                           HasAllowContract)) {
     unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB;
     return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2);
   }
@@ -43165,7 +43171,7 @@ static bool isAddSubOrSubAddMask(ArrayRef<int> Mask, bool &Op0Even) {
 /// the fact that they're unused.
 static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
                              SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1,
-                             bool &IsSubAdd) {
+                             bool &IsSubAdd, bool &HasAllowContract) {
 
   EVT VT = N->getValueType(0);
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -43216,6 +43222,8 @@ static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
   // It's a subadd if the vector in the even parity is an FADD.
   IsSubAdd = Op0Even ? V1->getOpcode() == ISD::FADD
                      : V2->getOpcode() == ISD::FADD;
+  HasAllowContract =
+      V1->getFlags().hasAllowContract() && V2->getFlags().hasAllowContract();
 
   Opnd0 = LHS;
   Opnd1 = RHS;
@@ -43273,14 +43281,17 @@ static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N, const SDLoc &DL,
 
   SDValue Opnd0, Opnd1;
   bool IsSubAdd;
-  if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, IsSubAdd))
+  bool HasAllowContract;
+  if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, IsSubAdd,
+                        HasAllowContract))
     return SDValue();
 
   MVT VT = N->getSimpleValueType(0);
 
   // Try to generate X86ISD::FMADDSUB node here.
   SDValue Opnd2;
-  if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2)) {
+  if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2,
+                           HasAllowContract)) {
     unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB;
     return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2);
   }
diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll
index f02d11648362c..6d22f669725a2 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll
@@ -4,7 +4,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=F16C
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefix=FP16
 
-define <2 x half> @foo(<2 x half> %0) "unsafe-fp-math"="true" nounwind {
+define <2 x half> @foo(<2 x half> %0) nounwind {
 ; AVX2-LABEL: foo:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    subq $40, %rsp
diff --git a/llvm/test/CodeGen/X86/fmaddsub-combine.ll b/llvm/test/CodeGen/X86/fmaddsub-combine.ll
index 5219ab3fab944..2af219b3cdabb 100644
--- a/llvm/test/CodeGen/X86/fmaddsub-combine.ll
+++ b/llvm/test/CodeGen/X86/fmaddsub-combine.ll
@@ -6,7 +6,7 @@
 
 ; This test checks the fusing of MUL + ADDSUB to FMADDSUB.
 
-define <2 x double> @mul_addsub_pd128(<2 x double> %A, <2 x double> %B,  <2 x double> %C) #0 {
+define <2 x double> @mul_addsub_pd128(<2 x double> %A, <2 x double> %B,  <2 x double> %C) {
 ; NOFMA-LABEL: mul_addsub_pd128:
 ; NOFMA:       # %bb.0: # %entry
 ; NOFMA-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
@@ -23,14 +23,14 @@ define <2 x double> @mul_addsub_pd128(<2 x double> %A, <2 x double> %B,  <2 x do
 ; FMA4-NEXT:    vfmaddsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
 ; FMA4-NEXT:    retq
 entry:
-  %AB = fmul <2 x double> %A, %B
-  %Sub = fsub <2 x double> %AB, %C
-  %Add = fadd <2 x double> %AB, %C
+  %AB = fmul contract <2 x double> %A, %B
+  %Sub = fsub contract <2 x double> %AB, %C
+  %Add = fadd contract <2 x double> %AB, %C
   %Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add, <2 x i32> <i32 0, i32 3>
   ret <2 x double> %Addsub
 }
 
-define <4 x float> @mul_addsub_ps128(<4 x float> %A, <4 x float> %B, <4 x float> %C) #0 {
+define <4 x float> @mul_addsub_ps128(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
 ; NOFMA-LABEL: mul_addsub_ps128:
 ; NOFMA:       # %bb.0: # %entry
 ; NOFMA-NEXT:    vmulps %xmm1, %xmm0, %xmm0
@@ -47,14 +47,14 @@ define <4 x float> @mul_addsub_ps128(<4 x float> %A, <4 x float> %B, <4 x float>
 ; FMA4-NEXT:    vfmaddsubps {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
 ; FMA4-NEXT:    retq
 entry:
-  %AB = fmul <4 x float> %A, %B
-  %Sub = fsub <4 x float> %AB, %C
-  %Add = fadd <4 x float> %AB, %C
+  %AB = fmul contract <4 x float> %A, %B
+  %Sub = fsub contract <4 x float> %AB, %C
+  %Add = fadd contract <4 x float> %AB, %C
   %Addsub = shufflevector <4 x float> %Sub, <4 x float> %Add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x float> %Addsub
 }
 
-define <4 x double> @mul_addsub_pd256(<4 x double> %A, <4 x double> %B, <4 x double> %C) #0 {
+define <4 x double> @mul_addsub_pd256(<4 x double> %A, <4 x double> %B, <4 x double> %C) {
 ; NOFMA-LABEL: mul_addsub_pd256:
 ; NOFMA:       # %bb.0: # %entry
 ; NOFMA-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
@@ -71,14 +71,14 @@ define <4 x double> @mul_addsub_pd256(<4 x double> %A, <4 x double> %B, <4 x dou
 ; FMA4-NEXT:    vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
 ; FMA4-NEXT:    retq
 entry:
-  %AB = fmul <4 x double> %A, %B
-  %Sub = fsub <4 x double> %AB, %C
-  %Add = fadd <4 x double> %AB, %C
+  %AB = fmul contract <4 x double> %A, %B
+  %Sub = fsub contract <4 x double> %AB, %C
+  %Add = fadd contract <4 x double> %AB, %C
   %Addsub = shufflevector <4 x double> %Sub, <4 x double> %Add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x double> %Addsub
 }
 
-define <8 x float> @mul_addsub_ps256(<8 x float> %A, <8 x float> %B, <8 x float> %C) #0 {
+define <8 x float> @mul_addsub_ps256(<8 x float> %A, <8 x float> %B, <8 x float> %C) {
 ; NOFMA-LABEL: mul_addsub_ps256:
 ; NOFMA:       # %bb.0: # %entry
 ; NOFMA-NEXT:    vmulps %ymm1, %ymm0, %ymm0
@@ -95,14 +95,14 @@ define <8 x float> @mul_addsub_ps256(<8 x float> %A, <8 x float> %B, <8 x float>
 ; FMA4-NEXT:    vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
 ; FMA4-NEXT:    retq
 entry:
-  %AB = fmul <8 x float> %A, %B
-  %Sub = fsub <8 x float> %AB, %C
-  %Add = fadd <8 x float> %AB, %C
+  %AB = fmul contract <8 x float> %A, %B
+  %Sub = fsub contract <8 x float> %AB, %C
+  %Add = fadd contract <8 x float> %AB, %C
   %Addsub = shufflevector <8 x float> %Sub, <8 x float> %Add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
   ret <8 x float> %Addsub
 }
 
-define <8 x double> @mul_addsub_pd512(<8 x double> %A, <8 x double> %B, <8 x double> %C) #0 {
+define <8 x double> @mul_addsub_pd512(<8 x double> %A, <8 x double> %B, <8 x double> %C) {
 ; NOFMA-LABEL: mul_addsub_pd512:
 ; NOFMA:       # %bb.0: # %entry
 ; NOFMA-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
@@ -128,14 +128,14 @@ define <8 x double> @mul_addsub_pd512(<8 x double> %A, <8 x double> %B, <8 x dou
 ; FMA4-NEXT:    vfmaddsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
 ; FMA4-NEXT:    retq
 entry:
-  %AB = fmul <8 x double> %A, %B
-  %Sub = fsub <8 x double> %AB, %C
-  %Add = fadd <8 x double> %AB, %C
+  %AB = fmul contract <8 x double> %A, %B
+  %Sub = fsub contract <8 x double> %AB, %C
+  %Add = fadd contract <8 x double> %AB, %C
   %Addsub = shufflevector <8 x double> %Sub, <8 x double> %Add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
   ret <8 x double> %Addsub
 }
 
-define <16 x float> @mul_addsub_ps512(<16 x float> %A, <16 x float> %B, <16 x float> %C) #0 {
+define <16 x float> @mul_addsub_ps512(<16 x float> %A, <16 x float> %B, <16 x float> %C) {
 ; NOFMA-LABEL: mul_addsub_ps512:
 ; NOFMA:       # %bb.0: # %entry
 ; NOFMA-NEXT:    vmulps %ymm3, %ymm1, %ymm1
@@ -161,14 +161,14 @@ define <16 x float> @mul_addsub_ps512(<16 x float> %A, <16 x float> %B, <16 x fl
 ; FMA4-NEXT:    vfmaddsubps {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
 ; FMA4-NEXT:    retq
 entry:
-  %AB = fmul <16 x float> %A, %B
-  %Sub = fsub <16 x float> %AB, %C
-  %Add = fadd <16 x float> %AB, %C
+  %AB = fmul contract <16 x float> %A, %B
+  %Sub = fsub contract <16 x float> %AB, %C
+  %Add = fadd contract <16 x float> %AB, %C
   %Addsub = shufflevector <16 x float> %Sub, <16 x float> %Add, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
   ret <16 x float> %Addsub
 }
 
-define <4 x float> @buildvector_mul_addsub_ps128(<4 x float> %C, <4 x float> %D, <4 x float> %B) #0 {
+define <4 x float> @buildvector_mul_addsub_ps128(<4 x float> %C, <4 x float> %D, <4 x float> %B) {
 ; NOFMA-LABEL: buildvector_mul_addsub_ps128:
 ; NOFMA:       # %bb.0: # %bb
 ; NOFMA-NEXT:    vmulps %xmm1, %xmm0, %xmm0
@@ -185,19 +185,19 @@ define <4 x float> @buildvector_mul_addsub_ps128(<4 x float> %C, <4 x float> %D,
 ; FMA4-NEXT:    vfmaddsubps {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
 ; FMA4-NEXT:    retq
 bb:
-  %A = fmul <4 x float> %C, %D
+  %A = fmul contract <4 x float> %C, %D
   %A0 = extractelement <4 x float> %A, i32 0
   %B0 = extractelement <4 x float> %B, i32 0
-  %sub0 = fsub float %A0, %B0
+  %sub0 = fsub contract float %A0, %B0
   %A2 = extractelement <4 x float> %A, i32 2
   %B2 = extractelement <4 x float> %B, i32 2
-  %sub2 = fsub float %A2, %B2
+  %sub2 = fsub contract float %A2, %B2
   %A1 = extractelement <4 x float> %A, i32 1
   %B1 = extractelement <4 x float> %B, i32 1
-  %add1 = fadd float %A1, %B1
+  %add1 = fadd contract float %A1, %B1
   %A3 = extractelement <4 x float> %A, i32 3
   %B3 = extractelement <4 x float> %B, i32 3
-  %add3 = fadd float %A3, %B3
+  %add3 = fadd contract float %A3, %B3
   %vecinsert1 = insertelement <4 x float> undef, float %sub0, i32 0
   %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add1, i32 1
   %vecinsert3 = insertelement <4 x float> %vecinsert2, float %sub2, i32 2
@@ -205,7 +205,7 @@ bb:
   ret <4 x float> %vecinsert4
 }
 
-define <2 x double> @buildvector_mul_addsub_pd128(<2 x double> %C, <2 x double> %D, <2 x double> %B) #0 {
+define <2 x double> @buildvector_mul_addsub_pd128(<2 x double> %C, <2 x double> %D, <2 x double> %B) {
 ; NOFMA-LABEL: buildvector_mul_addsub_pd128:
 ; NOFMA:       # %bb.0: # %bb
 ; NOFMA-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
@@ -222,19 +222,19 @@ define <2 x double> @buildvector_mul_addsub_pd128(<2 x double> %C, <2 x double>
 ; FMA4-NEXT:    vfmaddsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
 ; FMA4-NEXT:    retq
 bb:
-  %A = fmul <2 x double> %C, %D
+  %A = fmul contract <2 x double> %C, %D
   %A0 = extractelement <2 x double> %A, i32 0
   %B0 = extractelement <2 x double> %B, i32 0
-  %sub0 = fsub double %A0, %B0
+  %sub0 = fsub contract double %A0, %B0
   %A1 = extractelement <2 x double> %A, i32 1
   %B1 = extractelement <2 x double> %B, i32 1
-  %add1 = fadd double %A1, %B1
+  %add1 = fadd contract double %A1, %B1
   %vecinsert1 = insertelement <2 x double> undef, double %sub0, i32 0
   %vecinsert2 = insertelement <2 x double> %vecinsert1, double %add1, i32 1
   ret <2 x double> %vecinsert2
 }
 
-define <8 x float> @buildvector_mul_addsub_ps256(<8 x float> %C, <8 x float> %D, <8 x float> %B) #0 {
+define <8 x float> @buildvector_mul_addsub_ps256(<8 x float> %C, <8 x float> %D, <8 x float> %B) {
 ; NOFMA-LABEL: buildvector_mul_addsub_ps256:
 ; NOFMA:       # %bb.0: # %bb
 ; NOFMA-NEXT:    vmulps %ymm1, %ymm0, %ymm0
@@ -251,31 +251,31 @@ define <8 x float> @buildvector_mul_addsub_ps256(<8 x float> %C, <8 x float> %D,
 ; FMA4-NEXT:    vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
 ; FMA4-NEXT:    retq
 bb:
-  %A = fmul <8 x float> %C, %D
+  %A = fmul contract <8 x float> %C, %D
   %A0 = extractelement <8 x float> %A, i32 0
   %B0 = extractelement <8 x float> %B, i32 0
-  %sub0 = fsub float %A0, %B0
+  %sub0 = fsub contract float %A0, %B0
   %A2 = extractelement <8 x float> %A, i32 2
   %B2 = extractelement <8 x float> %B, i32 2
-  %sub2 = fsub float %A2, %B2
+  %sub2 = fsub contract float %A2, %B2
   %A4 = extractelement <8 x float> %A, i32 4
   %B4 = extractelement <8 x float> %B, i32 4
-  %sub4 = fsub float %A4, %B4
+  %sub4 = fsub contract float %A4, %B4
   %A6 = extractelement <8 x float> %A, i32 6
   %B6 = extractelement <8 x float> %B, i32 6
-  %sub6 = fsub float %A6, %B6
+  %sub6 = fsub contract float %A6, %B6
   %A1 = extractelement <8 x float> %A, i32 1
   %B1 = extractelement <8 x float> %B, i32 1
-  %add1 = fadd float %A1, %B1
+  %add1 = fadd contract float %A1, %B1
   %A3 = extractelement <8 x float> %A, i32 3
   %B3 = extractelement <8 x float> %B, i32 3
-  %add3 = fadd float %A3, %B3
+  %add3 = fadd contract float %A3, %B3
   %A5 = extractelement <8 x float> %A, i32 5
   %B5 = extractelement <8 x float> %B, i32 5
-  %add5 = fadd float %A5, %B5
+  %add5 = fadd contract float %A5, %B5
   %A7 = extractelement <8 x float> %A, i32 7
   %B7 = extractelement <8 x float> %B, i32 7
-  %add7 = fadd float %A7, %B7
+  %add7 = fadd contract float %A7, %B7
   %vecinsert1 = insertelement <8 x float> undef, float %sub0, i32 0
   %vecinsert2 = insertelement <8 x float> %vecinsert1, float %add1, i32 1
   %vecinsert3 = insertelement <8 x float> %vecinsert2, float %sub2, i32 2
@@ -287,7 +287,7 @@ bb:
   ret <8 x float> %vecinsert8
 }
 
-define <4 x double> @buildvector_mul_addsub_pd256(<4 x double> %C, <4 x double> %D, <4 x double> %B) #0 {
+define <4 x double> @buildvector_mul_addsub_pd256(<4 x double> %C, <4 x double> %D, <4 x double> %B) {
 ; NOFMA-LABEL: buildvector_mul_addsub_pd256:
 ; NOFMA:       # %bb.0: # %bb
 ; NOFMA-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
@@ -304,19 +304,19 @@ define <4 x double> @buildvector_mul_addsub_pd256(<4 x double> %C, <4 x double>
 ; FMA4-NEXT:    vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
 ; FMA4-NEXT:    retq
 bb:
-  %A = fmul <4 x double> %C, %D
+  %A = fmul contract <4 x double> %C, %D
   %A0 = extractelement <4 x double> %A, i32 0
   %B0 = extractelement <4 x double> %B, i32 0
-  %sub0 = fsub double %A0, %B0
+  %sub0 = fsub contract double %A0, %B0
   %A2 = extractelement <4 x double> %A, i32 2
   %B2 = extractelement <4 x double> %B, i32 2
-  %sub2 = fsub double %A2, %B2
+  %sub2 = fsub contract double %A2, %B2
   %A1 = extractelement <4 x double> %A, i32 1
   %B1 = extractelement <4 x double> %B, i32 1
-  %add1 = fadd double %A1, %B1
+  %add1 = fadd contract double %A1, %B1
   %A3 = extractelement <4 x double> %A, i32 3
   %B3 = extractelement <4 x double> %B, i32 3
-  %add3 = fadd double %A3, %B3
+  %add3 = fadd contract double %A3, %B3
   %vecinsert1 = insertelement <4 x double> undef, double %sub0, i32 0
   %vecinsert2 = insertelement <4 x double> %vecinsert1, double %add1, i32 1
   %vecinsert3 = insertelement <4 x double> %vecinsert2, double %sub2, i32 2
@@ -324,7 +324,7 @@ bb:
   ret <4 x double> %vecinsert4
 }
 
-define <16 x float> @buildvector_mul_addsub_ps512(<16 x float> %C, <16 x float> %D, <16 x float> %B) #0 {
+define <16 x float> @buildvector_mul_addsub_ps512(<16 x float> %C, <16 x float> %D, <16 x float> %B) {
 ; NOFMA-LABEL: buildvector_mul_addsub_ps512:
 ; NOFMA:       # %bb.0: # %bb
 ; NOFMA-NEXT:    vmulps %ymm3, %ymm1, %ymm1
@@ -350,55 +350,55 @@ define <16 x float> @buildvector_mul_addsub_ps512(<16 x float> %C, <16 x float>
 ; FMA4-NEXT:    vfmaddsubps {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
 ; FMA4-NEXT:    retq
 bb:
-  %A = fmul <16 x float> %C, %D
+  %A = fmul contract <16 x float> %C, %D
   %A0 = extractelement <16 x float> %A, i32 0
   %B0 = extractelement <16 x float> %B, i32 0
-  %sub0 = fsub float %A0, %B0
+  %sub0 = fsub contract float %A0, %B0
   %A2 = extractelement <16 x float> %A, i32 2
   %B2 = extractelement <16 x float> %B, i32 2
-  %sub2 = fsub float %A2, %B2
+  %sub2 = fsub contract float %A2, %B2
   %A4 = extractelement <16 x float> %A, i32 4
   %B4 = extractelement <16 x float> %B, i32 4
-  %sub4 = fsub float %A4, %B4
+  %sub4 = fsub contract float %A4, %B4
   %A6 = extractelement <16 x float> %A, i32 6
   %B6 = extractelement <16 x float> %B, i32 6
-  %sub6 = fsub float %A6, %B6
+  %sub6 = fsub contract float %A6, %B6
   %A8 = extractelement <16 x float> %A, i32 8
   %B8 = extractelement <16 x float> %B, i32 8
-  %sub8 = fsub float %A8, %B8
+  %sub8 = fsub contract float %A8, %B8
   %A10 = extractelement <16 x float> %A, i32 10
   %B10 = extractelement <16 x float> %B, i32 10
-  %sub10 = fsub float %A10, %B10
+  %sub10 = fsub contract float %A10, %B10
   %A12 = extractelement <16 x float> %A, i32 12
   %B12 = extractelement <16 x float> %B, i32 12
-  %sub12 = fsub float %A12, %B12
+  %sub12 = fsub contract float %A12, %B12
   %A14 = extractelement <16 x float> %A, i32 14
   %B14 = extractelement <16 x float> %B, i32 14
-  %sub14 = fsub float %A14, %B14
+  %sub14 = fsub contract float %A14, %B14
   %A1 = extractelement <16 x float> %A, i32 1
   %B1 = extractelement <16 x float> %B, i32 1
-  %add1 = fadd float %A1, %B1
+  %add1 = fadd contract float %A1, %B1
   %A3 = extractelement <16 x float> %A, i32 3
   %B3 = extractelement <16 x float> %B, i32 3
-  %add3 = fadd float %A3, %B3
+  %add3 = fadd contract float %A3, %B3
   %A5 = extractelement <16 x float> %A, i32 5
   %B5 = extractelement <16 x float> %B, i3...
[truncated]

Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@paperchalice paperchalice merged commit 03e902c into llvm:main Aug 5, 2025
12 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Aug 5, 2025

LLVM Buildbot has detected a new failure on builder clang-hip-vega20 running on hip-vega20-0 while building llvm at step 3 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/123/builds/24706

Here is the relevant piece of the build log for the reference
Step 3 (annotate) failure: '../llvm-zorg/zorg/buildbot/builders/annotated/hip-build.sh --jobs=' (failure)
...
[57/59] Linking CXX executable External/HIP/math_h-hip-6.3.0
[58/59] Building CXX object External/HIP/CMakeFiles/TheNextWeek-hip-6.3.0.dir/workload/ray-tracing/TheNextWeek/main.cc.o
[59/59] Linking CXX executable External/HIP/TheNextWeek-hip-6.3.0
@@@BUILD_STEP Testing HIP test-suite@@@
+ build_step 'Testing HIP test-suite'
+ echo '@@@BUILD_STEP Testing HIP test-suite@@@'
+ ninja check-hip-simple
[0/1] cd /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP && /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/bin/llvm-lit -sv array-hip-6.3.0.test empty-hip-6.3.0.test with-fopenmp-hip-6.3.0.test saxpy-hip-6.3.0.test memmove-hip-6.3.0.test split-kernel-args-hip-6.3.0.test builtin-logb-scalbn-hip-6.3.0.test TheNextWeek-hip-6.3.0.test algorithm-hip-6.3.0.test cmath-hip-6.3.0.test complex-hip-6.3.0.test math_h-hip-6.3.0.test new-hip-6.3.0.test blender.test
-- Testing: 14 tests, 14 workers --
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 60.. 70.. 80.. 90
FAIL: test-suite :: External/HIP/blender.test (14 of 14)
******************** TEST 'test-suite :: External/HIP/blender.test' FAILED ********************

/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/tools/timeit-target --timeout 7200 --limit-core 0 --limit-cpu 7200 --limit-file-size 209715200 --limit-rss-size 838860800 --append-exitstatus --redirect-output /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/Output/blender.test.out --redirect-input /dev/null --summary /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/Output/blender.test.time /bin/bash test_blender.sh
/bin/bash verify_blender.sh /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/Output/blender.test.out
Begin Blender test.
TEST_SUITE_HIP_ROOT=/opt/botworker/llvm/External/hip
Render /opt/botworker/llvm/External/hip/Blender_Scenes/290skydemo_release.blend
Blender 4.1.1 (hash e1743a0317bc built 2024-04-15 23:47:45)
Read blend: "/opt/botworker/llvm/External/hip/Blender_Scenes/290skydemo_release.blend"
Could not open as Ogawa file from provided streams.
Unable to open /opt/botworker/llvm/External/hip/Blender_Scenes/290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.002", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.003", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.004", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.001", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
Could not open as Ogawa file from provided streams.
Unable to open /opt/botworker/llvm/External/hip/Blender_Scenes/290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.003", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.001", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.002", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.004", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
I0805 00:32:41.375944 3647209 device.cpp:39] HIPEW initialization succeeded
I0805 00:32:41.380089 3647209 device.cpp:45] Found HIPCC hipcc
I0805 00:32:41.454007 3647209 device.cpp:207] Device has compute preemption or is not used for display.
I0805 00:32:41.454110 3647209 device.cpp:211] Added device "" with id "HIP__0000:a3:00".
I0805 00:32:41.454196 3647209 device.cpp:568] Mapped host memory limit set to 536,444,985,344 bytes. (499.60G)
I0805 00:32:41.454449 3647209 device_impl.cpp:63] Using AVX2 CPU kernels.
Fra:1 Mem:524.00M (Peak 524.70M) | Time:00:00.69 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Eyepiece_rim
Fra:1 Mem:524.00M (Peak 524.70M) | Time:00:00.69 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Rivets.023
Fra:1 Mem:524.05M (Peak 524.70M) | Time:00:00.69 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Hoses.003
Fra:1 Mem:532.91M (Peak 532.91M) | Time:00:00.69 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Curve_Connectors
Fra:1 Mem:533.03M (Peak 533.03M) | Time:00:00.69 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Curve_Wires
Fra:1 Mem:533.81M (Peak 533.81M) | Time:00:00.69 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Curve_Connectors.001
Fra:1 Mem:534.91M (Peak 534.89M) | Time:00:00.69 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Head_fill
Fra:1 Mem:536.53M (Peak 536.53M) | Time:00:00.69 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Head_glowy_bits
Fra:1 Mem:536.99M (Peak 536.99M) | Time:00:00.69 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Head_glowy_bits.001
Step 12 (Testing HIP test-suite) failure: Testing HIP test-suite (failure)
@@@BUILD_STEP Testing HIP test-suite@@@
+ build_step 'Testing HIP test-suite'
+ echo '@@@BUILD_STEP Testing HIP test-suite@@@'
+ ninja check-hip-simple
[0/1] cd /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP && /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/llvm/bin/llvm-lit -sv array-hip-6.3.0.test empty-hip-6.3.0.test with-fopenmp-hip-6.3.0.test saxpy-hip-6.3.0.test memmove-hip-6.3.0.test split-kernel-args-hip-6.3.0.test builtin-logb-scalbn-hip-6.3.0.test TheNextWeek-hip-6.3.0.test algorithm-hip-6.3.0.test cmath-hip-6.3.0.test complex-hip-6.3.0.test math_h-hip-6.3.0.test new-hip-6.3.0.test blender.test
-- Testing: 14 tests, 14 workers --
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 60.. 70.. 80.. 90
FAIL: test-suite :: External/HIP/blender.test (14 of 14)
******************** TEST 'test-suite :: External/HIP/blender.test' FAILED ********************

/home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/tools/timeit-target --timeout 7200 --limit-core 0 --limit-cpu 7200 --limit-file-size 209715200 --limit-rss-size 838860800 --append-exitstatus --redirect-output /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/Output/blender.test.out --redirect-input /dev/null --summary /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/Output/blender.test.time /bin/bash test_blender.sh
/bin/bash verify_blender.sh /home/botworker/bbot/clang-hip-vega20/botworker/clang-hip-vega20/test-suite-build/External/HIP/Output/blender.test.out
Begin Blender test.
TEST_SUITE_HIP_ROOT=/opt/botworker/llvm/External/hip
Render /opt/botworker/llvm/External/hip/Blender_Scenes/290skydemo_release.blend
Blender 4.1.1 (hash e1743a0317bc built 2024-04-15 23:47:45)
Read blend: "/opt/botworker/llvm/External/hip/Blender_Scenes/290skydemo_release.blend"
Could not open as Ogawa file from provided streams.
Unable to open /opt/botworker/llvm/External/hip/Blender_Scenes/290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.002", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.003", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.004", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.001", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
Could not open as Ogawa file from provided streams.
Unable to open /opt/botworker/llvm/External/hip/Blender_Scenes/290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.003", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.001", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.002", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
WARN (bke.modifier): source/blender/blenkernel/intern/modifier.cc:425 BKE_modifier_set_error: Object: "GEO-flag.004", Modifier: "MeshSequenceCache", Could not create reader for file //290skydemo2_flags.abc
I0805 00:32:41.375944 3647209 device.cpp:39] HIPEW initialization succeeded
I0805 00:32:41.380089 3647209 device.cpp:45] Found HIPCC hipcc
I0805 00:32:41.454007 3647209 device.cpp:207] Device has compute preemption or is not used for display.
I0805 00:32:41.454110 3647209 device.cpp:211] Added device "" with id "HIP__0000:a3:00".
I0805 00:32:41.454196 3647209 device.cpp:568] Mapped host memory limit set to 536,444,985,344 bytes. (499.60G)
I0805 00:32:41.454449 3647209 device_impl.cpp:63] Using AVX2 CPU kernels.
Fra:1 Mem:524.00M (Peak 524.70M) | Time:00:00.69 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Eyepiece_rim
Fra:1 Mem:524.00M (Peak 524.70M) | Time:00:00.69 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Rivets.023
Fra:1 Mem:524.05M (Peak 524.70M) | Time:00:00.69 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Hoses.003
Fra:1 Mem:532.91M (Peak 532.91M) | Time:00:00.69 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Curve_Connectors
Fra:1 Mem:533.03M (Peak 533.03M) | Time:00:00.69 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Curve_Wires
Fra:1 Mem:533.81M (Peak 533.81M) | Time:00:00.69 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Curve_Connectors.001
Fra:1 Mem:534.91M (Peak 534.89M) | Time:00:00.69 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Head_fill
Fra:1 Mem:536.53M (Peak 536.53M) | Time:00:00.69 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Head_glowy_bits
Fra:1 Mem:536.99M (Peak 536.99M) | Time:00:00.69 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Head_glowy_bits.001
Fra:1 Mem:537.38M (Peak 537.38M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Head_greeble
Fra:1 Mem:537.44M (Peak 537.44M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Head_greeble.002
Fra:1 Mem:538.29M (Peak 538.29M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Head_greeble.003
Fra:1 Mem:539.12M (Peak 539.12M) | Time:00:00.70 | Mem:0.00M, Peak:0.00M | Scene, View Layer | Synchronizing object | GEO-Head_greeble.004

@paperchalice paperchalice deleted the unsafe-fp-math/x86 branch August 23, 2025 00:28
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants