[Associativity]: Make FMul associative operator #113269

hassnaaHamdi · 2024-10-22T06:34:37Z

No description provided.

Change-Id: I8231774b6fa7c446d221a4e32098788af32072bb

llvmbot · 2024-10-22T06:35:12Z

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-llvm-ir

Author: Hassnaa Hamdi (hassnaaHamdi)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/113269.diff

9 Files Affected:

(modified) llvm/include/llvm/IR/Instruction.h (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll (+5-3)
(modified) llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll (+4-7)
(modified) llvm/test/Transforms/InstCombine/fast-math.ll (+1-2)
(modified) llvm/test/Transforms/InstCombine/fdiv.ll (+1-2)
(modified) llvm/test/Transforms/LICM/hoist-binop.ll (+3-2)
(modified) llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll (+8-7)
(modified) llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll (+12-20)
(modified) llvm/test/Transforms/SLPVectorizer/reschedule.ll (+3-5)

diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index 61dba265dc948b..cfbdf248edf8e9 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -696,8 +696,8 @@ class Instruction : public User,
   ///
   bool isAssociative() const LLVM_READONLY;
   static bool isAssociative(unsigned Opcode) {
-    return Opcode == And || Opcode == Or || Opcode == Xor ||
-           Opcode == Add || Opcode == Mul;
+    return Opcode == And || Opcode == Or || Opcode == Xor || Opcode == Add ||
+           Opcode == Mul || Opcode == FMul;
   }
 
   /// Return true if the instruction is commutative:
diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
index a4a8f43646d4ba..61783a7e82c5d9 100644
--- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
+++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
@@ -139,11 +139,14 @@ define void @main(i1 %arg) #0 {
 ; CHECK-NEXT:    v_readlane_b32 s38, v7, 2
 ; CHECK-NEXT:    v_readlane_b32 s39, v7, 3
 ; CHECK-NEXT:    v_readlane_b32 s40, v7, 4
-; CHECK-NEXT:    image_sample_lz v3, v[1:2], s[44:51], s[20:23] dmask:0x1
-; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    image_sample_lz v2, v[1:2], s[44:51], s[20:23] dmask:0x1
 ; CHECK-NEXT:    v_readlane_b32 s41, v7, 5
 ; CHECK-NEXT:    v_readlane_b32 s42, v7, 6
 ; CHECK-NEXT:    v_readlane_b32 s43, v7, 7
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_mul_f32_e32 v3, v0, v2
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:  .LBB0_2: ; %bb50
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    v_readlane_b32 s36, v7, 32
@@ -162,7 +165,6 @@ define void @main(i1 %arg) #0 {
 ; CHECK-NEXT:    image_sample_lz v1, v[1:2], s[12:19], s[20:23] dmask:0x1
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    v_sub_f32_e32 v1, v1, v4
-; CHECK-NEXT:    v_mul_f32_e32 v1, v1, v0
 ; CHECK-NEXT:    v_mul_f32_e32 v1, v1, v3
 ; CHECK-NEXT:    s_mov_b64 vcc, vcc
 ; CHECK-NEXT:    s_cbranch_vccnz .LBB0_2
diff --git a/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll b/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
index fb860a5e7bdf38..8a9f023aef36a9 100644
--- a/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
+++ b/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
@@ -4,9 +4,8 @@
 ; Verify this doesn't fold when no fast-math-flags are specified
 define <4 x float> @test_fmul(<4 x float> %V) {
 ; CHECK-LABEL: @test_fmul(
-; CHECK-NEXT:     [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT:     [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT:     ret <4 x float> [[TMP2]]
+; CHECK-NEXT:     [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
+; CHECK-NEXT:     ret <4 x float> [[TMP1]]
         %Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
         %Z = fmul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
         ret <4 x float> %Z
@@ -35,12 +34,10 @@ define <4 x float> @test_fmul_reassoc_nsz(<4 x float> %V) {
 }
 
 ; (V * C1) * C2 => V * (C1 * C2)
-; TODO: This doesn't require 'nsz'.  It should fold to V * { 1.0, 4.0e+05, -9.0, 16.0 }
 define <4 x float> @test_fmul_reassoc(<4 x float> %V) {
 ; CHECK-LABEL: @test_fmul_reassoc(
-; CHECK-NEXT:     [[TMP1:%.*]] = fmul reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT:     [[TMP2:%.*]] = fmul reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT:     ret <4 x float> [[TMP2]]
+; CHECK-NEXT:     [[TMP1:%.*]] = fmul reassoc <4 x float> [[V]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
+; CHECK-NEXT:     ret <4 x float> [[TMP1]]
         %Y = fmul reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
         %Z = fmul reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
         ret <4 x float> %Z
diff --git a/llvm/test/Transforms/InstCombine/fast-math.ll b/llvm/test/Transforms/InstCombine/fast-math.ll
index 32f136d53fab4b..c17fd9a8c5a681 100644
--- a/llvm/test/Transforms/InstCombine/fast-math.ll
+++ b/llvm/test/Transforms/InstCombine/fast-math.ll
@@ -17,8 +17,7 @@ define float @fold(float %a) {
 ; fixed FP mode.
 define float @notfold(float %a) {
 ; CHECK-LABEL: @notfold(
-; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[A:%.*]], 0x3FF3333340000000
-; CHECK-NEXT:    [[MUL1:%.*]] = fmul float [[MUL]], 0x4002666660000000
+; CHECK-NEXT:    [[MUL1:%.*]] = fmul float [[A:%.*]], 0x4006147AE0000000
 ; CHECK-NEXT:    ret float [[MUL1]]
 ;
   %mul = fmul fast float %a, 0x3FF3333340000000
diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll
index 12d6e6463de657..a35338ad3f3705 100644
--- a/llvm/test/Transforms/InstCombine/fdiv.ll
+++ b/llvm/test/Transforms/InstCombine/fdiv.ll
@@ -524,8 +524,7 @@ define <2 x float> @div_constant_dividend2_reassoc_only(<2 x float> %x) {
 
 define <2 x float> @div_constant_dividend3(<2 x float> %x) {
 ; CHECK-LABEL: @div_constant_dividend3(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc arcp <2 x float> [[X:%.*]], <float 1.500000e+01, float -7.000000e+00>
-; CHECK-NEXT:    [[T2:%.*]] = fmul reassoc arcp <2 x float> [[TMP1]], <float 0x3FD5555560000000, float 0x3FC24924A0000000>
+; CHECK-NEXT:    [[T2:%.*]] = fmul reassoc arcp <2 x float> [[X:%.*]], <float 5.000000e+00, float -1.000000e+00>
 ; CHECK-NEXT:    ret <2 x float> [[T2]]
 ;
   %t1 = fdiv <2 x float> <float 3.0e0, float 7.0e0>, %x
diff --git a/llvm/test/Transforms/LICM/hoist-binop.ll b/llvm/test/Transforms/LICM/hoist-binop.ll
index ea7d96c07d5ff2..77f91ac095e131 100644
--- a/llvm/test/Transforms/LICM/hoist-binop.ll
+++ b/llvm/test/Transforms/LICM/hoist-binop.ll
@@ -562,12 +562,13 @@ loop:
 define void @fmul_noassoc(float %c1, float %c2) {
 ; CHECK-LABEL: @fmul_noassoc(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = fmul nsz float [[C1:%.*]], [[C2:%.*]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1:%.*]]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1]]
 ; CHECK-NEXT:    call void @use(float [[STEP_ADD]])
-; CHECK-NEXT:    [[INDEX_NEXT]] = fmul nsz float [[STEP_ADD]], [[C2:%.*]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = fmul nsz float [[INDEX]], [[INVARIANT_OP]]
 ; CHECK-NEXT:    br label [[LOOP]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll
index 73b73735da0210..fd839732de91cf 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll
@@ -11,13 +11,14 @@ define void @test(double %i) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x double> zeroinitializer, [[TMP2]]
 ; CHECK-NEXT:    [[I75:%.*]] = fsub double 0.000000e+00, [[I]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fsub <2 x double> [[TMP0]], zeroinitializer
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP5]], <4 x i32> <i32 poison, i32 0, i32 2, i32 poison>
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> [[TMP7]], <8 x i32> <i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 5, i32 6, i32 poison>
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <8 x double> [[TMP28]], <8 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison, double 0.000000e+00, double poison, double poison, double poison>, <8 x i32> <i32 8, i32 9, i32 2, i32 poison, i32 12, i32 5, i32 6, i32 poison>
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[I75]], i32 3
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <8 x double> [[TMP9]], <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 3>
-; CHECK-NEXT:    [[TMP11:%.*]] = fmul <8 x double> zeroinitializer, [[TMP10]]
+; CHECK-NEXT:    [[TMP28:%.*]] = insertelement <2 x double> [[TMP5]], double [[I75]], i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = fmul <2 x double> zeroinitializer, [[TMP28]]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
+; CHECK-NEXT:    [[I87:%.*]] = fmul double 0.000000e+00, [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <8 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison, double 0.000000e+00, double poison, double poison, double poison>, <8 x double> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[I87]], i32 5
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP12:%.*]] = fadd <8 x double> zeroinitializer, [[TMP11]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = fadd <8 x double> [[TMP12]], zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = fcmp ult <8 x double> [[TMP13]], zeroinitializer
diff --git a/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll b/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll
index 36abe96567bb2d..5e9637af7e76ab 100644
--- a/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll
+++ b/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll
@@ -8,29 +8,21 @@ define double @test() {
 ; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr null, align 8
 ; CHECK-NEXT:    br label [[COND_TRUE:%.*]]
 ; CHECK:       cond.true:
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[TMP0]], i32 1
+; CHECK-NEXT:    [[MUL13:%.*]] = fmul double [[TMP0]], 0.000000e+00
+; CHECK-NEXT:    [[OP_RDX4:%.*]] = fmul double 0.000000e+00, [[TMP0]]
+; CHECK-NEXT:    [[ADD17:%.*]] = fadd double [[MUL13]], [[OP_RDX4]]
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul <2 x double> [[TMP6]], zeroinitializer
-; CHECK-NEXT:    [[TMP8:%.*]] = fsub <2 x double> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x double> zeroinitializer, [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = fmul <2 x double> zeroinitializer, [[TMP10]]
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP14:%.*]] = fsub <2 x double> [[TMP13]], [[TMP2]]
-; CHECK-NEXT:    [[TMP15:%.*]] = fadd <2 x double> [[TMP13]], [[TMP2]]
-; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <2 x double> [[TMP14]], <2 x double> [[TMP15]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP23:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]]
+; CHECK-NEXT:    [[SUB:%.*]] = fsub double 0.000000e+00, 0.000000e+00
+; CHECK-NEXT:    [[ADD:%.*]] = fadd double 0.000000e+00, [[SUB]]
+; CHECK-NEXT:    [[SUB2:%.*]] = fsub double [[ADD]], 0.000000e+00
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> poison, double [[SUB2]], i32 0
+; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <2 x double> [[TMP5]], double [[ADD17]], i32 1
 ; CHECK-NEXT:    [[TMP17:%.*]] = fsub <2 x double> [[TMP16]], zeroinitializer
-; CHECK-NEXT:    [[TMP18:%.*]] = fmul <2 x double> [[TMP4]], zeroinitializer
-; CHECK-NEXT:    [[TMP19:%.*]] = fmul <2 x double> zeroinitializer, [[TMP18]]
-; CHECK-NEXT:    [[TMP20:%.*]] = fadd <2 x double> [[TMP19]], [[TMP17]]
+; CHECK-NEXT:    [[TMP20:%.*]] = fadd <2 x double> [[TMP2]], [[TMP17]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = fsub <2 x double> [[TMP20]], zeroinitializer
-; CHECK-NEXT:    [[TMP22:%.*]] = fmul <2 x double> [[TMP5]], zeroinitializer
-; CHECK-NEXT:    [[TMP23:%.*]] = fmul <2 x double> zeroinitializer, [[TMP22]]
 ; CHECK-NEXT:    [[TMP24:%.*]] = fadd <2 x double> [[TMP23]], [[TMP21]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <2 x double> [[TMP24]], i32 0
 ; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <2 x double> [[TMP24]], i32 1
diff --git a/llvm/test/Transforms/SLPVectorizer/reschedule.ll b/llvm/test/Transforms/SLPVectorizer/reschedule.ll
index 825329b5af97ef..c148a734088786 100644
--- a/llvm/test/Transforms/SLPVectorizer/reschedule.ll
+++ b/llvm/test/Transforms/SLPVectorizer/reschedule.ll
@@ -8,19 +8,17 @@ declare void @use(double, double)
 define void @test() {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  for.body602:
-; CHECK-NEXT:    [[MUL701:%.*]] = fmul double 0.000000e+00, 0.000000e+00
-; CHECK-NEXT:    [[MUL703:%.*]] = fmul double 0.000000e+00, 0.000000e+00
-; CHECK-NEXT:    [[I4:%.*]] = call double @llvm.fmuladd.f64(double [[MUL701]], double 0.000000e+00, double [[MUL703]])
+; CHECK-NEXT:    [[I4:%.*]] = call double @llvm.fmuladd.f64(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00)
 ; CHECK-NEXT:    store double [[I4]], ptr null, align 8
 ; CHECK-NEXT:    [[I5:%.*]] = load double, ptr null, align 8
 ; CHECK-NEXT:    [[I6:%.*]] = load double, ptr null, align 8
 ; CHECK-NEXT:    [[MUL746:%.*]] = fmul double 0.000000e+00, [[I6]]
 ; CHECK-NEXT:    [[MUL747:%.*]] = fmul double 0.000000e+00, [[I5]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[MUL746]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[MUL701]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double 0.000000e+00, i32 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, <2 x double> [[TMP1]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[MUL747]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[MUL703]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 0.000000e+00, i32 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP2]], <2 x double> zeroinitializer, <2 x double> [[TMP4]])
 ; CHECK-NEXT:    [[TMP6:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP5]], <2 x double> zeroinitializer, <2 x double> zeroinitializer)
 ; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> [[TMP6]], <2 x double> zeroinitializer)

llvmbot · 2024-10-22T06:35:12Z

@llvm/pr-subscribers-llvm-transforms

Author: Hassnaa Hamdi (hassnaaHamdi)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/113269.diff

9 Files Affected:

(modified) llvm/include/llvm/IR/Instruction.h (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll (+5-3)
(modified) llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll (+4-7)
(modified) llvm/test/Transforms/InstCombine/fast-math.ll (+1-2)
(modified) llvm/test/Transforms/InstCombine/fdiv.ll (+1-2)
(modified) llvm/test/Transforms/LICM/hoist-binop.ll (+3-2)
(modified) llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll (+8-7)
(modified) llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll (+12-20)
(modified) llvm/test/Transforms/SLPVectorizer/reschedule.ll (+3-5)

diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index 61dba265dc948b..cfbdf248edf8e9 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -696,8 +696,8 @@ class Instruction : public User,
   ///
   bool isAssociative() const LLVM_READONLY;
   static bool isAssociative(unsigned Opcode) {
-    return Opcode == And || Opcode == Or || Opcode == Xor ||
-           Opcode == Add || Opcode == Mul;
+    return Opcode == And || Opcode == Or || Opcode == Xor || Opcode == Add ||
+           Opcode == Mul || Opcode == FMul;
   }
 
   /// Return true if the instruction is commutative:
diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
index a4a8f43646d4ba..61783a7e82c5d9 100644
--- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
+++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
@@ -139,11 +139,14 @@ define void @main(i1 %arg) #0 {
 ; CHECK-NEXT:    v_readlane_b32 s38, v7, 2
 ; CHECK-NEXT:    v_readlane_b32 s39, v7, 3
 ; CHECK-NEXT:    v_readlane_b32 s40, v7, 4
-; CHECK-NEXT:    image_sample_lz v3, v[1:2], s[44:51], s[20:23] dmask:0x1
-; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    image_sample_lz v2, v[1:2], s[44:51], s[20:23] dmask:0x1
 ; CHECK-NEXT:    v_readlane_b32 s41, v7, 5
 ; CHECK-NEXT:    v_readlane_b32 s42, v7, 6
 ; CHECK-NEXT:    v_readlane_b32 s43, v7, 7
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_mul_f32_e32 v3, v0, v2
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:  .LBB0_2: ; %bb50
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    v_readlane_b32 s36, v7, 32
@@ -162,7 +165,6 @@ define void @main(i1 %arg) #0 {
 ; CHECK-NEXT:    image_sample_lz v1, v[1:2], s[12:19], s[20:23] dmask:0x1
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    v_sub_f32_e32 v1, v1, v4
-; CHECK-NEXT:    v_mul_f32_e32 v1, v1, v0
 ; CHECK-NEXT:    v_mul_f32_e32 v1, v1, v3
 ; CHECK-NEXT:    s_mov_b64 vcc, vcc
 ; CHECK-NEXT:    s_cbranch_vccnz .LBB0_2
diff --git a/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll b/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
index fb860a5e7bdf38..8a9f023aef36a9 100644
--- a/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
+++ b/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
@@ -4,9 +4,8 @@
 ; Verify this doesn't fold when no fast-math-flags are specified
 define <4 x float> @test_fmul(<4 x float> %V) {
 ; CHECK-LABEL: @test_fmul(
-; CHECK-NEXT:     [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT:     [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT:     ret <4 x float> [[TMP2]]
+; CHECK-NEXT:     [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
+; CHECK-NEXT:     ret <4 x float> [[TMP1]]
         %Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
         %Z = fmul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
         ret <4 x float> %Z
@@ -35,12 +34,10 @@ define <4 x float> @test_fmul_reassoc_nsz(<4 x float> %V) {
 }
 
 ; (V * C1) * C2 => V * (C1 * C2)
-; TODO: This doesn't require 'nsz'.  It should fold to V * { 1.0, 4.0e+05, -9.0, 16.0 }
 define <4 x float> @test_fmul_reassoc(<4 x float> %V) {
 ; CHECK-LABEL: @test_fmul_reassoc(
-; CHECK-NEXT:     [[TMP1:%.*]] = fmul reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT:     [[TMP2:%.*]] = fmul reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT:     ret <4 x float> [[TMP2]]
+; CHECK-NEXT:     [[TMP1:%.*]] = fmul reassoc <4 x float> [[V]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
+; CHECK-NEXT:     ret <4 x float> [[TMP1]]
         %Y = fmul reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
         %Z = fmul reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
         ret <4 x float> %Z
diff --git a/llvm/test/Transforms/InstCombine/fast-math.ll b/llvm/test/Transforms/InstCombine/fast-math.ll
index 32f136d53fab4b..c17fd9a8c5a681 100644
--- a/llvm/test/Transforms/InstCombine/fast-math.ll
+++ b/llvm/test/Transforms/InstCombine/fast-math.ll
@@ -17,8 +17,7 @@ define float @fold(float %a) {
 ; fixed FP mode.
 define float @notfold(float %a) {
 ; CHECK-LABEL: @notfold(
-; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[A:%.*]], 0x3FF3333340000000
-; CHECK-NEXT:    [[MUL1:%.*]] = fmul float [[MUL]], 0x4002666660000000
+; CHECK-NEXT:    [[MUL1:%.*]] = fmul float [[A:%.*]], 0x4006147AE0000000
 ; CHECK-NEXT:    ret float [[MUL1]]
 ;
   %mul = fmul fast float %a, 0x3FF3333340000000
diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll
index 12d6e6463de657..a35338ad3f3705 100644
--- a/llvm/test/Transforms/InstCombine/fdiv.ll
+++ b/llvm/test/Transforms/InstCombine/fdiv.ll
@@ -524,8 +524,7 @@ define <2 x float> @div_constant_dividend2_reassoc_only(<2 x float> %x) {
 
 define <2 x float> @div_constant_dividend3(<2 x float> %x) {
 ; CHECK-LABEL: @div_constant_dividend3(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc arcp <2 x float> [[X:%.*]], <float 1.500000e+01, float -7.000000e+00>
-; CHECK-NEXT:    [[T2:%.*]] = fmul reassoc arcp <2 x float> [[TMP1]], <float 0x3FD5555560000000, float 0x3FC24924A0000000>
+; CHECK-NEXT:    [[T2:%.*]] = fmul reassoc arcp <2 x float> [[X:%.*]], <float 5.000000e+00, float -1.000000e+00>
 ; CHECK-NEXT:    ret <2 x float> [[T2]]
 ;
   %t1 = fdiv <2 x float> <float 3.0e0, float 7.0e0>, %x
diff --git a/llvm/test/Transforms/LICM/hoist-binop.ll b/llvm/test/Transforms/LICM/hoist-binop.ll
index ea7d96c07d5ff2..77f91ac095e131 100644
--- a/llvm/test/Transforms/LICM/hoist-binop.ll
+++ b/llvm/test/Transforms/LICM/hoist-binop.ll
@@ -562,12 +562,13 @@ loop:
 define void @fmul_noassoc(float %c1, float %c2) {
 ; CHECK-LABEL: @fmul_noassoc(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = fmul nsz float [[C1:%.*]], [[C2:%.*]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1:%.*]]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1]]
 ; CHECK-NEXT:    call void @use(float [[STEP_ADD]])
-; CHECK-NEXT:    [[INDEX_NEXT]] = fmul nsz float [[STEP_ADD]], [[C2:%.*]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = fmul nsz float [[INDEX]], [[INVARIANT_OP]]
 ; CHECK-NEXT:    br label [[LOOP]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll
index 73b73735da0210..fd839732de91cf 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll
@@ -11,13 +11,14 @@ define void @test(double %i) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x double> zeroinitializer, [[TMP2]]
 ; CHECK-NEXT:    [[I75:%.*]] = fsub double 0.000000e+00, [[I]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fsub <2 x double> [[TMP0]], zeroinitializer
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP5]], <4 x i32> <i32 poison, i32 0, i32 2, i32 poison>
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> [[TMP7]], <8 x i32> <i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 5, i32 6, i32 poison>
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <8 x double> [[TMP28]], <8 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison, double 0.000000e+00, double poison, double poison, double poison>, <8 x i32> <i32 8, i32 9, i32 2, i32 poison, i32 12, i32 5, i32 6, i32 poison>
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[I75]], i32 3
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <8 x double> [[TMP9]], <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 3>
-; CHECK-NEXT:    [[TMP11:%.*]] = fmul <8 x double> zeroinitializer, [[TMP10]]
+; CHECK-NEXT:    [[TMP28:%.*]] = insertelement <2 x double> [[TMP5]], double [[I75]], i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = fmul <2 x double> zeroinitializer, [[TMP28]]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
+; CHECK-NEXT:    [[I87:%.*]] = fmul double 0.000000e+00, [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <8 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison, double 0.000000e+00, double poison, double poison, double poison>, <8 x double> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[I87]], i32 5
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP12:%.*]] = fadd <8 x double> zeroinitializer, [[TMP11]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = fadd <8 x double> [[TMP12]], zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = fcmp ult <8 x double> [[TMP13]], zeroinitializer
diff --git a/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll b/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll
index 36abe96567bb2d..5e9637af7e76ab 100644
--- a/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll
+++ b/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll
@@ -8,29 +8,21 @@ define double @test() {
 ; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr null, align 8
 ; CHECK-NEXT:    br label [[COND_TRUE:%.*]]
 ; CHECK:       cond.true:
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[TMP0]], i32 1
+; CHECK-NEXT:    [[MUL13:%.*]] = fmul double [[TMP0]], 0.000000e+00
+; CHECK-NEXT:    [[OP_RDX4:%.*]] = fmul double 0.000000e+00, [[TMP0]]
+; CHECK-NEXT:    [[ADD17:%.*]] = fadd double [[MUL13]], [[OP_RDX4]]
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul <2 x double> [[TMP6]], zeroinitializer
-; CHECK-NEXT:    [[TMP8:%.*]] = fsub <2 x double> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x double> zeroinitializer, [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = fmul <2 x double> zeroinitializer, [[TMP10]]
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP14:%.*]] = fsub <2 x double> [[TMP13]], [[TMP2]]
-; CHECK-NEXT:    [[TMP15:%.*]] = fadd <2 x double> [[TMP13]], [[TMP2]]
-; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <2 x double> [[TMP14]], <2 x double> [[TMP15]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP23:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]]
+; CHECK-NEXT:    [[SUB:%.*]] = fsub double 0.000000e+00, 0.000000e+00
+; CHECK-NEXT:    [[ADD:%.*]] = fadd double 0.000000e+00, [[SUB]]
+; CHECK-NEXT:    [[SUB2:%.*]] = fsub double [[ADD]], 0.000000e+00
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> poison, double [[SUB2]], i32 0
+; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <2 x double> [[TMP5]], double [[ADD17]], i32 1
 ; CHECK-NEXT:    [[TMP17:%.*]] = fsub <2 x double> [[TMP16]], zeroinitializer
-; CHECK-NEXT:    [[TMP18:%.*]] = fmul <2 x double> [[TMP4]], zeroinitializer
-; CHECK-NEXT:    [[TMP19:%.*]] = fmul <2 x double> zeroinitializer, [[TMP18]]
-; CHECK-NEXT:    [[TMP20:%.*]] = fadd <2 x double> [[TMP19]], [[TMP17]]
+; CHECK-NEXT:    [[TMP20:%.*]] = fadd <2 x double> [[TMP2]], [[TMP17]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = fsub <2 x double> [[TMP20]], zeroinitializer
-; CHECK-NEXT:    [[TMP22:%.*]] = fmul <2 x double> [[TMP5]], zeroinitializer
-; CHECK-NEXT:    [[TMP23:%.*]] = fmul <2 x double> zeroinitializer, [[TMP22]]
 ; CHECK-NEXT:    [[TMP24:%.*]] = fadd <2 x double> [[TMP23]], [[TMP21]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <2 x double> [[TMP24]], i32 0
 ; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <2 x double> [[TMP24]], i32 1
diff --git a/llvm/test/Transforms/SLPVectorizer/reschedule.ll b/llvm/test/Transforms/SLPVectorizer/reschedule.ll
index 825329b5af97ef..c148a734088786 100644
--- a/llvm/test/Transforms/SLPVectorizer/reschedule.ll
+++ b/llvm/test/Transforms/SLPVectorizer/reschedule.ll
@@ -8,19 +8,17 @@ declare void @use(double, double)
 define void @test() {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  for.body602:
-; CHECK-NEXT:    [[MUL701:%.*]] = fmul double 0.000000e+00, 0.000000e+00
-; CHECK-NEXT:    [[MUL703:%.*]] = fmul double 0.000000e+00, 0.000000e+00
-; CHECK-NEXT:    [[I4:%.*]] = call double @llvm.fmuladd.f64(double [[MUL701]], double 0.000000e+00, double [[MUL703]])
+; CHECK-NEXT:    [[I4:%.*]] = call double @llvm.fmuladd.f64(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00)
 ; CHECK-NEXT:    store double [[I4]], ptr null, align 8
 ; CHECK-NEXT:    [[I5:%.*]] = load double, ptr null, align 8
 ; CHECK-NEXT:    [[I6:%.*]] = load double, ptr null, align 8
 ; CHECK-NEXT:    [[MUL746:%.*]] = fmul double 0.000000e+00, [[I6]]
 ; CHECK-NEXT:    [[MUL747:%.*]] = fmul double 0.000000e+00, [[I5]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[MUL746]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[MUL701]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double 0.000000e+00, i32 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, <2 x double> [[TMP1]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[MUL747]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[MUL703]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 0.000000e+00, i32 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP2]], <2 x double> zeroinitializer, <2 x double> [[TMP4]])
 ; CHECK-NEXT:    [[TMP6:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP5]], <2 x double> zeroinitializer, <2 x double> zeroinitializer)
 ; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> [[TMP6]], <2 x double> zeroinitializer)

hassnaaHamdi · 2024-10-22T06:54:25Z

llvm/test/Transforms/LICM/hoist-binop.ll

 }

 ; Don't hoist if both reassoc and nsz aren't present on both instructions.
 define void @fmul_noassoc(float %c1, float %c2) {


; Don't hoist if both reassoc and nsz aren't present on both instructions.
About that comment, I think maybe there is a bug in the LICM pass specifically in the hoistMulAddAssociation(..) or hoistBOAssociation(..), because that verification should not be related to the associativity of FMul ?

optimisan · 2024-10-22T07:11:02Z

But floating point multiplication is not associative in general right?

hassnaaHamdi · 2024-10-22T07:39:53Z

llvm/test/Transforms/InstCombine/fast-math.ll

-; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[A:%.*]], 0x3FF3333340000000
-; CHECK-NEXT:    [[MUL1:%.*]] = fmul float [[MUL]], 0x4002666660000000
+; CHECK-NEXT:    [[MUL1:%.*]] = fmul float [[A:%.*]], 0x4006147AE0000000
 ; CHECK-NEXT:    ret float [[MUL1]]


I think this should not be folded but I think the problem here is not related to the associativity, but related to the handling of fast flags ?

nikic

This is incorrect, fmul is only associative with the reassoc flag. And this is already handled in the member isAssociative() function (rather than the one taking an Opcode).

hassnaaHamdi · 2024-10-22T08:37:38Z

This is incorrect, fmul is only associative with the reassoc flag. And this is already handled in the member isAssociative() function (rather than the one taking an Opcode).

Yes, that's true. I will close the pull request. Thank you :)

[Associativity]: Make FMul associative operator

55d5f62

Change-Id: I8231774b6fa7c446d221a4e32098788af32072bb

llvmbot added backend:AMDGPU llvm:ir llvm:transforms labels Oct 22, 2024

hassnaaHamdi commented Oct 22, 2024

View reviewed changes

hassnaaHamdi requested review from nikic and removed request for nikic October 22, 2024 07:57

nikic requested changes Oct 22, 2024

View reviewed changes

hassnaaHamdi closed this Oct 22, 2024

hassnaaHamdi reopened this Oct 22, 2024

hassnaaHamdi closed this Oct 22, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[Associativity]: Make FMul associative operator #113269

[Associativity]: Make FMul associative operator #113269

Uh oh!

hassnaaHamdi commented Oct 22, 2024

Uh oh!

llvmbot commented Oct 22, 2024 •

edited

Loading

Uh oh!

llvmbot commented Oct 22, 2024

Uh oh!

hassnaaHamdi Oct 22, 2024

Uh oh!

optimisan commented Oct 22, 2024

Uh oh!

hassnaaHamdi Oct 22, 2024

Uh oh!

nikic left a comment

Uh oh!

hassnaaHamdi commented Oct 22, 2024

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

[Associativity]: Make FMul associative operator #113269

[Associativity]: Make FMul associative operator #113269

Uh oh!

Conversation

hassnaaHamdi commented Oct 22, 2024

Uh oh!

llvmbot commented Oct 22, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Oct 22, 2024

Uh oh!

hassnaaHamdi Oct 22, 2024

Choose a reason for hiding this comment

Uh oh!

optimisan commented Oct 22, 2024

Uh oh!

hassnaaHamdi Oct 22, 2024

Choose a reason for hiding this comment

Uh oh!

nikic left a comment

Choose a reason for hiding this comment

Uh oh!

hassnaaHamdi commented Oct 22, 2024

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

llvmbot commented Oct 22, 2024 •

edited

Loading