-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[Associativity]: Make FMul associative operator #113269
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Change-Id: I8231774b6fa7c446d221a4e32098788af32072bb
|
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-ir Author: Hassnaa Hamdi (hassnaaHamdi) Changes — Full diff: https://github.com/llvm/llvm-project/pull/113269.diff 9 Files Affected:
diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index 61dba265dc948b..cfbdf248edf8e9 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -696,8 +696,8 @@ class Instruction : public User,
///
bool isAssociative() const LLVM_READONLY;
static bool isAssociative(unsigned Opcode) {
- return Opcode == And || Opcode == Or || Opcode == Xor ||
- Opcode == Add || Opcode == Mul;
+ return Opcode == And || Opcode == Or || Opcode == Xor || Opcode == Add ||
+ Opcode == Mul || Opcode == FMul;
}
/// Return true if the instruction is commutative:
diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
index a4a8f43646d4ba..61783a7e82c5d9 100644
--- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
+++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
@@ -139,11 +139,14 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: v_readlane_b32 s38, v7, 2
; CHECK-NEXT: v_readlane_b32 s39, v7, 3
; CHECK-NEXT: v_readlane_b32 s40, v7, 4
-; CHECK-NEXT: image_sample_lz v3, v[1:2], s[44:51], s[20:23] dmask:0x1
-; CHECK-NEXT: v_mov_b32_e32 v2, 0
+; CHECK-NEXT: image_sample_lz v2, v[1:2], s[44:51], s[20:23] dmask:0x1
; CHECK-NEXT: v_readlane_b32 s41, v7, 5
; CHECK-NEXT: v_readlane_b32 s42, v7, 6
; CHECK-NEXT: v_readlane_b32 s43, v7, 7
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: v_mul_f32_e32 v3, v0, v2
+; CHECK-NEXT: v_mov_b32_e32 v2, 0
+; CHECK-NEXT: ; implicit-def: $vgpr0
; CHECK-NEXT: .LBB0_2: ; %bb50
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: v_readlane_b32 s36, v7, 32
@@ -162,7 +165,6 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: image_sample_lz v1, v[1:2], s[12:19], s[20:23] dmask:0x1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_sub_f32_e32 v1, v1, v4
-; CHECK-NEXT: v_mul_f32_e32 v1, v1, v0
; CHECK-NEXT: v_mul_f32_e32 v1, v1, v3
; CHECK-NEXT: s_mov_b64 vcc, vcc
; CHECK-NEXT: s_cbranch_vccnz .LBB0_2
diff --git a/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll b/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
index fb860a5e7bdf38..8a9f023aef36a9 100644
--- a/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
+++ b/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
@@ -4,9 +4,8 @@
; Verify this doesn't fold when no fast-math-flags are specified
define <4 x float> @test_fmul(<4 x float> %V) {
; CHECK-LABEL: @test_fmul(
-; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT: ret <4 x float> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
%Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
%Z = fmul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
ret <4 x float> %Z
@@ -35,12 +34,10 @@ define <4 x float> @test_fmul_reassoc_nsz(<4 x float> %V) {
}
; (V * C1) * C2 => V * (C1 * C2)
-; TODO: This doesn't require 'nsz'. It should fold to V * { 1.0, 4.0e+05, -9.0, 16.0 }
define <4 x float> @test_fmul_reassoc(<4 x float> %V) {
; CHECK-LABEL: @test_fmul_reassoc(
-; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT: ret <4 x float> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <4 x float> [[V]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
%Y = fmul reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
%Z = fmul reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
ret <4 x float> %Z
diff --git a/llvm/test/Transforms/InstCombine/fast-math.ll b/llvm/test/Transforms/InstCombine/fast-math.ll
index 32f136d53fab4b..c17fd9a8c5a681 100644
--- a/llvm/test/Transforms/InstCombine/fast-math.ll
+++ b/llvm/test/Transforms/InstCombine/fast-math.ll
@@ -17,8 +17,7 @@ define float @fold(float %a) {
; fixed FP mode.
define float @notfold(float %a) {
; CHECK-LABEL: @notfold(
-; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[A:%.*]], 0x3FF3333340000000
-; CHECK-NEXT: [[MUL1:%.*]] = fmul float [[MUL]], 0x4002666660000000
+; CHECK-NEXT: [[MUL1:%.*]] = fmul float [[A:%.*]], 0x4006147AE0000000
; CHECK-NEXT: ret float [[MUL1]]
;
%mul = fmul fast float %a, 0x3FF3333340000000
diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll
index 12d6e6463de657..a35338ad3f3705 100644
--- a/llvm/test/Transforms/InstCombine/fdiv.ll
+++ b/llvm/test/Transforms/InstCombine/fdiv.ll
@@ -524,8 +524,7 @@ define <2 x float> @div_constant_dividend2_reassoc_only(<2 x float> %x) {
define <2 x float> @div_constant_dividend3(<2 x float> %x) {
; CHECK-LABEL: @div_constant_dividend3(
-; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc arcp <2 x float> [[X:%.*]], <float 1.500000e+01, float -7.000000e+00>
-; CHECK-NEXT: [[T2:%.*]] = fmul reassoc arcp <2 x float> [[TMP1]], <float 0x3FD5555560000000, float 0x3FC24924A0000000>
+; CHECK-NEXT: [[T2:%.*]] = fmul reassoc arcp <2 x float> [[X:%.*]], <float 5.000000e+00, float -1.000000e+00>
; CHECK-NEXT: ret <2 x float> [[T2]]
;
%t1 = fdiv <2 x float> <float 3.0e0, float 7.0e0>, %x
diff --git a/llvm/test/Transforms/LICM/hoist-binop.ll b/llvm/test/Transforms/LICM/hoist-binop.ll
index ea7d96c07d5ff2..77f91ac095e131 100644
--- a/llvm/test/Transforms/LICM/hoist-binop.ll
+++ b/llvm/test/Transforms/LICM/hoist-binop.ll
@@ -562,12 +562,13 @@ loop:
define void @fmul_noassoc(float %c1, float %c2) {
; CHECK-LABEL: @fmul_noassoc(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fmul nsz float [[C1:%.*]], [[C2:%.*]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1:%.*]]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1]]
; CHECK-NEXT: call void @use(float [[STEP_ADD]])
-; CHECK-NEXT: [[INDEX_NEXT]] = fmul nsz float [[STEP_ADD]], [[C2:%.*]]
+; CHECK-NEXT: [[INDEX_NEXT]] = fmul nsz float [[INDEX]], [[INVARIANT_OP]]
; CHECK-NEXT: br label [[LOOP]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll
index 73b73735da0210..fd839732de91cf 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll
@@ -11,13 +11,14 @@ define void @test(double %i) {
; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> zeroinitializer, [[TMP2]]
; CHECK-NEXT: [[I75:%.*]] = fsub double 0.000000e+00, [[I]]
; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP0]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP5]], <4 x i32> <i32 poison, i32 0, i32 2, i32 poison>
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> [[TMP7]], <8 x i32> <i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 5, i32 6, i32 poison>
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[TMP28]], <8 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison, double 0.000000e+00, double poison, double poison, double poison>, <8 x i32> <i32 8, i32 9, i32 2, i32 poison, i32 12, i32 5, i32 6, i32 poison>
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[I75]], i32 3
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x double> [[TMP9]], <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 3>
-; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> zeroinitializer, [[TMP10]]
+; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x double> [[TMP5]], double [[I75]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> zeroinitializer, [[TMP28]]
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
+; CHECK-NEXT: [[I87:%.*]] = fmul double 0.000000e+00, [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison, double 0.000000e+00, double poison, double poison, double poison>, <8 x double> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[I87]], i32 5
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[TMP12:%.*]] = fadd <8 x double> zeroinitializer, [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = fadd <8 x double> [[TMP12]], zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = fcmp ult <8 x double> [[TMP13]], zeroinitializer
diff --git a/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll b/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll
index 36abe96567bb2d..5e9637af7e76ab 100644
--- a/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll
+++ b/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll
@@ -8,29 +8,21 @@ define double @test() {
; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr null, align 8
; CHECK-NEXT: br label [[COND_TRUE:%.*]]
; CHECK: cond.true:
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[TMP0]], i32 1
+; CHECK-NEXT: [[MUL13:%.*]] = fmul double [[TMP0]], 0.000000e+00
+; CHECK-NEXT: [[OP_RDX4:%.*]] = fmul double 0.000000e+00, [[TMP0]]
+; CHECK-NEXT: [[ADD17:%.*]] = fadd double [[MUL13]], [[OP_RDX4]]
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP6]], zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP7]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> zeroinitializer, [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> zeroinitializer, [[TMP10]]
-; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x double> [[TMP13]], [[TMP2]]
-; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x double> [[TMP13]], [[TMP2]]
-; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP14]], <2 x double> [[TMP15]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP23:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: [[SUB:%.*]] = fsub double 0.000000e+00, 0.000000e+00
+; CHECK-NEXT: [[ADD:%.*]] = fadd double 0.000000e+00, [[SUB]]
+; CHECK-NEXT: [[SUB2:%.*]] = fsub double [[ADD]], 0.000000e+00
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[SUB2]], i32 0
+; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x double> [[TMP5]], double [[ADD17]], i32 1
; CHECK-NEXT: [[TMP17:%.*]] = fsub <2 x double> [[TMP16]], zeroinitializer
-; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x double> [[TMP4]], zeroinitializer
-; CHECK-NEXT: [[TMP19:%.*]] = fmul <2 x double> zeroinitializer, [[TMP18]]
-; CHECK-NEXT: [[TMP20:%.*]] = fadd <2 x double> [[TMP19]], [[TMP17]]
+; CHECK-NEXT: [[TMP20:%.*]] = fadd <2 x double> [[TMP2]], [[TMP17]]
; CHECK-NEXT: [[TMP21:%.*]] = fsub <2 x double> [[TMP20]], zeroinitializer
-; CHECK-NEXT: [[TMP22:%.*]] = fmul <2 x double> [[TMP5]], zeroinitializer
-; CHECK-NEXT: [[TMP23:%.*]] = fmul <2 x double> zeroinitializer, [[TMP22]]
; CHECK-NEXT: [[TMP24:%.*]] = fadd <2 x double> [[TMP23]], [[TMP21]]
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x double> [[TMP24]], i32 0
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x double> [[TMP24]], i32 1
diff --git a/llvm/test/Transforms/SLPVectorizer/reschedule.ll b/llvm/test/Transforms/SLPVectorizer/reschedule.ll
index 825329b5af97ef..c148a734088786 100644
--- a/llvm/test/Transforms/SLPVectorizer/reschedule.ll
+++ b/llvm/test/Transforms/SLPVectorizer/reschedule.ll
@@ -8,19 +8,17 @@ declare void @use(double, double)
define void @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: for.body602:
-; CHECK-NEXT: [[MUL701:%.*]] = fmul double 0.000000e+00, 0.000000e+00
-; CHECK-NEXT: [[MUL703:%.*]] = fmul double 0.000000e+00, 0.000000e+00
-; CHECK-NEXT: [[I4:%.*]] = call double @llvm.fmuladd.f64(double [[MUL701]], double 0.000000e+00, double [[MUL703]])
+; CHECK-NEXT: [[I4:%.*]] = call double @llvm.fmuladd.f64(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00)
; CHECK-NEXT: store double [[I4]], ptr null, align 8
; CHECK-NEXT: [[I5:%.*]] = load double, ptr null, align 8
; CHECK-NEXT: [[I6:%.*]] = load double, ptr null, align 8
; CHECK-NEXT: [[MUL746:%.*]] = fmul double 0.000000e+00, [[I6]]
; CHECK-NEXT: [[MUL747:%.*]] = fmul double 0.000000e+00, [[I5]]
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[MUL746]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[MUL701]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double 0.000000e+00, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, <2 x double> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[MUL747]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[MUL703]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 0.000000e+00, i32 1
; CHECK-NEXT: [[TMP5:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP2]], <2 x double> zeroinitializer, <2 x double> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP5]], <2 x double> zeroinitializer, <2 x double> zeroinitializer)
; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> [[TMP6]], <2 x double> zeroinitializer)
|
|
@llvm/pr-subscribers-llvm-transforms Author: Hassnaa Hamdi (hassnaaHamdi) Changes — Full diff: https://github.com/llvm/llvm-project/pull/113269.diff 9 Files Affected:
diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index 61dba265dc948b..cfbdf248edf8e9 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -696,8 +696,8 @@ class Instruction : public User,
///
bool isAssociative() const LLVM_READONLY;
static bool isAssociative(unsigned Opcode) {
- return Opcode == And || Opcode == Or || Opcode == Xor ||
- Opcode == Add || Opcode == Mul;
+ return Opcode == And || Opcode == Or || Opcode == Xor || Opcode == Add ||
+ Opcode == Mul || Opcode == FMul;
}
/// Return true if the instruction is commutative:
diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
index a4a8f43646d4ba..61783a7e82c5d9 100644
--- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
+++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
@@ -139,11 +139,14 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: v_readlane_b32 s38, v7, 2
; CHECK-NEXT: v_readlane_b32 s39, v7, 3
; CHECK-NEXT: v_readlane_b32 s40, v7, 4
-; CHECK-NEXT: image_sample_lz v3, v[1:2], s[44:51], s[20:23] dmask:0x1
-; CHECK-NEXT: v_mov_b32_e32 v2, 0
+; CHECK-NEXT: image_sample_lz v2, v[1:2], s[44:51], s[20:23] dmask:0x1
; CHECK-NEXT: v_readlane_b32 s41, v7, 5
; CHECK-NEXT: v_readlane_b32 s42, v7, 6
; CHECK-NEXT: v_readlane_b32 s43, v7, 7
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: v_mul_f32_e32 v3, v0, v2
+; CHECK-NEXT: v_mov_b32_e32 v2, 0
+; CHECK-NEXT: ; implicit-def: $vgpr0
; CHECK-NEXT: .LBB0_2: ; %bb50
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: v_readlane_b32 s36, v7, 32
@@ -162,7 +165,6 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: image_sample_lz v1, v[1:2], s[12:19], s[20:23] dmask:0x1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_sub_f32_e32 v1, v1, v4
-; CHECK-NEXT: v_mul_f32_e32 v1, v1, v0
; CHECK-NEXT: v_mul_f32_e32 v1, v1, v3
; CHECK-NEXT: s_mov_b64 vcc, vcc
; CHECK-NEXT: s_cbranch_vccnz .LBB0_2
diff --git a/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll b/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
index fb860a5e7bdf38..8a9f023aef36a9 100644
--- a/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
+++ b/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
@@ -4,9 +4,8 @@
; Verify this doesn't fold when no fast-math-flags are specified
define <4 x float> @test_fmul(<4 x float> %V) {
; CHECK-LABEL: @test_fmul(
-; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT: ret <4 x float> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
%Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
%Z = fmul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
ret <4 x float> %Z
@@ -35,12 +34,10 @@ define <4 x float> @test_fmul_reassoc_nsz(<4 x float> %V) {
}
; (V * C1) * C2 => V * (C1 * C2)
-; TODO: This doesn't require 'nsz'. It should fold to V * { 1.0, 4.0e+05, -9.0, 16.0 }
define <4 x float> @test_fmul_reassoc(<4 x float> %V) {
; CHECK-LABEL: @test_fmul_reassoc(
-; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT: ret <4 x float> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <4 x float> [[V]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
%Y = fmul reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
%Z = fmul reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
ret <4 x float> %Z
diff --git a/llvm/test/Transforms/InstCombine/fast-math.ll b/llvm/test/Transforms/InstCombine/fast-math.ll
index 32f136d53fab4b..c17fd9a8c5a681 100644
--- a/llvm/test/Transforms/InstCombine/fast-math.ll
+++ b/llvm/test/Transforms/InstCombine/fast-math.ll
@@ -17,8 +17,7 @@ define float @fold(float %a) {
; fixed FP mode.
define float @notfold(float %a) {
; CHECK-LABEL: @notfold(
-; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[A:%.*]], 0x3FF3333340000000
-; CHECK-NEXT: [[MUL1:%.*]] = fmul float [[MUL]], 0x4002666660000000
+; CHECK-NEXT: [[MUL1:%.*]] = fmul float [[A:%.*]], 0x4006147AE0000000
; CHECK-NEXT: ret float [[MUL1]]
;
%mul = fmul fast float %a, 0x3FF3333340000000
diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll
index 12d6e6463de657..a35338ad3f3705 100644
--- a/llvm/test/Transforms/InstCombine/fdiv.ll
+++ b/llvm/test/Transforms/InstCombine/fdiv.ll
@@ -524,8 +524,7 @@ define <2 x float> @div_constant_dividend2_reassoc_only(<2 x float> %x) {
define <2 x float> @div_constant_dividend3(<2 x float> %x) {
; CHECK-LABEL: @div_constant_dividend3(
-; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc arcp <2 x float> [[X:%.*]], <float 1.500000e+01, float -7.000000e+00>
-; CHECK-NEXT: [[T2:%.*]] = fmul reassoc arcp <2 x float> [[TMP1]], <float 0x3FD5555560000000, float 0x3FC24924A0000000>
+; CHECK-NEXT: [[T2:%.*]] = fmul reassoc arcp <2 x float> [[X:%.*]], <float 5.000000e+00, float -1.000000e+00>
; CHECK-NEXT: ret <2 x float> [[T2]]
;
%t1 = fdiv <2 x float> <float 3.0e0, float 7.0e0>, %x
diff --git a/llvm/test/Transforms/LICM/hoist-binop.ll b/llvm/test/Transforms/LICM/hoist-binop.ll
index ea7d96c07d5ff2..77f91ac095e131 100644
--- a/llvm/test/Transforms/LICM/hoist-binop.ll
+++ b/llvm/test/Transforms/LICM/hoist-binop.ll
@@ -562,12 +562,13 @@ loop:
define void @fmul_noassoc(float %c1, float %c2) {
; CHECK-LABEL: @fmul_noassoc(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fmul nsz float [[C1:%.*]], [[C2:%.*]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1:%.*]]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1]]
; CHECK-NEXT: call void @use(float [[STEP_ADD]])
-; CHECK-NEXT: [[INDEX_NEXT]] = fmul nsz float [[STEP_ADD]], [[C2:%.*]]
+; CHECK-NEXT: [[INDEX_NEXT]] = fmul nsz float [[INDEX]], [[INVARIANT_OP]]
; CHECK-NEXT: br label [[LOOP]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll
index 73b73735da0210..fd839732de91cf 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll
@@ -11,13 +11,14 @@ define void @test(double %i) {
; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> zeroinitializer, [[TMP2]]
; CHECK-NEXT: [[I75:%.*]] = fsub double 0.000000e+00, [[I]]
; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP0]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP5]], <4 x i32> <i32 poison, i32 0, i32 2, i32 poison>
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> [[TMP7]], <8 x i32> <i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 5, i32 6, i32 poison>
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[TMP28]], <8 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison, double 0.000000e+00, double poison, double poison, double poison>, <8 x i32> <i32 8, i32 9, i32 2, i32 poison, i32 12, i32 5, i32 6, i32 poison>
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[I75]], i32 3
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x double> [[TMP9]], <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 3>
-; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> zeroinitializer, [[TMP10]]
+; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x double> [[TMP5]], double [[I75]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> zeroinitializer, [[TMP28]]
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
+; CHECK-NEXT: [[I87:%.*]] = fmul double 0.000000e+00, [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison, double 0.000000e+00, double poison, double poison, double poison>, <8 x double> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[I87]], i32 5
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[TMP12:%.*]] = fadd <8 x double> zeroinitializer, [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = fadd <8 x double> [[TMP12]], zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = fcmp ult <8 x double> [[TMP13]], zeroinitializer
diff --git a/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll b/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll
index 36abe96567bb2d..5e9637af7e76ab 100644
--- a/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll
+++ b/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll
@@ -8,29 +8,21 @@ define double @test() {
; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr null, align 8
; CHECK-NEXT: br label [[COND_TRUE:%.*]]
; CHECK: cond.true:
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[TMP0]], i32 1
+; CHECK-NEXT: [[MUL13:%.*]] = fmul double [[TMP0]], 0.000000e+00
+; CHECK-NEXT: [[OP_RDX4:%.*]] = fmul double 0.000000e+00, [[TMP0]]
+; CHECK-NEXT: [[ADD17:%.*]] = fadd double [[MUL13]], [[OP_RDX4]]
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP6]], zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP7]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> zeroinitializer, [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> zeroinitializer, [[TMP10]]
-; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x double> [[TMP13]], [[TMP2]]
-; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x double> [[TMP13]], [[TMP2]]
-; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP14]], <2 x double> [[TMP15]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP23:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: [[SUB:%.*]] = fsub double 0.000000e+00, 0.000000e+00
+; CHECK-NEXT: [[ADD:%.*]] = fadd double 0.000000e+00, [[SUB]]
+; CHECK-NEXT: [[SUB2:%.*]] = fsub double [[ADD]], 0.000000e+00
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[SUB2]], i32 0
+; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x double> [[TMP5]], double [[ADD17]], i32 1
; CHECK-NEXT: [[TMP17:%.*]] = fsub <2 x double> [[TMP16]], zeroinitializer
-; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x double> [[TMP4]], zeroinitializer
-; CHECK-NEXT: [[TMP19:%.*]] = fmul <2 x double> zeroinitializer, [[TMP18]]
-; CHECK-NEXT: [[TMP20:%.*]] = fadd <2 x double> [[TMP19]], [[TMP17]]
+; CHECK-NEXT: [[TMP20:%.*]] = fadd <2 x double> [[TMP2]], [[TMP17]]
; CHECK-NEXT: [[TMP21:%.*]] = fsub <2 x double> [[TMP20]], zeroinitializer
-; CHECK-NEXT: [[TMP22:%.*]] = fmul <2 x double> [[TMP5]], zeroinitializer
-; CHECK-NEXT: [[TMP23:%.*]] = fmul <2 x double> zeroinitializer, [[TMP22]]
; CHECK-NEXT: [[TMP24:%.*]] = fadd <2 x double> [[TMP23]], [[TMP21]]
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x double> [[TMP24]], i32 0
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x double> [[TMP24]], i32 1
diff --git a/llvm/test/Transforms/SLPVectorizer/reschedule.ll b/llvm/test/Transforms/SLPVectorizer/reschedule.ll
index 825329b5af97ef..c148a734088786 100644
--- a/llvm/test/Transforms/SLPVectorizer/reschedule.ll
+++ b/llvm/test/Transforms/SLPVectorizer/reschedule.ll
@@ -8,19 +8,17 @@ declare void @use(double, double)
define void @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: for.body602:
-; CHECK-NEXT: [[MUL701:%.*]] = fmul double 0.000000e+00, 0.000000e+00
-; CHECK-NEXT: [[MUL703:%.*]] = fmul double 0.000000e+00, 0.000000e+00
-; CHECK-NEXT: [[I4:%.*]] = call double @llvm.fmuladd.f64(double [[MUL701]], double 0.000000e+00, double [[MUL703]])
+; CHECK-NEXT: [[I4:%.*]] = call double @llvm.fmuladd.f64(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00)
; CHECK-NEXT: store double [[I4]], ptr null, align 8
; CHECK-NEXT: [[I5:%.*]] = load double, ptr null, align 8
; CHECK-NEXT: [[I6:%.*]] = load double, ptr null, align 8
; CHECK-NEXT: [[MUL746:%.*]] = fmul double 0.000000e+00, [[I6]]
; CHECK-NEXT: [[MUL747:%.*]] = fmul double 0.000000e+00, [[I5]]
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[MUL746]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[MUL701]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double 0.000000e+00, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, <2 x double> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[MUL747]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[MUL703]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 0.000000e+00, i32 1
; CHECK-NEXT: [[TMP5:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP2]], <2 x double> zeroinitializer, <2 x double> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP5]], <2 x double> zeroinitializer, <2 x double> zeroinitializer)
; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> [[TMP6]], <2 x double> zeroinitializer)
|
| } | ||
|
|
||
| ; Don't hoist if both reassoc and nsz aren't present on both instructions. | ||
| define void @fmul_noassoc(float %c1, float %c2) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
; Don't hoist if both reassoc and nsz aren't present on both instructions.
Regarding that comment: I think there may be a bug in the LICM pass, specifically in hoistMulAddAssociation(..) or hoistBOAssociation(..), because that verification should not depend on the associativity of FMul?
|
But floating-point multiplication is not associative in general, right? |
| ; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[A:%.*]], 0x3FF3333340000000 | ||
| ; CHECK-NEXT: [[MUL1:%.*]] = fmul float [[MUL]], 0x4002666660000000 | ||
| ; CHECK-NEXT: [[MUL1:%.*]] = fmul float [[A:%.*]], 0x4006147AE0000000 | ||
| ; CHECK-NEXT: ret float [[MUL1]] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this should not be folded, but I think the problem here is related not to associativity but to the handling of the fast-math flags?
nikic
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is incorrect: fmul is only associative with the reassoc flag, and this is already handled in the member isAssociative() function (rather than the static one taking an Opcode).
Yes, that's true. I will close the pull request. Thank you :) |
No description provided.