From 55d5f62faca57cb80628bd3ba8fafcd724a93d5e Mon Sep 17 00:00:00 2001 From: Hassnaa Hamdi Date: Tue, 22 Oct 2024 06:28:59 +0000 Subject: [PATCH] [Associativity]: Make FMul associative operator Change-Id: I8231774b6fa7c446d221a4e32098788af32072bb --- llvm/include/llvm/IR/Instruction.h | 4 +-- .../identical-subrange-spill-infloop.ll | 8 +++-- .../InstCombine/2006-10-26-VectorReassoc.ll | 11 +++---- llvm/test/Transforms/InstCombine/fast-math.ll | 3 +- llvm/test/Transforms/InstCombine/fdiv.ll | 3 +- llvm/test/Transforms/LICM/hoist-binop.ll | 5 +-- .../X86/extractelement-multi-register-use.ll | 15 +++++---- .../buildvector-nodes-dependency.ll | 32 +++++++------------ .../Transforms/SLPVectorizer/reschedule.ll | 8 ++--- 9 files changed, 39 insertions(+), 50 deletions(-) diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index 61dba265dc948..cfbdf248edf8e 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -696,8 +696,8 @@ class Instruction : public User, /// bool isAssociative() const LLVM_READONLY; static bool isAssociative(unsigned Opcode) { - return Opcode == And || Opcode == Or || Opcode == Xor || - Opcode == Add || Opcode == Mul; + return Opcode == And || Opcode == Or || Opcode == Xor || Opcode == Add || + Opcode == Mul || Opcode == FMul; } /// Return true if the instruction is commutative: diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll index a4a8f43646d4b..61783a7e82c5d 100644 --- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll +++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll @@ -139,11 +139,14 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: v_readlane_b32 s38, v7, 2 ; CHECK-NEXT: v_readlane_b32 s39, v7, 3 ; CHECK-NEXT: v_readlane_b32 s40, v7, 4 -; CHECK-NEXT: image_sample_lz v3, v[1:2], s[44:51], s[20:23] dmask:0x1 -; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: image_sample_lz v2, v[1:2], s[44:51], s[20:23] dmask:0x1 ; CHECK-NEXT: v_readlane_b32 s41, v7, 5 ; CHECK-NEXT: v_readlane_b32 s42, v7, 6 ; CHECK-NEXT: v_readlane_b32 s43, v7, 7 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_mul_f32_e32 v3, v0, v2 +; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: ; implicit-def: $vgpr0 ; CHECK-NEXT: .LBB0_2: ; %bb50 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: v_readlane_b32 s36, v7, 32 @@ -162,7 +165,6 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: image_sample_lz v1, v[1:2], s[12:19], s[20:23] dmask:0x1 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_sub_f32_e32 v1, v1, v4 -; CHECK-NEXT: v_mul_f32_e32 v1, v1, v0 ; CHECK-NEXT: v_mul_f32_e32 v1, v1, v3 ; CHECK-NEXT: s_mov_b64 vcc, vcc ; CHECK-NEXT: s_cbranch_vccnz .LBB0_2 diff --git a/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll b/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll index fb860a5e7bdf3..8a9f023aef36a 100644 --- a/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll +++ b/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll @@ -4,9 +4,8 @@ ; Verify this doesn't fold when no fast-math-flags are specified define <4 x float> @test_fmul(<4 x float> %V) { ; CHECK-LABEL: @test_fmul( -; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], -; CHECK-NEXT: ret <4 x float> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], +; CHECK-NEXT: ret <4 x float> [[TMP1]] %Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > %Z = fmul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 > ret <4 x float> %Z @@ -35,12 +34,10 @@ define <4 x float> @test_fmul_reassoc_nsz(<4 x float> %V) { } ; (V * C1) * C2 => V * (C1 * C2) -; TODO: This doesn't require 'nsz'. It should fold to V * { 1.0, 4.0e+05, -9.0, 16.0 } define <4 x float> @test_fmul_reassoc(<4 x float> %V) { ; CHECK-LABEL: @test_fmul_reassoc( -; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <4 x float> [[V:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc <4 x float> [[TMP1]], -; CHECK-NEXT: ret <4 x float> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <4 x float> [[V]], +; CHECK-NEXT: ret <4 x float> [[TMP1]] %Y = fmul reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > %Z = fmul reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 > ret <4 x float> %Z diff --git a/llvm/test/Transforms/InstCombine/fast-math.ll b/llvm/test/Transforms/InstCombine/fast-math.ll index 32f136d53fab4..c17fd9a8c5a68 100644 --- a/llvm/test/Transforms/InstCombine/fast-math.ll +++ b/llvm/test/Transforms/InstCombine/fast-math.ll @@ -17,8 +17,7 @@ define float @fold(float %a) { ; fixed FP mode. define float @notfold(float %a) { ; CHECK-LABEL: @notfold( -; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[A:%.*]], 0x3FF3333340000000 -; CHECK-NEXT: [[MUL1:%.*]] = fmul float [[MUL]], 0x4002666660000000 +; CHECK-NEXT: [[MUL1:%.*]] = fmul float [[A:%.*]], 0x4006147AE0000000 ; CHECK-NEXT: ret float [[MUL1]] ; %mul = fmul fast float %a, 0x3FF3333340000000 diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll index 12d6e6463de65..a35338ad3f370 100644 --- a/llvm/test/Transforms/InstCombine/fdiv.ll +++ b/llvm/test/Transforms/InstCombine/fdiv.ll @@ -524,8 +524,7 @@ define <2 x float> @div_constant_dividend2_reassoc_only(<2 x float> %x) { define <2 x float> @div_constant_dividend3(<2 x float> %x) { ; CHECK-LABEL: @div_constant_dividend3( -; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc arcp <2 x float> [[X:%.*]], -; CHECK-NEXT: [[T2:%.*]] = fmul reassoc arcp <2 x float> [[TMP1]], +; CHECK-NEXT: [[T2:%.*]] = fmul reassoc arcp <2 x float> [[X:%.*]], ; CHECK-NEXT: ret <2 x float> [[T2]] ; %t1 = fdiv <2 x float> , %x diff --git a/llvm/test/Transforms/LICM/hoist-binop.ll b/llvm/test/Transforms/LICM/hoist-binop.ll index ea7d96c07d5ff..77f91ac095e13 100644 --- a/llvm/test/Transforms/LICM/hoist-binop.ll +++ b/llvm/test/Transforms/LICM/hoist-binop.ll @@ -562,12 +562,13 @@ loop: define void @fmul_noassoc(float %c1, float %c2) { ; CHECK-LABEL: @fmul_noassoc( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fmul nsz float [[C1:%.*]], [[C2:%.*]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1:%.*]] +; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1]] ; CHECK-NEXT: call void @use(float [[STEP_ADD]]) -; CHECK-NEXT: [[INDEX_NEXT]] = fmul nsz float [[STEP_ADD]], [[C2:%.*]] +; CHECK-NEXT: [[INDEX_NEXT]] = fmul nsz float [[INDEX]], [[INVARIANT_OP]] ; CHECK-NEXT: br label [[LOOP]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll index 73b73735da021..fd839732de91c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll @@ -11,13 +11,14 @@ define void @test(double %i) { ; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> zeroinitializer, [[TMP2]] ; CHECK-NEXT: [[I75:%.*]] = fsub double 0.000000e+00, [[I]] ; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP5]], <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> [[TMP7]], <8 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[TMP28]], <8 x double> , <8 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[I75]], i32 3 -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x double> [[TMP9]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> zeroinitializer, [[TMP10]] +; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x double> [[TMP5]], double [[I75]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> zeroinitializer, [[TMP28]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP1]], i32 0 +; CHECK-NEXT: [[I87:%.*]] = fmul double 0.000000e+00, [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x double> , <8 x double> [[TMP8]], <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[I87]], i32 5 +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = fadd <8 x double> zeroinitializer, [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = fadd <8 x double> [[TMP12]], zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = fcmp ult <8 x double> [[TMP13]], zeroinitializer diff --git a/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll b/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll index 36abe96567bb2..5e9637af7e76a 100644 --- a/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll +++ b/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll @@ -8,29 +8,21 @@ define double @test() { ; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr null, align 8 ; CHECK-NEXT: br label [[COND_TRUE:%.*]] ; CHECK: cond.true: -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> , double [[TMP0]], i32 1 +; CHECK-NEXT: [[MUL13:%.*]] = fmul double [[TMP0]], 0.000000e+00 +; CHECK-NEXT: [[OP_RDX4:%.*]] = fmul double 0.000000e+00, [[TMP0]] +; CHECK-NEXT: [[ADD17:%.*]] = fadd double [[MUL13]], [[OP_RDX4]] +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> zeroinitializer, [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> zeroinitializer, [[TMP10]] -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x double> [[TMP13]], [[TMP2]] -; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x double> [[TMP13]], [[TMP2]] -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP14]], <2 x double> [[TMP15]], <2 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]] +; CHECK-NEXT: [[SUB:%.*]] = fsub double 0.000000e+00, 0.000000e+00 +; CHECK-NEXT: [[ADD:%.*]] = fadd double 0.000000e+00, [[SUB]] +; CHECK-NEXT: [[SUB2:%.*]] = fsub double [[ADD]], 0.000000e+00 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[SUB2]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x double> [[TMP5]], double [[ADD17]], i32 1 ; CHECK-NEXT: [[TMP17:%.*]] = fsub <2 x double> [[TMP16]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x double> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP19:%.*]] = fmul <2 x double> zeroinitializer, [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = fadd <2 x double> [[TMP19]], [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = fadd <2 x double> [[TMP2]], [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = fsub <2 x double> [[TMP20]], zeroinitializer -; CHECK-NEXT: [[TMP22:%.*]] = fmul <2 x double> [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP23:%.*]] = fmul <2 x double> zeroinitializer, [[TMP22]] ; CHECK-NEXT: [[TMP24:%.*]] = fadd <2 x double> [[TMP23]], [[TMP21]] ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x double> [[TMP24]], i32 0 ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x double> [[TMP24]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/reschedule.ll b/llvm/test/Transforms/SLPVectorizer/reschedule.ll index 825329b5af97e..c148a73408878 100644 --- a/llvm/test/Transforms/SLPVectorizer/reschedule.ll +++ b/llvm/test/Transforms/SLPVectorizer/reschedule.ll @@ -8,19 +8,17 @@ declare void @use(double, double) define void @test() { ; CHECK-LABEL: @test( ; CHECK-NEXT: for.body602: -; CHECK-NEXT: [[MUL701:%.*]] = fmul double 0.000000e+00, 0.000000e+00 -; CHECK-NEXT: [[MUL703:%.*]] = fmul double 0.000000e+00, 0.000000e+00 -; CHECK-NEXT: [[I4:%.*]] = call double @llvm.fmuladd.f64(double [[MUL701]], double 0.000000e+00, double [[MUL703]]) +; CHECK-NEXT: [[I4:%.*]] = call double @llvm.fmuladd.f64(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00) ; CHECK-NEXT: store double [[I4]], ptr null, align 8 ; CHECK-NEXT: [[I5:%.*]] = load double, ptr null, align 8 ; CHECK-NEXT: [[I6:%.*]] = load double, ptr null, align 8 ; CHECK-NEXT: [[MUL746:%.*]] = fmul double 0.000000e+00, [[I6]] ; CHECK-NEXT: [[MUL747:%.*]] = fmul double 0.000000e+00, [[I5]] ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[MUL746]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[MUL701]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double 0.000000e+00, i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, <2 x double> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[MUL747]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[MUL703]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 0.000000e+00, i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP2]], <2 x double> zeroinitializer, <2 x double> [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP5]], <2 x double> zeroinitializer, <2 x double> zeroinitializer) ; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> [[TMP6]], <2 x double> zeroinitializer)