Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/include/llvm/IR/Instruction.h
Original file line number Diff line number Diff line change
Expand Up @@ -696,8 +696,8 @@ class Instruction : public User,
///
bool isAssociative() const LLVM_READONLY;
static bool isAssociative(unsigned Opcode) {
return Opcode == And || Opcode == Or || Opcode == Xor ||
Opcode == Add || Opcode == Mul;
return Opcode == And || Opcode == Or || Opcode == Xor || Opcode == Add ||
Opcode == Mul || Opcode == FMul;
}

/// Return true if the instruction is commutative:
Expand Down
8 changes: 5 additions & 3 deletions llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,14 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: v_readlane_b32 s38, v7, 2
; CHECK-NEXT: v_readlane_b32 s39, v7, 3
; CHECK-NEXT: v_readlane_b32 s40, v7, 4
; CHECK-NEXT: image_sample_lz v3, v[1:2], s[44:51], s[20:23] dmask:0x1
; CHECK-NEXT: v_mov_b32_e32 v2, 0
; CHECK-NEXT: image_sample_lz v2, v[1:2], s[44:51], s[20:23] dmask:0x1
; CHECK-NEXT: v_readlane_b32 s41, v7, 5
; CHECK-NEXT: v_readlane_b32 s42, v7, 6
; CHECK-NEXT: v_readlane_b32 s43, v7, 7
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_mul_f32_e32 v3, v0, v2
; CHECK-NEXT: v_mov_b32_e32 v2, 0
; CHECK-NEXT: ; implicit-def: $vgpr0
; CHECK-NEXT: .LBB0_2: ; %bb50
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: v_readlane_b32 s36, v7, 32
Expand All @@ -162,7 +165,6 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: image_sample_lz v1, v[1:2], s[12:19], s[20:23] dmask:0x1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_sub_f32_e32 v1, v1, v4
; CHECK-NEXT: v_mul_f32_e32 v1, v1, v0
; CHECK-NEXT: v_mul_f32_e32 v1, v1, v3
; CHECK-NEXT: s_mov_b64 vcc, vcc
; CHECK-NEXT: s_cbranch_vccnz .LBB0_2
Expand Down
11 changes: 4 additions & 7 deletions llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@
; Verify this doesn't fold when no fast-math-flags are specified
define <4 x float> @test_fmul(<4 x float> %V) {
; CHECK-LABEL: @test_fmul(
; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: ret <4 x float> [[TMP2]]
; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
%Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
%Z = fmul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
ret <4 x float> %Z
Expand Down Expand Up @@ -35,12 +34,10 @@ define <4 x float> @test_fmul_reassoc_nsz(<4 x float> %V) {
}

; (V * C1) * C2 => V * (C1 * C2)
; TODO: This doesn't require 'nsz'. It should fold to V * { 1.0, 4.0e+05, -9.0, 16.0 }
define <4 x float> @test_fmul_reassoc(<4 x float> %V) {
; CHECK-LABEL: @test_fmul_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
; CHECK-NEXT: ret <4 x float> [[TMP2]]
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <4 x float> [[V]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
%Y = fmul reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
%Z = fmul reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
ret <4 x float> %Z
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/Transforms/InstCombine/fast-math.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ define float @fold(float %a) {
; fixed FP mode.
define float @notfold(float %a) {
; CHECK-LABEL: @notfold(
; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[A:%.*]], 0x3FF3333340000000
; CHECK-NEXT: [[MUL1:%.*]] = fmul float [[MUL]], 0x4002666660000000
; CHECK-NEXT: [[MUL1:%.*]] = fmul float [[A:%.*]], 0x4006147AE0000000
; CHECK-NEXT: ret float [[MUL1]]
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should not be folded. However, I don't think the problem here is related to associativity; it seems to be related to how the fast-math flags are handled?

;
%mul = fmul fast float %a, 0x3FF3333340000000
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/Transforms/InstCombine/fdiv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -524,8 +524,7 @@ define <2 x float> @div_constant_dividend2_reassoc_only(<2 x float> %x) {

define <2 x float> @div_constant_dividend3(<2 x float> %x) {
; CHECK-LABEL: @div_constant_dividend3(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc arcp <2 x float> [[X:%.*]], <float 1.500000e+01, float -7.000000e+00>
; CHECK-NEXT: [[T2:%.*]] = fmul reassoc arcp <2 x float> [[TMP1]], <float 0x3FD5555560000000, float 0x3FC24924A0000000>
; CHECK-NEXT: [[T2:%.*]] = fmul reassoc arcp <2 x float> [[X:%.*]], <float 5.000000e+00, float -1.000000e+00>
; CHECK-NEXT: ret <2 x float> [[T2]]
;
%t1 = fdiv <2 x float> <float 3.0e0, float 7.0e0>, %x
Expand Down
5 changes: 3 additions & 2 deletions llvm/test/Transforms/LICM/hoist-binop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -562,12 +562,13 @@ loop:
define void @fmul_noassoc(float %c1, float %c2) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

; Don't hoist if both reassoc and nsz aren't present on both instructions.
Regarding that comment: I think there may be a bug in the LICM pass, specifically in hoistMulAddAssociation(..) or hoistBOAssociation(..), because that check should not depend on the associativity of FMul.

; CHECK-LABEL: @fmul_noassoc(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[INVARIANT_OP:%.*]] = fmul nsz float [[C1:%.*]], [[C2:%.*]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1:%.*]]
; CHECK-NEXT: [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1]]
; CHECK-NEXT: call void @use(float [[STEP_ADD]])
; CHECK-NEXT: [[INDEX_NEXT]] = fmul nsz float [[STEP_ADD]], [[C2:%.*]]
; CHECK-NEXT: [[INDEX_NEXT]] = fmul nsz float [[INDEX]], [[INVARIANT_OP]]
; CHECK-NEXT: br label [[LOOP]]
;
entry:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@ define void @test(double %i) {
; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> zeroinitializer, [[TMP2]]
; CHECK-NEXT: [[I75:%.*]] = fsub double 0.000000e+00, [[I]]
; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP0]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP5]], <4 x i32> <i32 poison, i32 0, i32 2, i32 poison>
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> [[TMP7]], <8 x i32> <i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 5, i32 6, i32 poison>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[TMP28]], <8 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison, double 0.000000e+00, double poison, double poison, double poison>, <8 x i32> <i32 8, i32 9, i32 2, i32 poison, i32 12, i32 5, i32 6, i32 poison>
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[I75]], i32 3
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x double> [[TMP9]], <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 3>
; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x double> zeroinitializer, [[TMP10]]
; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x double> [[TMP5]], double [[I75]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> zeroinitializer, [[TMP28]]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
; CHECK-NEXT: [[I87:%.*]] = fmul double 0.000000e+00, [[TMP7]]
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison, double 0.000000e+00, double poison, double poison, double poison>, <8 x double> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[I87]], i32 5
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[TMP12:%.*]] = fadd <8 x double> zeroinitializer, [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = fadd <8 x double> [[TMP12]], zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = fcmp ult <8 x double> [[TMP13]], zeroinitializer
Expand Down
32 changes: 12 additions & 20 deletions llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,21 @@ define double @test() {
; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr null, align 8
; CHECK-NEXT: br label [[COND_TRUE:%.*]]
; CHECK: cond.true:
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[TMP0]], i32 1
; CHECK-NEXT: [[MUL13:%.*]] = fmul double [[TMP0]], 0.000000e+00
; CHECK-NEXT: [[OP_RDX4:%.*]] = fmul double 0.000000e+00, [[TMP0]]
; CHECK-NEXT: [[ADD17:%.*]] = fadd double [[MUL13]], [[OP_RDX4]]
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> zeroinitializer, [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> zeroinitializer, [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x double> [[TMP13]], [[TMP2]]
; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x double> [[TMP13]], [[TMP2]]
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP14]], <2 x double> [[TMP15]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP23:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]]
; CHECK-NEXT: [[SUB:%.*]] = fsub double 0.000000e+00, 0.000000e+00
; CHECK-NEXT: [[ADD:%.*]] = fadd double 0.000000e+00, [[SUB]]
; CHECK-NEXT: [[SUB2:%.*]] = fsub double [[ADD]], 0.000000e+00
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[SUB2]], i32 0
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x double> [[TMP5]], double [[ADD17]], i32 1
; CHECK-NEXT: [[TMP17:%.*]] = fsub <2 x double> [[TMP16]], zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x double> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = fmul <2 x double> zeroinitializer, [[TMP18]]
; CHECK-NEXT: [[TMP20:%.*]] = fadd <2 x double> [[TMP19]], [[TMP17]]
; CHECK-NEXT: [[TMP20:%.*]] = fadd <2 x double> [[TMP2]], [[TMP17]]
; CHECK-NEXT: [[TMP21:%.*]] = fsub <2 x double> [[TMP20]], zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = fmul <2 x double> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP23:%.*]] = fmul <2 x double> zeroinitializer, [[TMP22]]
; CHECK-NEXT: [[TMP24:%.*]] = fadd <2 x double> [[TMP23]], [[TMP21]]
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x double> [[TMP24]], i32 0
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x double> [[TMP24]], i32 1
Expand Down
8 changes: 3 additions & 5 deletions llvm/test/Transforms/SLPVectorizer/reschedule.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,17 @@ declare void @use(double, double)
define void @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: for.body602:
; CHECK-NEXT: [[MUL701:%.*]] = fmul double 0.000000e+00, 0.000000e+00
; CHECK-NEXT: [[MUL703:%.*]] = fmul double 0.000000e+00, 0.000000e+00
; CHECK-NEXT: [[I4:%.*]] = call double @llvm.fmuladd.f64(double [[MUL701]], double 0.000000e+00, double [[MUL703]])
; CHECK-NEXT: [[I4:%.*]] = call double @llvm.fmuladd.f64(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00)
; CHECK-NEXT: store double [[I4]], ptr null, align 8
; CHECK-NEXT: [[I5:%.*]] = load double, ptr null, align 8
; CHECK-NEXT: [[I6:%.*]] = load double, ptr null, align 8
; CHECK-NEXT: [[MUL746:%.*]] = fmul double 0.000000e+00, [[I6]]
; CHECK-NEXT: [[MUL747:%.*]] = fmul double 0.000000e+00, [[I5]]
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[MUL746]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[MUL701]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double 0.000000e+00, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, <2 x double> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[MUL747]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[MUL703]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 0.000000e+00, i32 1
; CHECK-NEXT: [[TMP5:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP2]], <2 x double> zeroinitializer, <2 x double> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP5]], <2 x double> zeroinitializer, <2 x double> zeroinitializer)
; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> [[TMP6]], <2 x double> zeroinitializer)
Expand Down
Loading