Skip to content

Commit e64adf2

Browse files
committed
[DAGCombiner] Fold freeze(fmul) + fadd/fsub into FMA combine
1 parent f871466 commit e64adf2

File tree

2 files changed

+71
-77
lines changed

2 files changed

+71
-77
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16736,6 +16736,28 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
1673616736
}
1673716737
}
1673816738

16739+
// fold (fadd (freeze (fmul x, y)), z) -> (fma x, y, z)
16740+
if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
16741+
N0.getOpcode() == ISD::FREEZE) {
16742+
SDValue FrozenMul = N0.getOperand(0);
16743+
if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
16744+
SDValue X = FrozenMul.getOperand(0);
16745+
SDValue Y = FrozenMul.getOperand(1);
16746+
return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, N1);
16747+
}
16748+
}
16749+
16750+
// fold (fadd x, (freeze (fmul y, z))) -> (fma y, z, x)
16751+
if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
16752+
N1.getOpcode() == ISD::FREEZE) {
16753+
SDValue FrozenMul = N1.getOperand(0);
16754+
if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
16755+
SDValue X = FrozenMul.getOperand(0);
16756+
SDValue Y = FrozenMul.getOperand(1);
16757+
return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, N0);
16758+
}
16759+
}
16760+
1673916761
// More folding opportunities when target permits.
1674016762
if (Aggressive) {
1674116763
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
@@ -17013,6 +17035,30 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
1701317035
}
1701417036
}
1701517037

17038+
// fold (fsub (freeze (fmul x, y)), z) -> (fma x, y, (fneg z))
17039+
if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
17040+
N0.getOpcode() == ISD::FREEZE) {
17041+
SDValue FrozenMul = N0.getOperand(0);
17042+
if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
17043+
SDValue X = FrozenMul.getOperand(0);
17044+
SDValue Y = FrozenMul.getOperand(1);
17045+
SDValue NegZ = matcher.getNode(ISD::FNEG, SL, VT, N1);
17046+
return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, NegZ);
17047+
}
17048+
}
17049+
17050+
// fold (fsub z, (freeze (fmul x, y))) -> (fma (fneg x), y, z)
17051+
if ((Options.UnsafeFPMath || N->getFlags().hasAllowContract()) &&
17052+
N1.getOpcode() == ISD::FREEZE) {
17053+
SDValue FrozenMul = N1.getOperand(0);
17054+
if (matcher.match(FrozenMul, ISD::FMUL) && isContractableFMUL(FrozenMul)) {
17055+
SDValue X = FrozenMul.getOperand(0);
17056+
SDValue Y = FrozenMul.getOperand(1);
17057+
SDValue NegX = matcher.getNode(ISD::FNEG, SL, VT, X);
17058+
return matcher.getNode(PreferredFusedOpcode, SL, VT, NegX, Y, N0);
17059+
}
17060+
}
17061+
1701617062
auto isReassociable = [&Options](SDNode *N) {
1701717063
return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
1701817064
};
Lines changed: 25 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,106 +1,54 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck %s
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix GFX11
33

44
define float @fma_from_freeze_mul_add_left(float %x, float %y) {
5-
; CHECK-LABEL: fma_from_freeze_mul_add_left:
6-
; CHECK: ; %bb.0:
7-
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8-
; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0
9-
; CHECK-NEXT: s_setpc_b64 s[30:31]
5+
; GFX11-LABEL: fma_from_freeze_mul_add_left:
6+
; GFX11: ; %bb.0: ; %bb
7+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8+
; GFX11-NEXT: v_fma_f32 v0, v0, v1, 1.0
9+
; GFX11-NEXT: s_setpc_b64 s[30:31]
10+
bb:
1011
%mul = fmul contract float %x, %y
1112
%mul.fr = freeze float %mul
1213
%add = fadd contract float %mul.fr, 1.000000e+00
1314
ret float %add
1415
}
1516

16-
define float @fma_from_freeze_mul_add_left_with_nnan(float %x, float %y) {
17-
; CHECK-LABEL: fma_from_freeze_mul_add_left_with_nnan:
18-
; CHECK: ; %bb.0:
19-
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20-
; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
21-
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
22-
; CHECK-NEXT: v_add_f32_e32 v0, 1.0, v0
23-
; CHECK-NEXT: s_setpc_b64 s[30:31]
24-
%mul = fmul nnan contract afn float %x, %y
25-
%mul.fr = freeze float %mul
26-
%add = fadd nnan contract float %mul.fr, 1.000000e+00
27-
ret float %add
28-
}
29-
3017
define float @fma_from_freeze_mul_add_right(float %x, float %y) {
31-
; CHECK-LABEL: fma_from_freeze_mul_add_right:
32-
; CHECK: ; %bb.0:
33-
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34-
; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0
35-
; CHECK-NEXT: s_setpc_b64 s[30:31]
18+
; GFX11-LABEL: fma_from_freeze_mul_add_right:
19+
; GFX11: ; %bb.0: ; %bb
20+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21+
; GFX11-NEXT: v_fma_f32 v0, v0, v1, 1.0
22+
; GFX11-NEXT: s_setpc_b64 s[30:31]
23+
bb:
3624
%mul = fmul contract float %x, %y
3725
%mul.fr = freeze float %mul
3826
%add = fadd contract float 1.000000e+00, %mul.fr
3927
ret float %add
4028
}
4129

42-
define float @fma_from_freeze_mul_add_right_with_nnan(float %x, float %y) {
43-
; CHECK-LABEL: fma_from_freeze_mul_add_right_with_nnan:
44-
; CHECK: ; %bb.0:
45-
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46-
; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
47-
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
48-
; CHECK-NEXT: v_add_f32_e32 v0, 1.0, v0
49-
; CHECK-NEXT: s_setpc_b64 s[30:31]
50-
%mul = fmul nnan contract float %x, %y
51-
%mul.fr = freeze float %mul
52-
%add = fadd nnan contract float 1.000000e+00, %mul.fr
53-
ret float %add
54-
}
55-
5630
define float @fma_from_freeze_mul_sub_left(float %x, float %y) {
57-
; CHECK-LABEL: fma_from_freeze_mul_sub_left:
58-
; CHECK: ; %bb.0:
59-
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60-
; CHECK-NEXT: v_fma_f32 v0, v0, v1, -1.0
61-
; CHECK-NEXT: s_setpc_b64 s[30:31]
31+
; GFX11-LABEL: fma_from_freeze_mul_sub_left:
32+
; GFX11: ; %bb.0: ; %bb
33+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34+
; GFX11-NEXT: v_fma_f32 v0, v0, v1, -1.0
35+
; GFX11-NEXT: s_setpc_b64 s[30:31]
36+
bb:
6237
%mul = fmul contract float %x, %y
6338
%mul.fr = freeze float %mul
6439
%sub = fsub contract float %mul.fr, 1.000000e+00
6540
ret float %sub
6641
}
6742

68-
define float @fma_from_freeze_mul_sub_left_with_nnan(float %x, float %y) {
69-
; CHECK-LABEL: fma_from_freeze_mul_sub_left_with_nnan:
70-
; CHECK: ; %bb.0:
71-
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72-
; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
73-
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
74-
; CHECK-NEXT: v_add_f32_e32 v0, -1.0, v0
75-
; CHECK-NEXT: s_setpc_b64 s[30:31]
76-
%mul = fmul nnan contract float %x, %y
77-
%mul.fr = freeze float %mul
78-
%sub = fsub nnan contract float %mul.fr, 1.000000e+00
79-
ret float %sub
80-
}
81-
8243
define float @fma_from_freeze_mul_sub_right(float %x, float %y) {
83-
; CHECK-LABEL: fma_from_freeze_mul_sub_right:
84-
; CHECK: ; %bb.0:
85-
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86-
; CHECK-NEXT: v_fma_f32 v0, -v0, v1, 1.0
87-
; CHECK-NEXT: s_setpc_b64 s[30:31]
44+
; GFX11-LABEL: fma_from_freeze_mul_sub_right:
45+
; GFX11: ; %bb.0: ; %bb
46+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
47+
; GFX11-NEXT: v_fma_f32 v0, -v0, v1, 1.0
48+
; GFX11-NEXT: s_setpc_b64 s[30:31]
49+
bb:
8850
%mul = fmul contract float %x, %y
8951
%mul.fr = freeze float %mul
9052
%sub = fsub contract float 1.000000e+00, %mul.fr
9153
ret float %sub
9254
}
93-
94-
define float @fma_from_freeze_mul_sub_right_with_nnan(float %x, float %y) {
95-
; CHECK-LABEL: fma_from_freeze_mul_sub_right_with_nnan:
96-
; CHECK: ; %bb.0:
97-
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98-
; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
99-
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
100-
; CHECK-NEXT: v_sub_f32_e32 v0, 1.0, v0
101-
; CHECK-NEXT: s_setpc_b64 s[30:31]
102-
%mul = fmul nnan contract float %x, %y
103-
%mul.fr = freeze float %mul
104-
%sub = fsub nnan contract float 1.000000e+00, %mul.fr
105-
ret float %sub
106-
}

0 commit comments

Comments
 (0)