Skip to content

Commit 2cf303b

Browse files
committed
AMDGPU: Delete redundant recursive copy handling code
This fixes a regression exposed after commit 4454152. It also introduces a few small regressions for true16: there are more cases where the value can propagate through subregister extracts, which need new handling. They're also small enough that perhaps there's a way to avoid needing to deal with this case in the first place.
1 parent 2b73dad commit 2cf303b

File tree

5 files changed

+449
-258
lines changed

5 files changed

+449
-258
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1129,40 +1129,11 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
11291129
if (!AMDGPU::isSISrcOperand(Desc, UseOpIdx))
11301130
return false;
11311131

1132-
MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
11331132
if (OpToFold.isImm() && OpToFold.isOperandLegal(*TII, *UseMI, UseOpIdx)) {
11341133
appendFoldCandidate(FoldList, UseMI, UseOpIdx, OpToFold);
11351134
return true;
11361135
}
11371136

1138-
// TODO: Verify the following code handles subregisters correctly.
1139-
// TODO: Handle extract of global reference
1140-
if (UseOp.getSubReg())
1141-
return false;
1142-
1143-
if (!OpToFold.isReg())
1144-
return false;
1145-
1146-
Register UseReg = OpToFold.getReg();
1147-
if (!UseReg.isVirtual())
1148-
return false;
1149-
1150-
// Maybe it is just a COPY of an immediate itself.
1151-
1152-
// FIXME: Remove this handling. There is already special case folding of
1153-
// immediate into copy in foldOperand. This is looking for the def of the
1154-
// value the folding started from in the first place.
1155-
MachineInstr *Def = MRI->getVRegDef(UseReg);
1156-
if (Def && TII->isFoldableCopy(*Def)) {
1157-
MachineOperand &DefOp = Def->getOperand(1);
1158-
if (DefOp.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) {
1159-
FoldableDef FoldableImm(DefOp.getImm(), OpToFold.DefRC,
1160-
OpToFold.DefSubReg);
1161-
appendFoldCandidate(FoldList, UseMI, UseOpIdx, FoldableImm);
1162-
return true;
1163-
}
1164-
}
1165-
11661137
return false;
11671138
}
11681139

llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll

Lines changed: 39 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2138,12 +2138,33 @@ define <2 x double> @v_fma_mul_add_32_v2f64(<2 x double> %x, <2 x double> %y) {
21382138
; GFX9-NEXT: v_fma_f64 v[2:3], v[2:3], s[4:5], v[6:7]
21392139
; GFX9-NEXT: s_setpc_b64 s[30:31]
21402140
;
2141-
; GFX1011-LABEL: v_fma_mul_add_32_v2f64:
2142-
; GFX1011: ; %bb.0:
2143-
; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2144-
; GFX1011-NEXT: v_fma_f64 v[0:1], 0x40400000, v[0:1], v[4:5]
2145-
; GFX1011-NEXT: v_fma_f64 v[2:3], 0x40400000, v[2:3], v[6:7]
2146-
; GFX1011-NEXT: s_setpc_b64 s[30:31]
2141+
; GFX10-SDAG-LABEL: v_fma_mul_add_32_v2f64:
2142+
; GFX10-SDAG: ; %bb.0:
2143+
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2144+
; GFX10-SDAG-NEXT: v_fma_f64 v[0:1], 0x40400000, v[0:1], v[4:5]
2145+
; GFX10-SDAG-NEXT: v_fma_f64 v[2:3], 0x40400000, v[2:3], v[6:7]
2146+
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
2147+
;
2148+
; GFX10-GISEL-LABEL: v_fma_mul_add_32_v2f64:
2149+
; GFX10-GISEL: ; %bb.0:
2150+
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2151+
; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], 0x40400000, v[4:5]
2152+
; GFX10-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], 0x40400000, v[6:7]
2153+
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
2154+
;
2155+
; GFX11-SDAG-LABEL: v_fma_mul_add_32_v2f64:
2156+
; GFX11-SDAG: ; %bb.0:
2157+
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2158+
; GFX11-SDAG-NEXT: v_fma_f64 v[0:1], 0x40400000, v[0:1], v[4:5]
2159+
; GFX11-SDAG-NEXT: v_fma_f64 v[2:3], 0x40400000, v[2:3], v[6:7]
2160+
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
2161+
;
2162+
; GFX11-GISEL-LABEL: v_fma_mul_add_32_v2f64:
2163+
; GFX11-GISEL: ; %bb.0:
2164+
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2165+
; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], 0x40400000, v[4:5]
2166+
; GFX11-GISEL-NEXT: v_fma_f64 v[2:3], v[2:3], 0x40400000, v[6:7]
2167+
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
21472168
%mul = fmul contract <2 x double> %x, <double 32.0, double 32.0>
21482169
%fma = fadd contract <2 x double> %mul, %y
21492170
ret <2 x double> %fma
@@ -2501,8 +2522,8 @@ define <2 x double> @v_mul_16_v2f64(<2 x double> %x) {
25012522
; GFX10-GISEL-LABEL: v_mul_16_v2f64:
25022523
; GFX10-GISEL: ; %bb.0:
25032524
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2504-
; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40300000, v[0:1]
2505-
; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], 0x40300000, v[2:3]
2525+
; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 0x40300000
2526+
; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], 0x40300000
25062527
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
25072528
;
25082529
; GFX11-SDAG-LABEL: v_mul_16_v2f64:
@@ -2515,8 +2536,8 @@ define <2 x double> @v_mul_16_v2f64(<2 x double> %x) {
25152536
; GFX11-GISEL-LABEL: v_mul_16_v2f64:
25162537
; GFX11-GISEL: ; %bb.0:
25172538
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2518-
; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40300000, v[0:1]
2519-
; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], 0x40300000, v[2:3]
2539+
; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 0x40300000
2540+
; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], 0x40300000
25202541
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
25212542
%mul = fmul <2 x double> %x, <double 16.0, double 16.0>
25222543
ret <2 x double> %mul
@@ -2549,8 +2570,8 @@ define <2 x double> @v_mul_neg16_v2f64(<2 x double> %x) {
25492570
; GFX10-GISEL-LABEL: v_mul_neg16_v2f64:
25502571
; GFX10-GISEL: ; %bb.0:
25512572
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2552-
; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0300000, v[0:1]
2553-
; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], 0xc0300000, v[2:3]
2573+
; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 0xc0300000
2574+
; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], 0xc0300000
25542575
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
25552576
;
25562577
; GFX11-SDAG-LABEL: v_mul_neg16_v2f64:
@@ -2563,8 +2584,8 @@ define <2 x double> @v_mul_neg16_v2f64(<2 x double> %x) {
25632584
; GFX11-GISEL-LABEL: v_mul_neg16_v2f64:
25642585
; GFX11-GISEL: ; %bb.0:
25652586
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2566-
; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0xc0300000, v[0:1]
2567-
; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], 0xc0300000, v[2:3]
2587+
; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], 0xc0300000
2588+
; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], 0xc0300000
25682589
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
25692590
%mul = fmul <2 x double> %x, <double -16.0, double -16.0>
25702591
ret <2 x double> %mul
@@ -2597,8 +2618,8 @@ define <2 x double> @v_mul_fabs_16_v2f64(<2 x double> %x) {
25972618
; GFX10-GISEL-LABEL: v_mul_fabs_16_v2f64:
25982619
; GFX10-GISEL: ; %bb.0:
25992620
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2600-
; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], 0x40300000, |v[0:1]|
2601-
; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], 0x40300000, |v[2:3]|
2621+
; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, 0x40300000
2622+
; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], |v[2:3]|, 0x40300000
26022623
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
26032624
;
26042625
; GFX11-SDAG-LABEL: v_mul_fabs_16_v2f64:
@@ -2611,8 +2632,8 @@ define <2 x double> @v_mul_fabs_16_v2f64(<2 x double> %x) {
26112632
; GFX11-GISEL-LABEL: v_mul_fabs_16_v2f64:
26122633
; GFX11-GISEL: ; %bb.0:
26132634
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2614-
; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], 0x40300000, |v[0:1]|
2615-
; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], 0x40300000, |v[2:3]|
2635+
; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, 0x40300000
2636+
; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], |v[2:3]|, 0x40300000
26162637
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
26172638
%x.fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %x)
26182639
%mul = fmul <2 x double> %x.fabs, <double 16.0, double 16.0>

0 commit comments

Comments
 (0)