Skip to content

Commit b291d1a

Browse files
authored
[TII] Do not fold undef copies (#147392)
RegallocBase::cleanupFailedVReg hacks up the state of the liveness in order to facilitate producing valid IR. During this process, we may end up producing undef copies. If the destination of these copies is a spill candidate, we will attempt to fold the source register when issuing the spill. The undef of the source is not propagated to storeRegToStackSlot , thus we end up dropping the undef, issuing a spill, and producing an illegal liveness state. This checks for undef copies, and, if found, inserts a kill instead of spill.
1 parent 3dc5d68 commit b291d1a

File tree

2 files changed

+90
-5
lines changed

2 files changed

+90
-5
lines changed

llvm/lib/CodeGen/TargetInstrInfo.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -792,12 +792,18 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
792792

793793
const MachineOperand &MO = MI.getOperand(1 - Ops[0]);
794794
MachineBasicBlock::iterator Pos = MI;
795-
796-
if (Flags == MachineMemOperand::MOStore)
797-
storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI,
798-
Register());
799-
else
795+
if (Flags == MachineMemOperand::MOStore) {
796+
if (MO.isUndef()) {
797+
// If this is an undef copy, we do not need to bother we inserting spill
798+
// code.
799+
BuildMI(*MBB, Pos, MI.getDebugLoc(), get(TargetOpcode::KILL)).add(MO);
800+
} else {
801+
storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI,
802+
Register());
803+
}
804+
} else
800805
loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI, Register());
806+
801807
return &*--Pos;
802808
}
803809

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs --start-before=greedy,2 --stop-after=greedy,2 %s -o - | FileCheck %s
3+
4+
# Make sure there's no machine verifier error
5+
6+
# If RA is unable to find a register to allocate, then cleanupFailedVReg will do ad-hoc rewriting and will insert undefs to make the LiveRanges workable.
7+
# %30:av_128 = COPY undef $vgpr0_vgpr1_vgpr2_vgpr3 is an example of such a rewrite / undef. If we were to want to spill %30, we should not be inserting
8+
# actual spill code, as the source operand is undef.
9+
# Check that there are no verfier issues with the LiveRange of $vgpr0_vgpr1_vgpr2_vgpr3 / that we do not insert spill code for %30.
10+
11+
12+
--- |
13+
define void @foo() #0 {
14+
ret void
15+
}
16+
17+
attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
18+
19+
...
20+
21+
---
22+
name: foo
23+
tracksRegLiveness: true
24+
stack:
25+
- { id: 0, type: spill-slot, size: 32, alignment: 4 }
26+
machineFunctionInfo:
27+
maxKernArgAlign: 4
28+
isEntryFunction: true
29+
waveLimiter: true
30+
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
31+
stackPtrOffsetReg: '$sgpr32'
32+
frameOffsetReg: '$sgpr33'
33+
hasSpilledVGPRs: true
34+
argumentInfo:
35+
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
36+
dispatchPtr: { reg: '$sgpr4_sgpr5' }
37+
kernargSegmentPtr: { reg: '$sgpr6_sgpr7' }
38+
workGroupIDX: { reg: '$sgpr8' }
39+
privateSegmentWaveByteOffset: { reg: '$sgpr9' }
40+
body: |
41+
bb.0:
42+
; CHECK-LABEL: name: foo
43+
; CHECK: INLINEASM &"; def $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 10 /* regdef */, def %10, 10 /* regdef */, def %1, 10 /* regdef */, def %2, 10 /* regdef */, def $vgpr0_vgpr1_vgpr2_vgpr3, 10 /* regdef */, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
44+
; CHECK-NEXT: KILL undef $vgpr0_vgpr1_vgpr2_vgpr3
45+
; CHECK-NEXT: SI_SPILL_AV160_SAVE %2, %stack.1, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.1, align 4, addrspace 5)
46+
; CHECK-NEXT: SI_SPILL_AV256_SAVE %1, %stack.3, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.3, align 4, addrspace 5)
47+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY %10
48+
; CHECK-NEXT: SI_SPILL_V512_SAVE [[COPY]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
49+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_512 = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
50+
; CHECK-NEXT: SI_SPILL_V512_SAVE [[COPY1]], %stack.6, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.6, align 4, addrspace 5)
51+
; CHECK-NEXT: INLINEASM &"; clobber", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
52+
; CHECK-NEXT: [[SI_SPILL_V512_RESTORE:%[0-9]+]]:vreg_512 = SI_SPILL_V512_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.6, align 4, addrspace 5)
53+
; CHECK-NEXT: $agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16 = COPY [[SI_SPILL_V512_RESTORE]]
54+
; CHECK-NEXT: [[SI_SPILL_V512_RESTORE1:%[0-9]+]]:vreg_512 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
55+
; CHECK-NEXT: SI_SPILL_V512_SAVE [[SI_SPILL_V512_RESTORE1]], %stack.4, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.4, align 4, addrspace 5)
56+
; CHECK-NEXT: [[SI_SPILL_AV256_RESTORE:%[0-9]+]]:vreg_256 = SI_SPILL_AV256_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.3, align 4, addrspace 5)
57+
; CHECK-NEXT: SI_SPILL_V256_SAVE [[SI_SPILL_AV256_RESTORE]], %stack.5, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.5, align 4, addrspace 5)
58+
; CHECK-NEXT: [[SI_SPILL_AV160_RESTORE:%[0-9]+]]:vreg_160 = SI_SPILL_AV160_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s160) from %stack.1, align 4, addrspace 5)
59+
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_AV128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5)
60+
; CHECK-NEXT: [[SI_SPILL_AV512_RESTORE:%[0-9]+]]:av_512 = SI_SPILL_AV512_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.4, align 4, addrspace 5)
61+
; CHECK-NEXT: [[SI_SPILL_V256_RESTORE:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.5, align 4, addrspace 5)
62+
; CHECK-NEXT: INLINEASM &"; use $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 9 /* reguse */, [[SI_SPILL_AV512_RESTORE]], 9 /* reguse */, [[SI_SPILL_V256_RESTORE]], 9 /* reguse */, [[SI_SPILL_AV160_RESTORE]], 9 /* reguse */, undef $vgpr0_vgpr1_vgpr2_vgpr3, 9 /* reguse */, $agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16
63+
; CHECK-NEXT: SI_RETURN
64+
INLINEASM &"; def $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 10, def %22:vreg_512, 10, def %25:vreg_256, 10, def %28:vreg_160, 10, def $vgpr0_vgpr1_vgpr2_vgpr3, 10, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
65+
%30:av_128 = COPY undef $vgpr0_vgpr1_vgpr2_vgpr3
66+
%27:av_160 = COPY %28:vreg_160
67+
%24:av_256 = COPY %25:vreg_256
68+
SI_SPILL_V512_SAVE %22:vreg_512, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
69+
%18:vreg_512 = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
70+
INLINEASM &"; clobber", 1 /* sideeffect attdialect */, 10, implicit-def early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
71+
$agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16 = COPY %18:vreg_512
72+
%23:vreg_512 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
73+
%26:vreg_256 = COPY %24:av_256
74+
%29:vreg_160 = COPY %27:av_160
75+
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %30:av_128
76+
INLINEASM &"; use $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 9, %23:vreg_512, 9, %26:vreg_256, 9, %29:vreg_160, 9, undef $vgpr0_vgpr1_vgpr2_vgpr3, 9, $agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16
77+
SI_RETURN
78+
79+
...

0 commit comments

Comments
 (0)