Skip to content

Commit 4e86066

Browse files
committed
Add testcase that spills a killed register
Signed-off-by: John Lu <[email protected]>
1 parent c648db5 commit 4e86066

File tree

1 file changed

+168
-0
lines changed

1 file changed

+168
-0
lines changed
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
# RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs -start-before=prologepilog -stop-after=postrapseudos -o - %s | FileCheck -check-prefix=GCN %s
2+
3+
# This testcase shows the necessity of adding an undef flag to the $sgpr source when
4+
# creating a V_WRITELANE_B32.
5+
#
6+
# The prologepilog pass will create a:
7+
#
8+
# $exec = S_MOV_B64 killed $sgpr4_sgpr5
9+
#
10+
# but $sgpr5 will be spilled by a:
11+
#
12+
# $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, killed $vgpr1(tied-def 0), implicit $sgpr4_sgpr5
13+
#
14+
# The SI_SPILL_S32_TO_VGPR will be converted in postrapseudos to:
15+
#
16+
# $vgpr1 = V_WRITELANE_B32 undef $sgpr5, 1, killed $vgpr1(tied-def 0)
17+
#
18+
# The undef flag is necessary to satisfy the MachineVerifier, since $sgpr5 would otherwise be read after it has been killed.
19+
# This testcase was derived from function bitcast_v32i16_to_v64i8_scalar in amdgcn.bitcast.512bit.ll.
20+
21+
# GCN-LABEL: name: bitcast_v32i16_to_v64i8_scalar
22+
---
23+
name: bitcast_v32i16_to_v64i8_scalar
24+
alignment: 1
25+
exposesReturnsTwice: false
26+
legalized: false
27+
regBankSelected: false
28+
selected: false
29+
failedISel: false
30+
tracksRegLiveness: true
31+
hasWinCFI: false
32+
noPhis: true
33+
isSSA: false
34+
noVRegs: true
35+
hasFakeUses: false
36+
callsEHReturn: false
37+
callsUnwindInit: false
38+
hasEHContTarget: false
39+
hasEHScopes: false
40+
hasEHFunclets: false
41+
isOutlined: false
42+
debugInstrRef: false
43+
failsVerification: false
44+
tracksDebugUserValues: true
45+
registers: []
46+
liveins: []
47+
frameInfo:
48+
isFrameAddressTaken: false
49+
isReturnAddressTaken: false
50+
hasStackMap: false
51+
hasPatchPoint: false
52+
stackSize: 0
53+
offsetAdjustment: 0
54+
maxAlignment: 4
55+
adjustsStack: false
56+
hasCalls: false
57+
stackProtector: ''
58+
functionContext: ''
59+
maxCallFrameSize: 4294967295
60+
cvBytesOfCalleeSavedRegisters: 0
61+
hasOpaqueSPAdjustment: false
62+
hasVAStart: false
63+
hasMustTailInVarArgFunc: false
64+
hasTailCall: false
65+
isCalleeSavedInfoValid: false
66+
localFrameSize: 0
67+
fixedStack: []
68+
stack:
69+
- { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
70+
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
71+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
72+
- { id: 1, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
73+
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
74+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
75+
- { id: 2, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
76+
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
77+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
78+
entry_values: []
79+
callSites: []
80+
debugValueSubstitutions: []
81+
constants: []
82+
machineFunctionInfo:
83+
explicitKernArgSize: 0
84+
maxKernArgAlign: 1
85+
ldsSize: 0
86+
gdsSize: 0
87+
dynLDSAlign: 1
88+
isEntryFunction: false
89+
isChainFunction: false
90+
noSignedZerosFPMath: false
91+
memoryBound: false
92+
waveLimiter: false
93+
hasSpilledSGPRs: true
94+
hasSpilledVGPRs: true
95+
numWaveDispatchSGPRs: 30
96+
numWaveDispatchVGPRs: 20
97+
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
98+
frameOffsetReg: '$sgpr33'
99+
stackPtrOffsetReg: '$sgpr32'
100+
bytesInStackArgArea: 0
101+
returnsVoid: true
102+
argumentInfo:
103+
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
104+
dispatchPtr: { reg: '$sgpr4_sgpr5' }
105+
queuePtr: { reg: '$sgpr6_sgpr7' }
106+
dispatchID: { reg: '$sgpr10_sgpr11' }
107+
workGroupIDX: { reg: '$sgpr12' }
108+
workGroupIDY: { reg: '$sgpr13' }
109+
workGroupIDZ: { reg: '$sgpr14' }
110+
LDSKernelId: { reg: '$sgpr15' }
111+
implicitArgPtr: { reg: '$sgpr8_sgpr9' }
112+
workItemIDX: { reg: '$vgpr31', mask: 1023 }
113+
workItemIDY: { reg: '$vgpr31', mask: 1047552 }
114+
workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
115+
psInputAddr: 0
116+
psInputEnable: 0
117+
maxMemoryClusterDWords: 8
118+
mode:
119+
ieee: true
120+
dx10-clamp: true
121+
fp32-input-denormals: true
122+
fp32-output-denormals: true
123+
fp64-fp16-input-denormals: true
124+
fp64-fp16-output-denormals: true
125+
highBitsOf32BitAddress: 0
126+
occupancy: 3
127+
spillPhysVGPRs:
128+
- '$vgpr63'
129+
wwmReservedRegs:
130+
- '$vgpr63'
131+
- '$vgpr62'
132+
vgprForAGPRCopy: ''
133+
sgprForEXECCopy: '$sgpr100_sgpr101'
134+
longBranchReservedReg: ''
135+
hasInitWholeWave: false
136+
dynamicVGPRBlockSize: 0
137+
scratchReservedForDynamicVGPRs: 0
138+
numKernargPreloadSGPRs: 0
139+
isWholeWaveFunction: false
140+
body: |
141+
bb.0:
142+
successors: %bb.3(0x40000000), %bb.1(0x40000000)
143+
144+
renamable $sgpr4 = IMPLICIT_DEF
145+
$vgpr62 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, killed $vgpr62, implicit $sgpr4_sgpr5
146+
S_CBRANCH_EXECZ %bb.1, implicit $exec
147+
S_BRANCH %bb.3
148+
149+
bb.1:
150+
successors: %bb.2(0x40000000), %bb.4(0x40000000)
151+
152+
S_CBRANCH_EXECZ %bb.4, implicit $exec
153+
S_BRANCH %bb.2
154+
155+
bb.2:
156+
successors: %bb.4(0x80000000)
157+
158+
S_BRANCH %bb.4
159+
160+
bb.3:
161+
successors: %bb.1(0x80000000)
162+
163+
S_BRANCH %bb.1
164+
165+
bb.4:
166+
167+
SI_RETURN
168+
...

0 commit comments

Comments
 (0)