Skip to content

Commit 0b6db77

Browse files
authored
[AMDGPU] Handle AV classes in SIFixSGPRCopies::processPHINode (#169038)
Fix a problem exposed by #166483, which uses AV classes in more places. `isVectorRegister` only accepts registers of VGPR or AGPR classes, whereas `hasVectorRegisters` additionally accepts the combined AV classes. Fixes: #168761
1 parent bc323b6 commit 0b6db77

10 files changed

+2496
-1269
lines changed

llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -856,8 +856,8 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
856856
}
857857
}
858858

859-
if (TRI->isVectorRegister(*MRI, PHIRes) ||
860-
RC0 == &AMDGPU::VReg_1RegClass) {
859+
if (TRI->hasVectorRegisters(MRI->getRegClass(PHIRes)) ||
860+
RC0 == &AMDGPU::VReg_1RegClass) {
861861
LLVM_DEBUG(dbgs() << "Legalizing PHI: " << MI);
862862
TII->legalizeOperands(MI, MDT);
863863
}

llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll

Lines changed: 315 additions & 327 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/av-split-dead-valno-crash.ll

Lines changed: 40 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -48,16 +48,17 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
4848
; CHECK-NEXT: .LBB0_1: ; %Flow9
4949
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
5050
; CHECK-NEXT: s_andn2_b64 vcc, exec, s[24:25]
51-
; CHECK-NEXT: s_cbranch_vccz .LBB0_17
51+
; CHECK-NEXT: v_mov_b64_e32 v[30:31], v[24:25]
52+
; CHECK-NEXT: s_cbranch_vccz .LBB0_18
5253
; CHECK-NEXT: .LBB0_2: ; %._crit_edge1942.i.i.i3548
5354
; CHECK-NEXT: ; =>This Loop Header: Depth=1
54-
; CHECK-NEXT: ; Child Loop BB0_6 Depth 2
55+
; CHECK-NEXT: ; Child Loop BB0_7 Depth 2
5556
; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1]
56-
; CHECK-NEXT: s_cbranch_vccnz .LBB0_9
57+
; CHECK-NEXT: s_cbranch_vccnz .LBB0_11
5758
; CHECK-NEXT: ; %bb.3: ; %.preheader1868.i.i.i3244
5859
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
5960
; CHECK-NEXT: s_mov_b64 vcc, s[4:5]
60-
; CHECK-NEXT: s_cbranch_vccz .LBB0_10
61+
; CHECK-NEXT: s_cbranch_vccz .LBB0_12
6162
; CHECK-NEXT: ; %bb.4: ; %.preheader1855.i.i.i3329.preheader
6263
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
6364
; CHECK-NEXT: v_mov_b64_e32 v[24:25], s[14:15]
@@ -85,49 +86,54 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
8586
; CHECK-NEXT: v_fmac_f64_e32 v[26:27], 0, v[28:29]
8687
; CHECK-NEXT: v_mov_b64_e32 v[28:29], v[18:19]
8788
; CHECK-NEXT: v_fmac_f64_e32 v[28:29], 0, v[26:27]
88-
; CHECK-NEXT: s_branch .LBB0_6
89-
; CHECK-NEXT: .LBB0_5: ; %Flow
90-
; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2
89+
; CHECK-NEXT: s_branch .LBB0_7
90+
; CHECK-NEXT: .LBB0_5: ; in Loop: Header=BB0_7 Depth=2
91+
; CHECK-NEXT: s_mov_b64 s[24:25], -1
92+
; CHECK-NEXT: ; implicit-def: $agpr0_agpr1
93+
; CHECK-NEXT: s_mov_b64 s[8:9], -1
94+
; CHECK-NEXT: .LBB0_6: ; %Flow
95+
; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=2
9196
; CHECK-NEXT: s_and_b64 vcc, exec, s[8:9]
92-
; CHECK-NEXT: s_cbranch_vccnz .LBB0_11
93-
; CHECK-NEXT: .LBB0_6: ; %.preheader1855.i.i.i3329
97+
; CHECK-NEXT: s_cbranch_vccnz .LBB0_13
98+
; CHECK-NEXT: .LBB0_7: ; %.preheader1855.i.i.i3329
9499
; CHECK-NEXT: ; Parent Loop BB0_2 Depth=1
95100
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
96101
; CHECK-NEXT: v_accvgpr_read_b32 v27, a1
97102
; CHECK-NEXT: v_accvgpr_read_b32 v26, a0
98-
; CHECK-NEXT: s_mov_b64 s[24:25], -1
99-
; CHECK-NEXT: s_mov_b64 s[8:9], -1
100103
; CHECK-NEXT: s_mov_b64 vcc, s[2:3]
101-
; CHECK-NEXT: ; implicit-def: $agpr0_agpr1
102104
; CHECK-NEXT: s_cbranch_vccz .LBB0_5
103-
; CHECK-NEXT: ; %bb.7: ; %.lr.ph2070.i.i.i3291
104-
; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2
105-
; CHECK-NEXT: v_accvgpr_write_b32 a0, v30
106-
; CHECK-NEXT: v_accvgpr_write_b32 a1, v31
107-
; CHECK-NEXT: s_mov_b64 s[8:9], s[18:19]
105+
; CHECK-NEXT: ; %bb.8: ; %.lr.ph2070.i.i.i3291
106+
; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=2
108107
; CHECK-NEXT: s_mov_b64 vcc, s[6:7]
109-
; CHECK-NEXT: s_cbranch_vccz .LBB0_5
110-
; CHECK-NEXT: ; %bb.8: ; %.preheader1856.preheader.i.i.i3325
111-
; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2
108+
; CHECK-NEXT: s_cbranch_vccz .LBB0_10
109+
; CHECK-NEXT: ; %bb.9: ; %.preheader1856.preheader.i.i.i3325
110+
; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=2
112111
; CHECK-NEXT: v_accvgpr_write_b32 a0, v28
113112
; CHECK-NEXT: s_mov_b64 s[24:25], 0
114113
; CHECK-NEXT: v_accvgpr_write_b32 a1, v29
115114
; CHECK-NEXT: s_mov_b64 s[8:9], 0
116-
; CHECK-NEXT: s_branch .LBB0_5
117-
; CHECK-NEXT: .LBB0_9: ; in Loop: Header=BB0_2 Depth=1
115+
; CHECK-NEXT: s_branch .LBB0_6
116+
; CHECK-NEXT: .LBB0_10: ; in Loop: Header=BB0_7 Depth=2
117+
; CHECK-NEXT: v_accvgpr_write_b32 a0, v30
118+
; CHECK-NEXT: s_mov_b64 s[24:25], -1
119+
; CHECK-NEXT: v_accvgpr_write_b32 a1, v31
120+
; CHECK-NEXT: s_mov_b64 s[8:9], s[18:19]
121+
; CHECK-NEXT: s_branch .LBB0_6
122+
; CHECK-NEXT: .LBB0_11: ; in Loop: Header=BB0_2 Depth=1
123+
; CHECK-NEXT: v_mov_b64_e32 v[24:25], s[10:11]
118124
; CHECK-NEXT: s_mov_b64 s[22:23], 0
119-
; CHECK-NEXT: v_mov_b64_e32 v[30:31], s[10:11]
120125
; CHECK-NEXT: s_mov_b64 s[8:9], s[20:21]
121-
; CHECK-NEXT: s_branch .LBB0_15
122-
; CHECK-NEXT: .LBB0_10: ; in Loop: Header=BB0_2 Depth=1
126+
; CHECK-NEXT: s_branch .LBB0_16
127+
; CHECK-NEXT: .LBB0_12: ; in Loop: Header=BB0_2 Depth=1
123128
; CHECK-NEXT: s_mov_b64 s[8:9], -1
124129
; CHECK-NEXT: v_mov_b64_e32 v[22:23], 0
125-
; CHECK-NEXT: s_branch .LBB0_15
126-
; CHECK-NEXT: .LBB0_11: ; %loop.exit.guard
130+
; CHECK-NEXT: v_mov_b64_e32 v[24:25], v[30:31]
131+
; CHECK-NEXT: s_branch .LBB0_16
132+
; CHECK-NEXT: .LBB0_13: ; %loop.exit.guard
127133
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
128134
; CHECK-NEXT: s_and_b64 vcc, exec, s[24:25]
129-
; CHECK-NEXT: s_cbranch_vccz .LBB0_13
130-
; CHECK-NEXT: ; %bb.12: ; %._crit_edge2105.i.i.i2330.loopexit
135+
; CHECK-NEXT: s_cbranch_vccz .LBB0_15
136+
; CHECK-NEXT: ; %bb.14: ; %._crit_edge2105.i.i.i2330.loopexit
131137
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
132138
; CHECK-NEXT: v_cmp_nlg_f64_e64 s[8:9], 0, v[26:27]
133139
; CHECK-NEXT: v_cndmask_b32_e64 v23, v23, 0, s[16:17]
@@ -139,24 +145,21 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
139145
; CHECK-NEXT: s_cselect_b32 s23, s23, 0
140146
; CHECK-NEXT: s_cselect_b32 s22, s22, 0
141147
; CHECK-NEXT: s_mov_b64 s[8:9], -1
142-
; CHECK-NEXT: s_branch .LBB0_14
143-
; CHECK-NEXT: .LBB0_13: ; in Loop: Header=BB0_2 Depth=1
148+
; CHECK-NEXT: s_branch .LBB0_16
149+
; CHECK-NEXT: .LBB0_15: ; in Loop: Header=BB0_2 Depth=1
144150
; CHECK-NEXT: s_mov_b64 s[8:9], 0
145151
; CHECK-NEXT: v_mov_b64_e32 v[22:23], 0
146-
; CHECK-NEXT: .LBB0_14: ; %Flow6
147-
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
148-
; CHECK-NEXT: v_mov_b64_e32 v[30:31], v[24:25]
149-
; CHECK-NEXT: .LBB0_15: ; %Flow6
152+
; CHECK-NEXT: .LBB0_16: ; %Flow6
150153
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
151154
; CHECK-NEXT: s_mov_b64 s[24:25], -1
152155
; CHECK-NEXT: s_and_b64 vcc, exec, s[8:9]
153156
; CHECK-NEXT: s_cbranch_vccz .LBB0_1
154-
; CHECK-NEXT: ; %bb.16: ; %._crit_edge2105.i.i.i2330
157+
; CHECK-NEXT: ; %bb.17: ; %._crit_edge2105.i.i.i2330
155158
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
156159
; CHECK-NEXT: s_mov_b64 s[24:25], 0
157160
; CHECK-NEXT: global_store_dwordx2 v20, v[20:21], s[12:13]
158161
; CHECK-NEXT: s_branch .LBB0_1
159-
; CHECK-NEXT: .LBB0_17: ; %DummyReturnBlock
162+
; CHECK-NEXT: .LBB0_18: ; %DummyReturnBlock
160163
; CHECK-NEXT: s_endpgm
161164
entry:
162165
br label %._crit_edge1942.i.i.i3548

0 commit comments

Comments
 (0)