Skip to content

Commit 842bd36

Browse files
committed
AMDGPU: Start to use AV classes for unknown vector class
Use AGPR+VGPR superclasses for gfx90a+. The register class used for a type should be the broadest possible class, to be contextually restricted later. InstrEmitter clamps these to the common subclass of the context use instructions, so we're best off using the broadest possible class for all types. Note this does very little because we only use VGPR classes for FP types (though this doesn't particularly make any sense), and we legalize normal loads and stores to integer.
1 parent 9c1ccff commit 842bd36

16 files changed

+662
-655
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -96,59 +96,65 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
9696
addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass);
9797

9898
const SIRegisterInfo *TRI = STI.getRegisterInfo();
99-
const TargetRegisterClass *V64RegClass = TRI->getVGPR64Class();
99+
const TargetRegisterClass *V64RegClass =
100+
TRI->getDefaultVectorSuperClassForBitWidth(64);
100101

101102
addRegisterClass(MVT::f64, V64RegClass);
102103
addRegisterClass(MVT::v2f32, V64RegClass);
103104
addRegisterClass(MVT::Untyped, V64RegClass);
104105

105106
addRegisterClass(MVT::v3i32, &AMDGPU::SGPR_96RegClass);
106-
addRegisterClass(MVT::v3f32, &AMDGPU::VReg_96RegClass);
107+
addRegisterClass(MVT::v3f32, TRI->getDefaultVectorSuperClassForBitWidth(96));
107108

108109
addRegisterClass(MVT::v2i64, &AMDGPU::SGPR_128RegClass);
109110
addRegisterClass(MVT::v2f64, &AMDGPU::SGPR_128RegClass);
110111

111112
addRegisterClass(MVT::v4i32, &AMDGPU::SGPR_128RegClass);
112-
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
113+
addRegisterClass(MVT::v4f32, TRI->getDefaultVectorSuperClassForBitWidth(128));
113114

114115
addRegisterClass(MVT::v5i32, &AMDGPU::SGPR_160RegClass);
115-
addRegisterClass(MVT::v5f32, &AMDGPU::VReg_160RegClass);
116+
addRegisterClass(MVT::v5f32, TRI->getDefaultVectorSuperClassForBitWidth(160));
116117

117118
addRegisterClass(MVT::v6i32, &AMDGPU::SGPR_192RegClass);
118-
addRegisterClass(MVT::v6f32, &AMDGPU::VReg_192RegClass);
119+
addRegisterClass(MVT::v6f32, TRI->getDefaultVectorSuperClassForBitWidth(192));
119120

120121
addRegisterClass(MVT::v3i64, &AMDGPU::SGPR_192RegClass);
121-
addRegisterClass(MVT::v3f64, &AMDGPU::VReg_192RegClass);
122+
addRegisterClass(MVT::v3f64, TRI->getDefaultVectorSuperClassForBitWidth(192));
122123

123124
addRegisterClass(MVT::v7i32, &AMDGPU::SGPR_224RegClass);
124-
addRegisterClass(MVT::v7f32, &AMDGPU::VReg_224RegClass);
125+
addRegisterClass(MVT::v7f32, TRI->getDefaultVectorSuperClassForBitWidth(224));
125126

126127
addRegisterClass(MVT::v8i32, &AMDGPU::SGPR_256RegClass);
127-
addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
128+
addRegisterClass(MVT::v8f32, TRI->getDefaultVectorSuperClassForBitWidth(256));
128129

129130
addRegisterClass(MVT::v4i64, &AMDGPU::SGPR_256RegClass);
130-
addRegisterClass(MVT::v4f64, &AMDGPU::VReg_256RegClass);
131+
addRegisterClass(MVT::v4f64, TRI->getDefaultVectorSuperClassForBitWidth(256));
131132

132133
addRegisterClass(MVT::v9i32, &AMDGPU::SGPR_288RegClass);
133-
addRegisterClass(MVT::v9f32, &AMDGPU::VReg_288RegClass);
134+
addRegisterClass(MVT::v9f32, TRI->getDefaultVectorSuperClassForBitWidth(288));
134135

135136
addRegisterClass(MVT::v10i32, &AMDGPU::SGPR_320RegClass);
136-
addRegisterClass(MVT::v10f32, &AMDGPU::VReg_320RegClass);
137+
addRegisterClass(MVT::v10f32,
138+
TRI->getDefaultVectorSuperClassForBitWidth(320));
137139

138140
addRegisterClass(MVT::v11i32, &AMDGPU::SGPR_352RegClass);
139-
addRegisterClass(MVT::v11f32, &AMDGPU::VReg_352RegClass);
141+
addRegisterClass(MVT::v11f32,
142+
TRI->getDefaultVectorSuperClassForBitWidth(352));
140143

141144
addRegisterClass(MVT::v12i32, &AMDGPU::SGPR_384RegClass);
142-
addRegisterClass(MVT::v12f32, &AMDGPU::VReg_384RegClass);
145+
addRegisterClass(MVT::v12f32,
146+
TRI->getDefaultVectorSuperClassForBitWidth(384));
143147

144148
addRegisterClass(MVT::v16i32, &AMDGPU::SGPR_512RegClass);
145-
addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
149+
addRegisterClass(MVT::v16f32,
150+
TRI->getDefaultVectorSuperClassForBitWidth(512));
146151

147152
addRegisterClass(MVT::v8i64, &AMDGPU::SGPR_512RegClass);
148-
addRegisterClass(MVT::v8f64, &AMDGPU::VReg_512RegClass);
153+
addRegisterClass(MVT::v8f64, TRI->getDefaultVectorSuperClassForBitWidth(512));
149154

150155
addRegisterClass(MVT::v16i64, &AMDGPU::SGPR_1024RegClass);
151-
addRegisterClass(MVT::v16f64, &AMDGPU::VReg_1024RegClass);
156+
addRegisterClass(MVT::v16f64,
157+
TRI->getDefaultVectorSuperClassForBitWidth(1024));
152158

153159
if (Subtarget->has16BitInsts()) {
154160
if (Subtarget->useRealTrue16Insts()) {
@@ -180,7 +186,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
180186
}
181187

182188
addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
183-
addRegisterClass(MVT::v32f32, &AMDGPU::VReg_1024RegClass);
189+
addRegisterClass(MVT::v32f32,
190+
TRI->getDefaultVectorSuperClassForBitWidth(1024));
184191

185192
computeRegisterProperties(Subtarget->getRegisterInfo());
186193

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3557,6 +3557,17 @@ SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
35573557
: getAnyVectorSuperClassForBitWidth(BitWidth);
35583558
}
35593559

3560+
// Select the default vector register class for a value of BitWidth bits.
// Subtargets with gfx90a instructions get the result of
// getVectorSuperClassForBitWidth(); all other subtargets get the VGPR class
// from getVGPRClassForBitWidth().
const TargetRegisterClass *
3561+
SIRegisterInfo::getDefaultVectorSuperClassForBitWidth(unsigned BitWidth) const {
3562+
// TODO: In principle this should use AV classes for gfx908 too. This is
3563+
// limited to 90a+ to avoid regressing special case copy optimizations which
3564+
// need new handling. The core issue is that it's not possible to directly
3565+
// copy between AGPRs on gfx908, and the current optimizations around that
3566+
// expect to see copies to VGPR.
3567+
return ST.hasGFX90AInsts() ? getVectorSuperClassForBitWidth(BitWidth)
3568+
: getVGPRClassForBitWidth(BitWidth);
3569+
}
3570+
35603571
const TargetRegisterClass *
35613572
SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
35623573
if (BitWidth == 16 || BitWidth == 32)

llvm/lib/Target/AMDGPU/SIRegisterInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,10 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
215215
const TargetRegisterClass *
216216
getVectorSuperClassForBitWidth(unsigned BitWidth) const;
217217

218+
/// \returns the register class to use by default for a vector value of
/// \p BitWidth bits. NOTE(review): per the definition in SIRegisterInfo.cpp
/// this is the vector superclass on subtargets with gfx90a instructions and
/// the plain VGPR class otherwise -- confirm against the implementation.
LLVM_READONLY
219+
const TargetRegisterClass *
220+
getDefaultVectorSuperClassForBitWidth(unsigned BitWidth) const;
221+
218222
LLVM_READONLY
219223
static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth);
220224

0 commit comments

Comments
 (0)