Skip to content

Commit 7ff4cd4

Browse files
authored
AMDGPU: Start to use AV classes for unknown vector class (llvm#166482)
AMDGPU: Start to use AV classes for unknown vector class. Use AGPR+VGPR superclasses for gfx90a+. The class used for each type should be the broadest possible class, to be contextually restricted later. InstrEmitter clamps these to the common subclass of the context use instructions, so we're best off using the broadest possible class for all types. Note that this does very little, because we only use VGPR classes for FP types (though this doesn't particularly make any sense), and we legalize normal loads and stores to integer.
1 parent d1cc137 commit 7ff4cd4

23 files changed

+1912
-1904
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -91,64 +91,73 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
9191
addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
9292

9393
addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass);
94-
addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass);
94+
95+
const SIRegisterInfo *TRI = STI.getRegisterInfo();
96+
const TargetRegisterClass *V32RegClass =
97+
TRI->getDefaultVectorSuperClassForBitWidth(32);
98+
addRegisterClass(MVT::f32, V32RegClass);
9599

96100
addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass);
97101

98-
const SIRegisterInfo *TRI = STI.getRegisterInfo();
99-
const TargetRegisterClass *V64RegClass = TRI->getVGPR64Class();
102+
const TargetRegisterClass *V64RegClass =
103+
TRI->getDefaultVectorSuperClassForBitWidth(64);
100104

101105
addRegisterClass(MVT::f64, V64RegClass);
102106
addRegisterClass(MVT::v2f32, V64RegClass);
103107
addRegisterClass(MVT::Untyped, V64RegClass);
104108

105109
addRegisterClass(MVT::v3i32, &AMDGPU::SGPR_96RegClass);
106-
addRegisterClass(MVT::v3f32, &AMDGPU::VReg_96RegClass);
110+
addRegisterClass(MVT::v3f32, TRI->getDefaultVectorSuperClassForBitWidth(96));
107111

108112
addRegisterClass(MVT::v2i64, &AMDGPU::SGPR_128RegClass);
109113
addRegisterClass(MVT::v2f64, &AMDGPU::SGPR_128RegClass);
110114

111115
addRegisterClass(MVT::v4i32, &AMDGPU::SGPR_128RegClass);
112-
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
116+
addRegisterClass(MVT::v4f32, TRI->getDefaultVectorSuperClassForBitWidth(128));
113117

114118
addRegisterClass(MVT::v5i32, &AMDGPU::SGPR_160RegClass);
115-
addRegisterClass(MVT::v5f32, &AMDGPU::VReg_160RegClass);
119+
addRegisterClass(MVT::v5f32, TRI->getDefaultVectorSuperClassForBitWidth(160));
116120

117121
addRegisterClass(MVT::v6i32, &AMDGPU::SGPR_192RegClass);
118-
addRegisterClass(MVT::v6f32, &AMDGPU::VReg_192RegClass);
122+
addRegisterClass(MVT::v6f32, TRI->getDefaultVectorSuperClassForBitWidth(192));
119123

120124
addRegisterClass(MVT::v3i64, &AMDGPU::SGPR_192RegClass);
121-
addRegisterClass(MVT::v3f64, &AMDGPU::VReg_192RegClass);
125+
addRegisterClass(MVT::v3f64, TRI->getDefaultVectorSuperClassForBitWidth(192));
122126

123127
addRegisterClass(MVT::v7i32, &AMDGPU::SGPR_224RegClass);
124-
addRegisterClass(MVT::v7f32, &AMDGPU::VReg_224RegClass);
128+
addRegisterClass(MVT::v7f32, TRI->getDefaultVectorSuperClassForBitWidth(224));
125129

126130
addRegisterClass(MVT::v8i32, &AMDGPU::SGPR_256RegClass);
127-
addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
131+
addRegisterClass(MVT::v8f32, TRI->getDefaultVectorSuperClassForBitWidth(256));
128132

129133
addRegisterClass(MVT::v4i64, &AMDGPU::SGPR_256RegClass);
130-
addRegisterClass(MVT::v4f64, &AMDGPU::VReg_256RegClass);
134+
addRegisterClass(MVT::v4f64, TRI->getDefaultVectorSuperClassForBitWidth(256));
131135

132136
addRegisterClass(MVT::v9i32, &AMDGPU::SGPR_288RegClass);
133-
addRegisterClass(MVT::v9f32, &AMDGPU::VReg_288RegClass);
137+
addRegisterClass(MVT::v9f32, TRI->getDefaultVectorSuperClassForBitWidth(288));
134138

135139
addRegisterClass(MVT::v10i32, &AMDGPU::SGPR_320RegClass);
136-
addRegisterClass(MVT::v10f32, &AMDGPU::VReg_320RegClass);
140+
addRegisterClass(MVT::v10f32,
141+
TRI->getDefaultVectorSuperClassForBitWidth(320));
137142

138143
addRegisterClass(MVT::v11i32, &AMDGPU::SGPR_352RegClass);
139-
addRegisterClass(MVT::v11f32, &AMDGPU::VReg_352RegClass);
144+
addRegisterClass(MVT::v11f32,
145+
TRI->getDefaultVectorSuperClassForBitWidth(352));
140146

141147
addRegisterClass(MVT::v12i32, &AMDGPU::SGPR_384RegClass);
142-
addRegisterClass(MVT::v12f32, &AMDGPU::VReg_384RegClass);
148+
addRegisterClass(MVT::v12f32,
149+
TRI->getDefaultVectorSuperClassForBitWidth(384));
143150

144151
addRegisterClass(MVT::v16i32, &AMDGPU::SGPR_512RegClass);
145-
addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
152+
addRegisterClass(MVT::v16f32,
153+
TRI->getDefaultVectorSuperClassForBitWidth(512));
146154

147155
addRegisterClass(MVT::v8i64, &AMDGPU::SGPR_512RegClass);
148-
addRegisterClass(MVT::v8f64, &AMDGPU::VReg_512RegClass);
156+
addRegisterClass(MVT::v8f64, TRI->getDefaultVectorSuperClassForBitWidth(512));
149157

150158
addRegisterClass(MVT::v16i64, &AMDGPU::SGPR_1024RegClass);
151-
addRegisterClass(MVT::v16f64, &AMDGPU::VReg_1024RegClass);
159+
addRegisterClass(MVT::v16f64,
160+
TRI->getDefaultVectorSuperClassForBitWidth(1024));
152161

153162
if (Subtarget->has16BitInsts()) {
154163
if (Subtarget->useRealTrue16Insts()) {
@@ -180,7 +189,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
180189
}
181190

182191
addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
183-
addRegisterClass(MVT::v32f32, &AMDGPU::VReg_1024RegClass);
192+
addRegisterClass(MVT::v32f32,
193+
TRI->getDefaultVectorSuperClassForBitWidth(1024));
184194

185195
computeRegisterProperties(Subtarget->getRegisterInfo());
186196

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3557,6 +3557,17 @@ SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
35573557
: getAnyVectorSuperClassForBitWidth(BitWidth);
35583558
}
35593559

3560+
const TargetRegisterClass *
3561+
SIRegisterInfo::getDefaultVectorSuperClassForBitWidth(unsigned BitWidth) const {
3562+
// TODO: In principle this should use AV classes for gfx908 too. This is
3563+
// limited to 90a+ to avoid regressing special case copy optimizations which
3564+
// need new handling. The core issue is that it's not possible to directly
3565+
// copy between AGPRs on gfx908, and the current optimizations around that
3566+
// expect to see copies to VGPR.
3567+
return ST.hasGFX90AInsts() ? getVectorSuperClassForBitWidth(BitWidth)
3568+
: getVGPRClassForBitWidth(BitWidth);
3569+
}
3570+
35603571
const TargetRegisterClass *
35613572
SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
35623573
if (BitWidth == 16 || BitWidth == 32)

llvm/lib/Target/AMDGPU/SIRegisterInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,10 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
215215
const TargetRegisterClass *
216216
getVectorSuperClassForBitWidth(unsigned BitWidth) const;
217217

218+
LLVM_READONLY
219+
const TargetRegisterClass *
220+
getDefaultVectorSuperClassForBitWidth(unsigned BitWidth) const;
221+
218222
LLVM_READONLY
219223
static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth);
220224

0 commit comments

Comments
 (0)