Skip to content

Commit 9922d8d

Browse files
authored
AMDGPU: Stop using aligned VGPR classes for addRegisterClass (llvm#158278)
This is unnecessary. At use emission time, InstrEmitter will use the common subclass of the value type's register class and the use instruction's register classes. This removes one of the obstacles to no longer treating special-case instructions, which do not have the alignment requirement, overly conservatively.
1 parent e18461a commit 9922d8d

File tree

2 files changed

+24
-22
lines changed

2 files changed

+24
-22
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -103,52 +103,52 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
103103
addRegisterClass(MVT::Untyped, V64RegClass);
104104

105105
addRegisterClass(MVT::v3i32, &AMDGPU::SGPR_96RegClass);
106-
addRegisterClass(MVT::v3f32, TRI->getVGPRClassForBitWidth(96));
106+
addRegisterClass(MVT::v3f32, &AMDGPU::VReg_96RegClass);
107107

108108
addRegisterClass(MVT::v2i64, &AMDGPU::SGPR_128RegClass);
109109
addRegisterClass(MVT::v2f64, &AMDGPU::SGPR_128RegClass);
110110

111111
addRegisterClass(MVT::v4i32, &AMDGPU::SGPR_128RegClass);
112-
addRegisterClass(MVT::v4f32, TRI->getVGPRClassForBitWidth(128));
112+
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
113113

114114
addRegisterClass(MVT::v5i32, &AMDGPU::SGPR_160RegClass);
115-
addRegisterClass(MVT::v5f32, TRI->getVGPRClassForBitWidth(160));
115+
addRegisterClass(MVT::v5f32, &AMDGPU::VReg_160RegClass);
116116

117117
addRegisterClass(MVT::v6i32, &AMDGPU::SGPR_192RegClass);
118-
addRegisterClass(MVT::v6f32, TRI->getVGPRClassForBitWidth(192));
118+
addRegisterClass(MVT::v6f32, &AMDGPU::VReg_192RegClass);
119119

120120
addRegisterClass(MVT::v3i64, &AMDGPU::SGPR_192RegClass);
121-
addRegisterClass(MVT::v3f64, TRI->getVGPRClassForBitWidth(192));
121+
addRegisterClass(MVT::v3f64, &AMDGPU::VReg_192RegClass);
122122

123123
addRegisterClass(MVT::v7i32, &AMDGPU::SGPR_224RegClass);
124-
addRegisterClass(MVT::v7f32, TRI->getVGPRClassForBitWidth(224));
124+
addRegisterClass(MVT::v7f32, &AMDGPU::VReg_224RegClass);
125125

126126
addRegisterClass(MVT::v8i32, &AMDGPU::SGPR_256RegClass);
127-
addRegisterClass(MVT::v8f32, TRI->getVGPRClassForBitWidth(256));
127+
addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
128128

129129
addRegisterClass(MVT::v4i64, &AMDGPU::SGPR_256RegClass);
130-
addRegisterClass(MVT::v4f64, TRI->getVGPRClassForBitWidth(256));
130+
addRegisterClass(MVT::v4f64, &AMDGPU::VReg_256RegClass);
131131

132132
addRegisterClass(MVT::v9i32, &AMDGPU::SGPR_288RegClass);
133-
addRegisterClass(MVT::v9f32, TRI->getVGPRClassForBitWidth(288));
133+
addRegisterClass(MVT::v9f32, &AMDGPU::VReg_288RegClass);
134134

135135
addRegisterClass(MVT::v10i32, &AMDGPU::SGPR_320RegClass);
136-
addRegisterClass(MVT::v10f32, TRI->getVGPRClassForBitWidth(320));
136+
addRegisterClass(MVT::v10f32, &AMDGPU::VReg_320RegClass);
137137

138138
addRegisterClass(MVT::v11i32, &AMDGPU::SGPR_352RegClass);
139-
addRegisterClass(MVT::v11f32, TRI->getVGPRClassForBitWidth(352));
139+
addRegisterClass(MVT::v11f32, &AMDGPU::VReg_352RegClass);
140140

141141
addRegisterClass(MVT::v12i32, &AMDGPU::SGPR_384RegClass);
142-
addRegisterClass(MVT::v12f32, TRI->getVGPRClassForBitWidth(384));
142+
addRegisterClass(MVT::v12f32, &AMDGPU::VReg_384RegClass);
143143

144144
addRegisterClass(MVT::v16i32, &AMDGPU::SGPR_512RegClass);
145-
addRegisterClass(MVT::v16f32, TRI->getVGPRClassForBitWidth(512));
145+
addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
146146

147147
addRegisterClass(MVT::v8i64, &AMDGPU::SGPR_512RegClass);
148-
addRegisterClass(MVT::v8f64, TRI->getVGPRClassForBitWidth(512));
148+
addRegisterClass(MVT::v8f64, &AMDGPU::VReg_512RegClass);
149149

150150
addRegisterClass(MVT::v16i64, &AMDGPU::SGPR_1024RegClass);
151-
addRegisterClass(MVT::v16f64, TRI->getVGPRClassForBitWidth(1024));
151+
addRegisterClass(MVT::v16f64, &AMDGPU::VReg_1024RegClass);
152152

153153
if (Subtarget->has16BitInsts()) {
154154
if (Subtarget->useRealTrue16Insts()) {
@@ -180,7 +180,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
180180
}
181181

182182
addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
183-
addRegisterClass(MVT::v32f32, TRI->getVGPRClassForBitWidth(1024));
183+
addRegisterClass(MVT::v32f32, &AMDGPU::VReg_1024RegClass);
184184

185185
computeRegisterProperties(Subtarget->getRegisterInfo());
186186

llvm/test/CodeGen/AMDGPU/mfma-loop.ll

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2399,8 +2399,9 @@ define amdgpu_kernel void @test_mfma_nested_loop_zeroinit(ptr addrspace(1) %arg)
23992399
; GFX90A-NEXT: v_accvgpr_mov_b32 a29, a0
24002400
; GFX90A-NEXT: v_accvgpr_mov_b32 a30, a0
24012401
; GFX90A-NEXT: v_accvgpr_mov_b32 a31, a0
2402-
; GFX90A-NEXT: v_mov_b32_e32 v0, 2.0
2403-
; GFX90A-NEXT: v_mov_b32_e32 v1, 1.0
2402+
; GFX90A-NEXT: v_mov_b32_e32 v0, 1.0
2403+
; GFX90A-NEXT: v_mov_b32_e32 v1, 2.0
2404+
; GFX90A-NEXT: ; kill: def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 killed $exec
24042405
; GFX90A-NEXT: .LBB9_1: ; %for.cond.preheader
24052406
; GFX90A-NEXT: ; =>This Loop Header: Depth=1
24062407
; GFX90A-NEXT: ; Child Loop BB9_2 Depth 2
@@ -2409,7 +2410,7 @@ define amdgpu_kernel void @test_mfma_nested_loop_zeroinit(ptr addrspace(1) %arg)
24092410
; GFX90A-NEXT: ; Parent Loop BB9_1 Depth=1
24102411
; GFX90A-NEXT: ; => This Inner Loop Header: Depth=2
24112412
; GFX90A-NEXT: s_nop 0
2412-
; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v1, v0, a[0:31]
2413+
; GFX90A-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
24132414
; GFX90A-NEXT: s_add_i32 s1, s1, -1
24142415
; GFX90A-NEXT: s_cmp_lg_u32 s1, 0
24152416
; GFX90A-NEXT: s_cbranch_scc1 .LBB9_2
@@ -2468,8 +2469,9 @@ define amdgpu_kernel void @test_mfma_nested_loop_zeroinit(ptr addrspace(1) %arg)
24682469
; GFX942-NEXT: v_accvgpr_mov_b32 a29, a0
24692470
; GFX942-NEXT: v_accvgpr_mov_b32 a30, a0
24702471
; GFX942-NEXT: v_accvgpr_mov_b32 a31, a0
2471-
; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
2472-
; GFX942-NEXT: v_mov_b32_e32 v1, 1.0
2472+
; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
2473+
; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
2474+
; GFX942-NEXT: ; kill: def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 killed $exec
24732475
; GFX942-NEXT: .LBB9_1: ; %for.cond.preheader
24742476
; GFX942-NEXT: ; =>This Loop Header: Depth=1
24752477
; GFX942-NEXT: ; Child Loop BB9_2 Depth 2
@@ -2478,7 +2480,7 @@ define amdgpu_kernel void @test_mfma_nested_loop_zeroinit(ptr addrspace(1) %arg)
24782480
; GFX942-NEXT: ; Parent Loop BB9_1 Depth=1
24792481
; GFX942-NEXT: ; => This Inner Loop Header: Depth=2
24802482
; GFX942-NEXT: s_nop 0
2481-
; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v1, v0, a[0:31]
2483+
; GFX942-NEXT: v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[0:31]
24822484
; GFX942-NEXT: s_add_i32 s1, s1, -1
24832485
; GFX942-NEXT: s_cmp_lg_u32 s1, 0
24842486
; GFX942-NEXT: s_cbranch_scc1 .LBB9_2

0 commit comments

Comments
 (0)