Skip to content

Commit f693a7f

Browse files
authored
[AMDGPU] Fix high vgpr printing with true16 (#160209)
1 parent 51a86e7 commit f693a7f

File tree

3 files changed: 85 additions & 2 deletions

3 files changed: 85 additions & 2 deletions

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,8 +332,16 @@ static MCPhysReg getRegForPrinting(MCPhysReg Reg, const MCRegisterInfo &MRI) {
332332
if (Idx < 0x100)
333333
return Reg;
334334

335+
unsigned RegNo = Idx % 0x100;
335336
const MCRegisterClass *RC = getVGPRPhysRegClass(Reg, MRI);
336-
return RC->getRegister(Idx % 0x100);
337+
if (RC->getID() == AMDGPU::VGPR_16RegClassID) {
338+
// This class has 2048 registers with interleaved lo16 and hi16.
339+
RegNo *= 2;
340+
if (Enc & AMDGPU::HWEncoding::IS_HI16)
341+
++RegNo;
342+
}
343+
344+
return RC->getRegister(RegNo);
337345
}
338346

339347
// Restore MSBs of a VGPR above 255 from the MCInstrAnalysis.

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3410,7 +3410,16 @@ MCPhysReg getVGPRWithMSBs(MCPhysReg Reg, unsigned MSBs,
34103410
const MCRegisterClass *RC = getVGPRPhysRegClass(Reg, MRI);
34113411
if (!RC)
34123412
return AMDGPU::NoRegister;
3413-
return RC->getRegister(Idx | (MSBs << 8));
3413+
3414+
Idx |= MSBs << 8;
3415+
if (RC->getID() == AMDGPU::VGPR_16RegClassID) {
3416+
// This class has 2048 registers with interleaved lo16 and hi16.
3417+
Idx *= 2;
3418+
if (Enc & AMDGPU::HWEncoding::IS_HI16)
3419+
++Idx;
3420+
}
3421+
3422+
return RC->getRegister(Idx);
34143423
}
34153424

34163425
std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
(Newly added test file; its path was not captured in this page extract.)

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -start-before=amdgpu-lower-vgpr-encoding -o - %s | FileCheck -check-prefixes=GCN,ASM %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -start-before=amdgpu-lower-vgpr-encoding -o - %s | llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -filetype=obj -o - | llvm-objdump -d --mcpu=gfx1250 --mattr=+real-true16 - | FileCheck -check-prefixes=GCN,DIS %s
3+
4+
# ASM-LABEL: {{^}}high_vgprs:
5+
# DIS-LABEL: <high_vgprs>:
6+
---
7+
name: high_vgprs
8+
tracksRegLiveness: true
9+
body: |
10+
bb.0:
11+
; ASM: %bb.0:
12+
13+
; GCN-NEXT: v_add_f16_e64 v0.h, v1.h, v2.h
14+
$vgpr0_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr1_hi16, 0, undef $vgpr2_hi16, 0, 0, 0, implicit $exec, implicit $mode
15+
16+
; GCN-NEXT: v_add_f16_e64 v0.l, v1.l, v2.l
17+
$vgpr0_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr1_lo16, 0, undef $vgpr2_lo16, 0, 0, 0, implicit $exec, implicit $mode
18+
19+
; GCN-NEXT: v_add_f16_e64 v128.h, v129.h, v130.h
20+
$vgpr128_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr129_hi16, 0, undef $vgpr130_hi16, 0, 0, 0, implicit $exec, implicit $mode
21+
22+
; GCN-NEXT: v_add_f16_e64 v128.l, v129.l, v130.l
23+
$vgpr128_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr129_lo16, 0, undef $vgpr130_lo16, 0, 0, 0, implicit $exec, implicit $mode
24+
25+
; GCN-NEXT: s_set_vgpr_msb 0x45
26+
; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=0
27+
; GCN-NEXT: v_add_f16_e64 v0.h /*v256.h*/, v1.h /*v257.h*/, v2.h /*v258.h*/
28+
$vgpr256_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr257_hi16, 0, undef $vgpr258_hi16, 0, 0, 0, implicit $exec, implicit $mode
29+
30+
; GCN-NEXT: v_add_f16_e64 v0.l /*v256.l*/, v1.l /*v257.l*/, v2.l /*v258.l*/
31+
$vgpr256_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr257_lo16, 0, undef $vgpr258_lo16, 0, 0, 0, implicit $exec, implicit $mode
32+
33+
; GCN-NEXT: v_add_f16_e64 v128.h /*v384.h*/, v129.h /*v385.h*/, v130.h /*v386.h*/
34+
$vgpr384_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr385_hi16, 0, undef $vgpr386_hi16, 0, 0, 0, implicit $exec, implicit $mode
35+
36+
; GCN-NEXT: v_add_f16_e64 v128.l /*v384.l*/, v129.l /*v385.l*/, v130.l /*v386.l*/
37+
$vgpr384_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr385_lo16, 0, undef $vgpr386_lo16, 0, 0, 0, implicit $exec, implicit $mode
38+
39+
; GCN-NEXT: s_set_vgpr_msb 0x8a
40+
; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=0
41+
; GCN-NEXT: v_add_f16_e64 v0.h /*v512.h*/, v1.h /*v513.h*/, v2.h /*v514.h*/
42+
$vgpr512_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr513_hi16, 0, undef $vgpr514_hi16, 0, 0, 0, implicit $exec, implicit $mode
43+
44+
; GCN-NEXT: v_add_f16_e64 v0.l /*v512.l*/, v1.l /*v513.l*/, v2.l /*v514.l*/
45+
$vgpr512_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr513_lo16, 0, undef $vgpr514_lo16, 0, 0, 0, implicit $exec, implicit $mode
46+
47+
; GCN-NEXT: v_add_f16_e64 v128.h /*v640.h*/, v129.h /*v641.h*/, v130.h /*v642.h*/
48+
$vgpr640_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr641_hi16, 0, undef $vgpr642_hi16, 0, 0, 0, implicit $exec, implicit $mode
49+
50+
; GCN-NEXT: v_add_f16_e64 v128.l /*v640.l*/, v129.l /*v641.l*/, v130.l /*v642.l*/
51+
$vgpr640_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr641_lo16, 0, undef $vgpr642_lo16, 0, 0, 0, implicit $exec, implicit $mode
52+
53+
; GCN-NEXT: s_set_vgpr_msb 0xcf
54+
; ASM-SAME: ; msbs: dst=3 src0=3 src1=3 src2=0
55+
; GCN-NEXT: v_add_f16_e64 v0.h /*v768.h*/, v1.h /*v769.h*/, v2.h /*v770.h*/
56+
$vgpr768_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr769_hi16, 0, undef $vgpr770_hi16, 0, 0, 0, implicit $exec, implicit $mode
57+
58+
; GCN-NEXT: v_add_f16_e64 v0.l /*v768.l*/, v1.l /*v769.l*/, v2.l /*v770.l*/
59+
$vgpr768_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr769_lo16, 0, undef $vgpr770_lo16, 0, 0, 0, implicit $exec, implicit $mode
60+
61+
; GCN-NEXT: v_add_f16_e64 v128.h /*v896.h*/, v129.h /*v897.h*/, v130.h /*v898.h*/
62+
$vgpr896_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr897_hi16, 0, undef $vgpr898_hi16, 0, 0, 0, implicit $exec, implicit $mode
63+
64+
; GCN-NEXT: v_add_f16_e64 v128.l /*v896.l*/, v129.l /*v897.l*/, v130.l /*v898.l*/
65+
$vgpr896_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr897_lo16, 0, undef $vgpr898_lo16, 0, 0, 0, implicit $exec, implicit $mode
66+
...

0 commit comments

Comments
 (0)