Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -332,8 +332,16 @@ static MCPhysReg getRegForPrinting(MCPhysReg Reg, const MCRegisterInfo &MRI) {
if (Idx < 0x100)
return Reg;

unsigned RegNo = Idx % 0x100;
const MCRegisterClass *RC = getVGPRPhysRegClass(Reg, MRI);
return RC->getRegister(Idx % 0x100);
if (RC->getID() == AMDGPU::VGPR_16RegClassID) {
// This class has 2048 registers with interleaved lo16 and hi16.
RegNo *= 2;
if (Enc & AMDGPU::HWEncoding::IS_HI16)
++RegNo;
}

return RC->getRegister(RegNo);
}

// Restore MSBs of a VGPR above 255 from the MCInstrAnalysis.
Expand Down
11 changes: 10 additions & 1 deletion llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3410,7 +3410,16 @@ MCPhysReg getVGPRWithMSBs(MCPhysReg Reg, unsigned MSBs,
const MCRegisterClass *RC = getVGPRPhysRegClass(Reg, MRI);
if (!RC)
return AMDGPU::NoRegister;
return RC->getRegister(Idx | (MSBs << 8));

Idx |= MSBs << 8;
if (RC->getID() == AMDGPU::VGPR_16RegClassID) {
// This class has 2048 registers with interleaved lo16 and hi16.
Idx *= 2;
if (Enc & AMDGPU::HWEncoding::IS_HI16)
++Idx;
}

return RC->getRegister(Idx);
}

std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
Expand Down
66 changes: 66 additions & 0 deletions llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -start-before=amdgpu-lower-vgpr-encoding -o - %s | FileCheck -check-prefixes=GCN,ASM %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -start-before=amdgpu-lower-vgpr-encoding -o - %s | llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -filetype=obj -o - | llvm-objdump -d --mcpu=gfx1250 --mattr=+real-true16 - | FileCheck -check-prefixes=GCN,DIS %s

# ASM-LABEL: {{^}}high_vgprs:
# DIS-LABEL: <high_vgprs>:
---
name: high_vgprs
tracksRegLiveness: true
body: |
bb.0:
; ASM: %bb.0:

; GCN-NEXT: v_add_f16_e64 v0.h, v1.h, v2.h
$vgpr0_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr1_hi16, 0, undef $vgpr2_hi16, 0, 0, 0, implicit $exec, implicit $mode

; GCN-NEXT: v_add_f16_e64 v0.l, v1.l, v2.l
$vgpr0_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr1_lo16, 0, undef $vgpr2_lo16, 0, 0, 0, implicit $exec, implicit $mode

; GCN-NEXT: v_add_f16_e64 v128.h, v129.h, v130.h
$vgpr128_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr129_hi16, 0, undef $vgpr130_hi16, 0, 0, 0, implicit $exec, implicit $mode

; GCN-NEXT: v_add_f16_e64 v128.l, v129.l, v130.l
$vgpr128_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr129_lo16, 0, undef $vgpr130_lo16, 0, 0, 0, implicit $exec, implicit $mode

; GCN-NEXT: s_set_vgpr_msb 0x45
; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=0
; GCN-NEXT: v_add_f16_e64 v0.h /*v256.h*/, v1.h /*v257.h*/, v2.h /*v258.h*/
$vgpr256_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr257_hi16, 0, undef $vgpr258_hi16, 0, 0, 0, implicit $exec, implicit $mode

; GCN-NEXT: v_add_f16_e64 v0.l /*v256.l*/, v1.l /*v257.l*/, v2.l /*v258.l*/
$vgpr256_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr257_lo16, 0, undef $vgpr258_lo16, 0, 0, 0, implicit $exec, implicit $mode

; GCN-NEXT: v_add_f16_e64 v128.h /*v384.h*/, v129.h /*v385.h*/, v130.h /*v386.h*/
$vgpr384_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr385_hi16, 0, undef $vgpr386_hi16, 0, 0, 0, implicit $exec, implicit $mode

; GCN-NEXT: v_add_f16_e64 v128.l /*v384.l*/, v129.l /*v385.l*/, v130.l /*v386.l*/
$vgpr384_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr385_lo16, 0, undef $vgpr386_lo16, 0, 0, 0, implicit $exec, implicit $mode

; GCN-NEXT: s_set_vgpr_msb 0x8a
; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=0
; GCN-NEXT: v_add_f16_e64 v0.h /*v512.h*/, v1.h /*v513.h*/, v2.h /*v514.h*/
$vgpr512_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr513_hi16, 0, undef $vgpr514_hi16, 0, 0, 0, implicit $exec, implicit $mode

; GCN-NEXT: v_add_f16_e64 v0.l /*v512.l*/, v1.l /*v513.l*/, v2.l /*v514.l*/
$vgpr512_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr513_lo16, 0, undef $vgpr514_lo16, 0, 0, 0, implicit $exec, implicit $mode

; GCN-NEXT: v_add_f16_e64 v128.h /*v640.h*/, v129.h /*v641.h*/, v130.h /*v642.h*/
$vgpr640_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr641_hi16, 0, undef $vgpr642_hi16, 0, 0, 0, implicit $exec, implicit $mode

; GCN-NEXT: v_add_f16_e64 v128.l /*v640.l*/, v129.l /*v641.l*/, v130.l /*v642.l*/
$vgpr640_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr641_lo16, 0, undef $vgpr642_lo16, 0, 0, 0, implicit $exec, implicit $mode

; GCN-NEXT: s_set_vgpr_msb 0xcf
; ASM-SAME: ; msbs: dst=3 src0=3 src1=3 src2=0
; GCN-NEXT: v_add_f16_e64 v0.h /*v768.h*/, v1.h /*v769.h*/, v2.h /*v770.h*/
$vgpr768_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr769_hi16, 0, undef $vgpr770_hi16, 0, 0, 0, implicit $exec, implicit $mode

; GCN-NEXT: v_add_f16_e64 v0.l /*v768.l*/, v1.l /*v769.l*/, v2.l /*v770.l*/
$vgpr768_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr769_lo16, 0, undef $vgpr770_lo16, 0, 0, 0, implicit $exec, implicit $mode

; GCN-NEXT: v_add_f16_e64 v128.h /*v896.h*/, v129.h /*v897.h*/, v130.h /*v898.h*/
$vgpr896_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr897_hi16, 0, undef $vgpr898_hi16, 0, 0, 0, implicit $exec, implicit $mode

; GCN-NEXT: v_add_f16_e64 v128.l /*v896.l*/, v129.l /*v897.l*/, v130.l /*v898.l*/
$vgpr896_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr897_lo16, 0, undef $vgpr898_lo16, 0, 0, 0, implicit $exec, implicit $mode
...
Loading