diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index f098e7a3c6c67..7f5546a6c7bc8 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -332,8 +332,16 @@ static MCPhysReg getRegForPrinting(MCPhysReg Reg, const MCRegisterInfo &MRI) { if (Idx < 0x100) return Reg; + unsigned RegNo = Idx % 0x100; const MCRegisterClass *RC = getVGPRPhysRegClass(Reg, MRI); - return RC->getRegister(Idx % 0x100); + if (RC->getID() == AMDGPU::VGPR_16RegClassID) { + // This class has 2048 registers with interleaved lo16 and hi16. + RegNo *= 2; + if (Enc & AMDGPU::HWEncoding::IS_HI16) + ++RegNo; + } + + return RC->getRegister(RegNo); } // Restore MSBs of a VGPR above 255 from the MCInstrAnalysis. diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index c80302e03beea..20fa1412a778e 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -3410,7 +3410,16 @@ MCPhysReg getVGPRWithMSBs(MCPhysReg Reg, unsigned MSBs, const MCRegisterClass *RC = getVGPRPhysRegClass(Reg, MRI); if (!RC) return AMDGPU::NoRegister; - return RC->getRegister(Idx | (MSBs << 8)); + + Idx |= MSBs << 8; + if (RC->getID() == AMDGPU::VGPR_16RegClassID) { + // This class has 2048 registers with interleaved lo16 and hi16. + Idx *= 2; + if (Enc & AMDGPU::HWEncoding::IS_HI16) + ++Idx; + } + + return RC->getRegister(Idx); } std::pair diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir new file mode 100644 index 0000000000000..8a70a8acd28d3 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir @@ -0,0 +1,66 @@ +# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -start-before=amdgpu-lower-vgpr-encoding -o - %s | FileCheck -check-prefixes=GCN,ASM %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -start-before=amdgpu-lower-vgpr-encoding -o - %s | llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -filetype=obj -o - | llvm-objdump -d --mcpu=gfx1250 --mattr=+real-true16 - | FileCheck -check-prefixes=GCN,DIS %s + +# ASM-LABEL: {{^}}high_vgprs: +# DIS-LABEL: : +--- +name: high_vgprs +tracksRegLiveness: true +body: | + bb.0: + ; ASM: %bb.0: + + ; GCN-NEXT: v_add_f16_e64 v0.h, v1.h, v2.h + $vgpr0_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr1_hi16, 0, undef $vgpr2_hi16, 0, 0, 0, implicit $exec, implicit $mode + + ; GCN-NEXT: v_add_f16_e64 v0.l, v1.l, v2.l + $vgpr0_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr1_lo16, 0, undef $vgpr2_lo16, 0, 0, 0, implicit $exec, implicit $mode + + ; GCN-NEXT: v_add_f16_e64 v128.h, v129.h, v130.h + $vgpr128_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr129_hi16, 0, undef $vgpr130_hi16, 0, 0, 0, implicit $exec, implicit $mode + + ; GCN-NEXT: v_add_f16_e64 v128.l, v129.l, v130.l + $vgpr128_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr129_lo16, 0, undef $vgpr130_lo16, 0, 0, 0, implicit $exec, implicit $mode + + ; GCN-NEXT: s_set_vgpr_msb 0x45 + ; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=0 + ; GCN-NEXT: v_add_f16_e64 v0.h /*v256.h*/, v1.h /*v257.h*/, v2.h /*v258.h*/ + $vgpr256_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr257_hi16, 0, undef $vgpr258_hi16, 0, 0, 0, implicit $exec, implicit $mode + + ; GCN-NEXT: v_add_f16_e64 v0.l /*v256.l*/, v1.l /*v257.l*/, v2.l /*v258.l*/ + $vgpr256_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr257_lo16, 0, undef $vgpr258_lo16, 0, 0, 0, implicit $exec, implicit $mode + + ; GCN-NEXT: v_add_f16_e64 v128.h /*v384.h*/, v129.h /*v385.h*/, v130.h /*v386.h*/ + $vgpr384_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr385_hi16, 0, undef $vgpr386_hi16, 0, 0, 0, implicit $exec, implicit $mode + + ; GCN-NEXT: v_add_f16_e64 v128.l /*v384.l*/, v129.l /*v385.l*/, v130.l /*v386.l*/ + $vgpr384_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr385_lo16, 0, undef $vgpr386_lo16, 0, 0, 0, implicit $exec, implicit $mode + + ; GCN-NEXT: s_set_vgpr_msb 0x8a + ; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=0 + ; GCN-NEXT: v_add_f16_e64 v0.h /*v512.h*/, v1.h /*v513.h*/, v2.h /*v514.h*/ + $vgpr512_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr513_hi16, 0, undef $vgpr514_hi16, 0, 0, 0, implicit $exec, implicit $mode + + ; GCN-NEXT: v_add_f16_e64 v0.l /*v512.l*/, v1.l /*v513.l*/, v2.l /*v514.l*/ + $vgpr512_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr513_lo16, 0, undef $vgpr514_lo16, 0, 0, 0, implicit $exec, implicit $mode + + ; GCN-NEXT: v_add_f16_e64 v128.h /*v640.h*/, v129.h /*v641.h*/, v130.h /*v642.h*/ + $vgpr640_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr641_hi16, 0, undef $vgpr642_hi16, 0, 0, 0, implicit $exec, implicit $mode + + ; GCN-NEXT: v_add_f16_e64 v128.l /*v640.l*/, v129.l /*v641.l*/, v130.l /*v642.l*/ + $vgpr640_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr641_lo16, 0, undef $vgpr642_lo16, 0, 0, 0, implicit $exec, implicit $mode + + ; GCN-NEXT: s_set_vgpr_msb 0xcf + ; ASM-SAME: ; msbs: dst=3 src0=3 src1=3 src2=0 + ; GCN-NEXT: v_add_f16_e64 v0.h /*v768.h*/, v1.h /*v769.h*/, v2.h /*v770.h*/ + $vgpr768_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr769_hi16, 0, undef $vgpr770_hi16, 0, 0, 0, implicit $exec, implicit $mode + + ; GCN-NEXT: v_add_f16_e64 v0.l /*v768.l*/, v1.l /*v769.l*/, v2.l /*v770.l*/ + $vgpr768_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr769_lo16, 0, undef $vgpr770_lo16, 0, 0, 0, implicit $exec, implicit $mode + + ; GCN-NEXT: v_add_f16_e64 v128.h /*v896.h*/, v129.h /*v897.h*/, v130.h /*v898.h*/ + $vgpr896_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr897_hi16, 0, undef $vgpr898_hi16, 0, 0, 0, implicit $exec, implicit $mode + + ; GCN-NEXT: v_add_f16_e64 v128.l /*v896.l*/, v129.l /*v897.l*/, v130.l /*v898.l*/ + $vgpr896_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr897_lo16, 0, undef $vgpr898_lo16, 0, 0, 0, implicit $exec, implicit $mode +...