Skip to content

Conversation

@rampitec
Copy link
Collaborator

No description provided.

Copy link
Collaborator Author

This stack of pull requests is managed by Graphite. Learn more about stacking.

@rampitec rampitec requested review from Sisyph and kosarev September 22, 2025 22:19
@rampitec rampitec marked this pull request as ready for review September 22, 2025 22:19
@llvmbot
Copy link
Member

llvmbot commented Sep 22, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/160209.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+9-1)
  • (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+10-1)
  • (added) llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir (+66)
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index f098e7a3c6c67..7f5546a6c7bc8 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -332,8 +332,16 @@ static MCPhysReg getRegForPrinting(MCPhysReg Reg, const MCRegisterInfo &MRI) {
   if (Idx < 0x100)
     return Reg;
 
+  unsigned RegNo = Idx % 0x100;
   const MCRegisterClass *RC = getVGPRPhysRegClass(Reg, MRI);
-  return RC->getRegister(Idx % 0x100);
+  if (RC->getID() == AMDGPU::VGPR_16RegClassID) {
+    // This class has 2048 registers with interleaved lo16 and hi16.
+    RegNo *= 2;
+    if (Enc & AMDGPU::HWEncoding::IS_HI16)
+      ++RegNo;
+  }
+
+  return RC->getRegister(RegNo);
 }
 
 // Restore MSBs of a VGPR above 255 from the MCInstrAnalysis.
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index c80302e03beea..20fa1412a778e 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -3410,7 +3410,16 @@ MCPhysReg getVGPRWithMSBs(MCPhysReg Reg, unsigned MSBs,
   const MCRegisterClass *RC = getVGPRPhysRegClass(Reg, MRI);
   if (!RC)
     return AMDGPU::NoRegister;
-  return RC->getRegister(Idx | (MSBs << 8));
+
+  Idx |= MSBs << 8;
+  if (RC->getID() == AMDGPU::VGPR_16RegClassID) {
+    // This class has 2048 registers with interleaved lo16 and hi16.
+    Idx *= 2;
+    if (Enc & AMDGPU::HWEncoding::IS_HI16)
+      ++Idx;
+  }
+
+  return RC->getRegister(Idx);
 }
 
 std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
new file mode 100644
index 0000000000000..8a70a8acd28d3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
@@ -0,0 +1,66 @@
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -start-before=amdgpu-lower-vgpr-encoding -o - %s | FileCheck -check-prefixes=GCN,ASM %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -start-before=amdgpu-lower-vgpr-encoding -o - %s | llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -filetype=obj -o - | llvm-objdump -d --mcpu=gfx1250 --mattr=+real-true16 - | FileCheck -check-prefixes=GCN,DIS %s
+
+# ASM-LABEL: {{^}}high_vgprs:
+# DIS-LABEL: <high_vgprs>:
+---
+name:            high_vgprs
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; ASM: %bb.0:
+
+    ; GCN-NEXT: v_add_f16_e64 v0.h, v1.h, v2.h
+    $vgpr0_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr1_hi16, 0, undef $vgpr2_hi16, 0, 0, 0, implicit $exec, implicit $mode
+
+    ; GCN-NEXT: v_add_f16_e64 v0.l, v1.l, v2.l
+    $vgpr0_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr1_lo16, 0, undef $vgpr2_lo16, 0, 0, 0, implicit $exec, implicit $mode
+
+    ; GCN-NEXT: v_add_f16_e64 v128.h, v129.h, v130.h
+    $vgpr128_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr129_hi16, 0, undef $vgpr130_hi16, 0, 0, 0, implicit $exec, implicit $mode
+
+    ; GCN-NEXT: v_add_f16_e64 v128.l, v129.l, v130.l
+    $vgpr128_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr129_lo16, 0, undef $vgpr130_lo16, 0, 0, 0, implicit $exec, implicit $mode
+
+    ; GCN-NEXT: s_set_vgpr_msb 0x45
+    ; ASM-SAME:                                         ;  msbs: dst=1 src0=1 src1=1 src2=0
+    ; GCN-NEXT: v_add_f16_e64 v0.h /*v256.h*/, v1.h /*v257.h*/, v2.h /*v258.h*/
+    $vgpr256_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr257_hi16, 0, undef $vgpr258_hi16, 0, 0, 0, implicit $exec, implicit $mode
+
+    ; GCN-NEXT: v_add_f16_e64 v0.l /*v256.l*/, v1.l /*v257.l*/, v2.l /*v258.l*/
+    $vgpr256_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr257_lo16, 0, undef $vgpr258_lo16, 0, 0, 0, implicit $exec, implicit $mode
+
+    ; GCN-NEXT: v_add_f16_e64 v128.h /*v384.h*/, v129.h /*v385.h*/, v130.h /*v386.h*/
+    $vgpr384_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr385_hi16, 0, undef $vgpr386_hi16, 0, 0, 0, implicit $exec, implicit $mode
+
+    ; GCN-NEXT: v_add_f16_e64 v128.l /*v384.l*/, v129.l /*v385.l*/, v130.l /*v386.l*/
+    $vgpr384_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr385_lo16, 0, undef $vgpr386_lo16, 0, 0, 0, implicit $exec, implicit $mode
+
+    ; GCN-NEXT: s_set_vgpr_msb 0x8a
+    ; ASM-SAME:                                         ;  msbs: dst=2 src0=2 src1=2 src2=0
+    ; GCN-NEXT: v_add_f16_e64 v0.h /*v512.h*/, v1.h /*v513.h*/, v2.h /*v514.h*/
+    $vgpr512_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr513_hi16, 0, undef $vgpr514_hi16, 0, 0, 0, implicit $exec, implicit $mode
+
+    ; GCN-NEXT: v_add_f16_e64 v0.l /*v512.l*/, v1.l /*v513.l*/, v2.l /*v514.l*/
+    $vgpr512_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr513_lo16, 0, undef $vgpr514_lo16, 0, 0, 0, implicit $exec, implicit $mode
+
+    ; GCN-NEXT: v_add_f16_e64 v128.h /*v640.h*/, v129.h /*v641.h*/, v130.h /*v642.h*/
+    $vgpr640_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr641_hi16, 0, undef $vgpr642_hi16, 0, 0, 0, implicit $exec, implicit $mode
+
+    ; GCN-NEXT: v_add_f16_e64 v128.l /*v640.l*/, v129.l /*v641.l*/, v130.l /*v642.l*/
+    $vgpr640_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr641_lo16, 0, undef $vgpr642_lo16, 0, 0, 0, implicit $exec, implicit $mode
+
+    ; GCN-NEXT: s_set_vgpr_msb 0xcf
+    ; ASM-SAME:                                         ;  msbs: dst=3 src0=3 src1=3 src2=0
+    ; GCN-NEXT: v_add_f16_e64 v0.h /*v768.h*/, v1.h /*v769.h*/, v2.h /*v770.h*/
+    $vgpr768_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr769_hi16, 0, undef $vgpr770_hi16, 0, 0, 0, implicit $exec, implicit $mode
+
+    ; GCN-NEXT: v_add_f16_e64 v0.l /*v768.l*/, v1.l /*v769.l*/, v2.l /*v770.l*/
+    $vgpr768_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr769_lo16, 0, undef $vgpr770_lo16, 0, 0, 0, implicit $exec, implicit $mode
+
+    ; GCN-NEXT: v_add_f16_e64 v128.h /*v896.h*/, v129.h /*v897.h*/, v130.h /*v898.h*/
+    $vgpr896_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr897_hi16, 0, undef $vgpr898_hi16, 0, 0, 0, implicit $exec, implicit $mode
+
+    ; GCN-NEXT: v_add_f16_e64 v128.l /*v896.l*/, v129.l /*v897.l*/, v130.l /*v898.l*/
+    $vgpr896_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr897_lo16, 0, undef $vgpr898_lo16, 0, 0, 0, implicit $exec, implicit $mode
+...

@rampitec
Copy link
Collaborator Author

Remember, in June we had a discussion if MSB lowering works correctly with t16? Well, it does not. This is the fix.

@rampitec rampitec merged commit f693a7f into main Sep 23, 2025
13 checks passed
@rampitec rampitec deleted the users/rampitec/09-22-_amdgpu_fix_high_vgpr_printing_with_true16 branch September 23, 2025 16:51
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants