Skip to content

Conversation

@rampitec
Copy link
Collaborator

@rampitec rampitec commented Dec 3, 2025

Fixes: #170552

@rampitec
Copy link
Collaborator Author

rampitec commented Dec 3, 2025

This stack of pull requests is managed by sgh.

@llvmbot
Copy link
Member

llvmbot commented Dec 3, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

Changes

Fixes: #170552


Full diff: https://github.com/llvm/llvm-project/pull/170567.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+11-2)
  • (modified) llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir (+6-2)
  • (modified) llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir (+20)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 90f0b49ab9a78..bcfdb2ca5a3da 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -3450,6 +3450,12 @@ getVGPRLoweringOperandTables(const MCInstrDesc &Desc) {
   static const AMDGPU::OpName VOP2MADMKOps[4] = {
       AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,
       AMDGPU::OpName::src1, AMDGPU::OpName::vdst};
+  static const AMDGPU::OpName VOPDFMAMKOpsX[4] = {
+      AMDGPU::OpName::src0X, AMDGPU::OpName::NUM_OPERAND_NAMES,
+      AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vdstX};
+  static const AMDGPU::OpName VOPDFMAMKOpsY[4] = {
+      AMDGPU::OpName::src0Y, AMDGPU::OpName::NUM_OPERAND_NAMES,
+      AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vdstY};
 
   unsigned TSFlags = Desc.TSFlags;
 
@@ -3491,8 +3497,11 @@ getVGPRLoweringOperandTables(const MCInstrDesc &Desc) {
   if (TSFlags & SIInstrFlags::VIMAGE)
     return {VIMGOps, nullptr};
 
-  if (AMDGPU::isVOPD(Desc.getOpcode()))
-    return {VOPDOpsX, VOPDOpsY};
+  if (AMDGPU::isVOPD(Desc.getOpcode())) {
+    auto [OpX, OpY] = getVOPDComponents(Desc.getOpcode());
+    return {(OpX == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsX : VOPDOpsX,
+            (OpY == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsY : VOPDOpsY};
+  }
 
   assert(!(TSFlags & SIInstrFlags::MIMG));
 
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
index e8c27f2eb3685..21f5515b7fb91 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
@@ -283,11 +283,11 @@ body:             |
     ; GCN-NEXT: v_dual_mov_b32 v2, v3 /*v259*/ :: v_dual_add_f32 v3, v1 /*v257*/, v2 /*v258*/
     $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx1250 undef $vgpr259, undef $vgpr257, undef $vgpr258, implicit $exec, implicit $mode
 
-    ; GCN-NEXT: s_set_vgpr_msb 0x544
+    ; GCN-NEXT: s_set_vgpr_msb 0x554
     ; GCN-NEXT: v_dual_fmamk_f32 v244 /*v500*/, v0, 0xa, v44 /*v300*/ :: v_dual_fmac_f32 v3 /*v259*/, v1, v1 /*v257*/
     $vgpr500, $vgpr259 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 10, undef $vgpr300, undef $vgpr1, undef $vgpr257, $vgpr259, implicit $mode, implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 0x4410
+    ; GCN-NEXT: s_set_vgpr_msb 0x5410
     ; GCN-NEXT: v_dual_fma_f32 v0, v6, v6, v44 /*v300*/ :: v_dual_fma_f32 v1, v4, v5, v45 /*v301*/
     $vgpr0, $vgpr1 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr6, 0, undef $vgpr300, 0, undef $vgpr4, 0, undef $vgpr5, 0, undef $vgpr301, implicit $mode, implicit $exec
 
@@ -303,6 +303,10 @@ body:             |
     ; GCN-NEXT: v_dual_fmac_f32 v2 /*v514*/, v6 /*v518*/, v8 /*v776*/ :: v_dual_fma_f32 v3 /*v515*/, v4 /*v516*/, v7 /*v775*/, v3 /*v515*/
     $vgpr514, $vgpr515 = V_DUAL_FMAC_F32_e32_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr518, 0, undef $vgpr776, undef $vgpr514, 0, undef $vgpr516, 0, undef $vgpr775, 0, $vgpr515, implicit $mode, implicit $exec
 
+    ; GCN-NEXT: s_set_vgpr_msb 0xae54
+    ; GCN-NEXT: v_dual_fmac_f32 v7 /*v263*/, v1, v1 /*v257*/ :: v_dual_fmamk_f32 v244 /*v500*/, v0, 0xa, v44 /*v300*/
+    $vgpr263, $vgpr500 = V_DUAL_FMAC_F32_e32_X_FMAMK_F32_gfx1250 undef $vgpr1, undef $vgpr257, $vgpr263, undef $vgpr0, 10, undef $vgpr300, implicit $mode, implicit $exec
+
     ; ASM: NumVgprs: 777
 
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir
index b05edd046b874..05bbb0f54ef9e 100644
--- a/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir
@@ -4481,3 +4481,23 @@ body: |
       $vgpr3 = V_MOV_B32_e32 0, implicit $exec
       $vgpr0 = V_ADD_F32_e64_dpp $vgpr0, 0, $vgpr2, 0, $vgpr1, 0, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
 ...
+
+---
+name: vopd_no_combine_fmamk_src1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; SCHED-LABEL: name: vopd_no_combine_fmamk_src1
+    ; SCHED: $vgpr142 = V_FMAMK_F32 $vgpr377, 1069066811, $vgpr142, implicit $mode, implicit $exec
+    ; SCHED-NEXT: $vgpr145 = V_FMAC_F32_e32 1069066811, $vgpr366, $vgpr145, implicit $mode, implicit $exec
+    ;
+    ; PAIR-LABEL: name: vopd_no_combine_fmamk_src1
+    ; PAIR: $vgpr142, $vgpr145 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 $vgpr377, 1069066811, $vgpr142, 1069066811, $vgpr366, $vgpr145, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ;
+    ; LOWER-LABEL: name: vopd_no_combine_fmamk_src1
+    ; LOWER: S_SET_VGPR_MSB 5, implicit-def $mode
+    ; LOWER-NEXT: $vgpr142, $vgpr145 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 $vgpr377, 1069066811, $vgpr142, 1069066811, $vgpr366, $vgpr145, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+    ; LOWER-NEXT: S_SET_VGPR_MSB 1280, implicit-def $mode
+    $vgpr142 = V_FMAMK_F32 $vgpr377, 1069066811, $vgpr142, implicit $mode, implicit $exec
+    $vgpr145 = V_FMAC_F32_e32 1069066811, $vgpr366, $vgpr145, implicit $mode, implicit $exec
+...

static const AMDGPU::OpName VOP2MADMKOps[4] = {
AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,
AMDGPU::OpName::src1, AMDGPU::OpName::vdst};
static const AMDGPU::OpName VOPDFMAMKOpsX[4] = {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we are gonna run into similar issues in the future as well, since we have so many variants of how these operands are named. I fixed one not long ago as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And this is gonna need something different in the downstream version as well. :-D

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there are more opcodes like this, yes.

@rampitec rampitec merged commit 31ec45a into main Dec 3, 2025
14 checks passed
@rampitec rampitec deleted the users/rampitec/fix-dual-fmamk-vgpr-lowering branch December 3, 2025 23:12
@jayfoad
Copy link
Contributor

jayfoad commented Dec 4, 2025

Thanks!

@jayfoad
Copy link
Contributor

jayfoad commented Dec 4, 2025

In another part of the docs I see:

Instructions with non-standard operands

  • FMAAK: uses SRC0 and SRC1 MSBs
  • FMAMK: uses SRC0 and SRC2 MSBs

so do we need a special case for FMAAK too?

@rampitec
Copy link
Collaborator Author

rampitec commented Dec 4, 2025

In another part of the docs I see:

Instructions with non-standard operands

  • FMAAK: uses SRC0 and SRC1 MSBs
  • FMAMK: uses SRC0 and SRC2 MSBs

so do we need a special case for FMAAK too?

I think FMAAK is fine. The operand order is src0, src1, kimm, so it is natural.

$vgpr500, $vgpr259 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 10, undef $vgpr300, undef $vgpr1, undef $vgpr257, $vgpr259, implicit $mode, implicit $exec
; GCN-NEXT: s_set_vgpr_msb 0x4410
; GCN-NEXT: s_set_vgpr_msb 0x5410
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a side note I wish a better asm syntax here. The test can be improved by adding asm comments, but ideally asm syntax shall show src0/1/2/vdst components individually.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah we really should support SP3 structured initializer syntax for lots of things like:

s_set_vgpr_msb { src0_msb: 2, dst_msb: 1 } // BF860042

FYI @kosarev

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did they? Then we definitely should.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But we unfortunately need to add 'old' values. I understand that SP3 does not care about the trap handler.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a compiler we have a convention to reset MSBs before any join. As a pure asm tool SP3 does not. So it does not know old values, but we should, so our syntax must diverge here.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI, contacted Justin, because it is a useless syntax if we cannot record old bits.

kcloudy0717 pushed a commit to kcloudy0717/llvm-project that referenced this pull request Dec 4, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[AMDGPU] "Invalid VOPD pair was created" error with V_FMAMK_F32

6 participants