-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[AMDGPU] Fix VGPR lowering for V_DUAL_FMAMK_F32 #170567
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
This stack of pull requests is managed by sgh. |
|
@llvm/pr-subscribers-backend-amdgpu Author: Stanislav Mekhanoshin (rampitec) ChangesFixes: #170552 Full diff: https://github.com/llvm/llvm-project/pull/170567.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 90f0b49ab9a78..bcfdb2ca5a3da 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -3450,6 +3450,12 @@ getVGPRLoweringOperandTables(const MCInstrDesc &Desc) {
static const AMDGPU::OpName VOP2MADMKOps[4] = {
AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,
AMDGPU::OpName::src1, AMDGPU::OpName::vdst};
+ static const AMDGPU::OpName VOPDFMAMKOpsX[4] = {
+ AMDGPU::OpName::src0X, AMDGPU::OpName::NUM_OPERAND_NAMES,
+ AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vdstX};
+ static const AMDGPU::OpName VOPDFMAMKOpsY[4] = {
+ AMDGPU::OpName::src0Y, AMDGPU::OpName::NUM_OPERAND_NAMES,
+ AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vdstY};
unsigned TSFlags = Desc.TSFlags;
@@ -3491,8 +3497,11 @@ getVGPRLoweringOperandTables(const MCInstrDesc &Desc) {
if (TSFlags & SIInstrFlags::VIMAGE)
return {VIMGOps, nullptr};
- if (AMDGPU::isVOPD(Desc.getOpcode()))
- return {VOPDOpsX, VOPDOpsY};
+ if (AMDGPU::isVOPD(Desc.getOpcode())) {
+ auto [OpX, OpY] = getVOPDComponents(Desc.getOpcode());
+ return {(OpX == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsX : VOPDOpsX,
+ (OpY == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsY : VOPDOpsY};
+ }
assert(!(TSFlags & SIInstrFlags::MIMG));
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
index e8c27f2eb3685..21f5515b7fb91 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
@@ -283,11 +283,11 @@ body: |
; GCN-NEXT: v_dual_mov_b32 v2, v3 /*v259*/ :: v_dual_add_f32 v3, v1 /*v257*/, v2 /*v258*/
$vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx1250 undef $vgpr259, undef $vgpr257, undef $vgpr258, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x544
+ ; GCN-NEXT: s_set_vgpr_msb 0x554
; GCN-NEXT: v_dual_fmamk_f32 v244 /*v500*/, v0, 0xa, v44 /*v300*/ :: v_dual_fmac_f32 v3 /*v259*/, v1, v1 /*v257*/
$vgpr500, $vgpr259 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 10, undef $vgpr300, undef $vgpr1, undef $vgpr257, $vgpr259, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x4410
+ ; GCN-NEXT: s_set_vgpr_msb 0x5410
; GCN-NEXT: v_dual_fma_f32 v0, v6, v6, v44 /*v300*/ :: v_dual_fma_f32 v1, v4, v5, v45 /*v301*/
$vgpr0, $vgpr1 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr6, 0, undef $vgpr300, 0, undef $vgpr4, 0, undef $vgpr5, 0, undef $vgpr301, implicit $mode, implicit $exec
@@ -303,6 +303,10 @@ body: |
; GCN-NEXT: v_dual_fmac_f32 v2 /*v514*/, v6 /*v518*/, v8 /*v776*/ :: v_dual_fma_f32 v3 /*v515*/, v4 /*v516*/, v7 /*v775*/, v3 /*v515*/
$vgpr514, $vgpr515 = V_DUAL_FMAC_F32_e32_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr518, 0, undef $vgpr776, undef $vgpr514, 0, undef $vgpr516, 0, undef $vgpr775, 0, $vgpr515, implicit $mode, implicit $exec
+ ; GCN-NEXT: s_set_vgpr_msb 0xae54
+ ; GCN-NEXT: v_dual_fmac_f32 v7 /*v263*/, v1, v1 /*v257*/ :: v_dual_fmamk_f32 v244 /*v500*/, v0, 0xa, v44 /*v300*/
+ $vgpr263, $vgpr500 = V_DUAL_FMAC_F32_e32_X_FMAMK_F32_gfx1250 undef $vgpr1, undef $vgpr257, $vgpr263, undef $vgpr0, 10, undef $vgpr300, implicit $mode, implicit $exec
+
; ASM: NumVgprs: 777
...
diff --git a/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir
index b05edd046b874..05bbb0f54ef9e 100644
--- a/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir
@@ -4481,3 +4481,23 @@ body: |
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
$vgpr0 = V_ADD_F32_e64_dpp $vgpr0, 0, $vgpr2, 0, $vgpr1, 0, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
...
+
+---
+name: vopd_no_combine_fmamk_src1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; SCHED-LABEL: name: vopd_no_combine_fmamk_src1
+ ; SCHED: $vgpr142 = V_FMAMK_F32 $vgpr377, 1069066811, $vgpr142, implicit $mode, implicit $exec
+ ; SCHED-NEXT: $vgpr145 = V_FMAC_F32_e32 1069066811, $vgpr366, $vgpr145, implicit $mode, implicit $exec
+ ;
+ ; PAIR-LABEL: name: vopd_no_combine_fmamk_src1
+ ; PAIR: $vgpr142, $vgpr145 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 $vgpr377, 1069066811, $vgpr142, 1069066811, $vgpr366, $vgpr145, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+ ;
+ ; LOWER-LABEL: name: vopd_no_combine_fmamk_src1
+ ; LOWER: S_SET_VGPR_MSB 5, implicit-def $mode
+ ; LOWER-NEXT: $vgpr142, $vgpr145 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 $vgpr377, 1069066811, $vgpr142, 1069066811, $vgpr366, $vgpr145, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
+ ; LOWER-NEXT: S_SET_VGPR_MSB 1280, implicit-def $mode
+ $vgpr142 = V_FMAMK_F32 $vgpr377, 1069066811, $vgpr142, implicit $mode, implicit $exec
+ $vgpr145 = V_FMAC_F32_e32 1069066811, $vgpr366, $vgpr145, implicit $mode, implicit $exec
+...
|
| static const AMDGPU::OpName VOP2MADMKOps[4] = { | ||
| AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES, | ||
| AMDGPU::OpName::src1, AMDGPU::OpName::vdst}; | ||
| static const AMDGPU::OpName VOPDFMAMKOpsX[4] = { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess we are gonna run into similar issues in the future as well, since we have so many variants of how these operands are named. I fixed one not long ago as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
And this is gonna need something different in the downstream version as well. :-D
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If there are more opcodes like this, yes.
|
Thanks! |
|
In another part of the docs I see:
so do we need a special case for FMAAK too? |
I think FMAAK is fine. The operand order is src0, src1, kimm, so it is natural. |
| $vgpr500, $vgpr259 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 10, undef $vgpr300, undef $vgpr1, undef $vgpr257, $vgpr259, implicit $mode, implicit $exec | ||
| ; GCN-NEXT: s_set_vgpr_msb 0x4410 | ||
| ; GCN-NEXT: s_set_vgpr_msb 0x5410 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As a side note I wish a better asm syntax here. The test can be improved by adding asm comments, but ideally asm syntax shall show src0/1/2/vdst components individually.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah we really should support SP3 structured initializer syntax for lots of things like:
s_set_vgpr_msb { src0_msb: 2, dst_msb: 1 } // BF860042
FYI @kosarev
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Did they? Then we definitely should.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But we unfortunately need to add 'old' values. I understand that SP3 does not care about the trap handler.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As a compiler we have a convention to reset MSBs before any join. As a pure asm tool SP3 does not. So it does not know old values, but we should, so our syntax must diverge here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FYI, contacted Justin, because it is a useless syntax if we cannot record old bits.
Fixes: #170552