Skip to content

Commit 31ec45a

Browse files
authored
[AMDGPU] Fix VGPR lowering for V_DUAL_FMAMK_F32 (#170567)
Fixes: #170552
1 parent 6e47966 commit 31ec45a

File tree

3 files changed: 37 additions and 4 deletions

3 files changed: 37 additions and 4 deletions

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3450,6 +3450,12 @@ getVGPRLoweringOperandTables(const MCInstrDesc &Desc) {
34503450
static const AMDGPU::OpName VOP2MADMKOps[4] = {
34513451
AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,
34523452
AMDGPU::OpName::src1, AMDGPU::OpName::vdst};
3453+
static const AMDGPU::OpName VOPDFMAMKOpsX[4] = {
3454+
AMDGPU::OpName::src0X, AMDGPU::OpName::NUM_OPERAND_NAMES,
3455+
AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vdstX};
3456+
static const AMDGPU::OpName VOPDFMAMKOpsY[4] = {
3457+
AMDGPU::OpName::src0Y, AMDGPU::OpName::NUM_OPERAND_NAMES,
3458+
AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vdstY};
34533459

34543460
unsigned TSFlags = Desc.TSFlags;
34553461

@@ -3491,8 +3497,11 @@ getVGPRLoweringOperandTables(const MCInstrDesc &Desc) {
34913497
if (TSFlags & SIInstrFlags::VIMAGE)
34923498
return {VIMGOps, nullptr};
34933499

3494-
if (AMDGPU::isVOPD(Desc.getOpcode()))
3495-
return {VOPDOpsX, VOPDOpsY};
3500+
if (AMDGPU::isVOPD(Desc.getOpcode())) {
3501+
auto [OpX, OpY] = getVOPDComponents(Desc.getOpcode());
3502+
return {(OpX == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsX : VOPDOpsX,
3503+
(OpY == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsY : VOPDOpsY};
3504+
}
34963505

34973506
assert(!(TSFlags & SIInstrFlags::MIMG));
34983507

llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -283,11 +283,11 @@ body: |
283283
; GCN-NEXT: v_dual_mov_b32 v2, v3 /*v259*/ :: v_dual_add_f32 v3, v1 /*v257*/, v2 /*v258*/
284284
$vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx1250 undef $vgpr259, undef $vgpr257, undef $vgpr258, implicit $exec, implicit $mode
285285
286-
; GCN-NEXT: s_set_vgpr_msb 0x544
286+
; GCN-NEXT: s_set_vgpr_msb 0x554
287287
; GCN-NEXT: v_dual_fmamk_f32 v244 /*v500*/, v0, 0xa, v44 /*v300*/ :: v_dual_fmac_f32 v3 /*v259*/, v1, v1 /*v257*/
288288
$vgpr500, $vgpr259 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 10, undef $vgpr300, undef $vgpr1, undef $vgpr257, $vgpr259, implicit $mode, implicit $exec
289289
290-
; GCN-NEXT: s_set_vgpr_msb 0x4410
290+
; GCN-NEXT: s_set_vgpr_msb 0x5410
291291
; GCN-NEXT: v_dual_fma_f32 v0, v6, v6, v44 /*v300*/ :: v_dual_fma_f32 v1, v4, v5, v45 /*v301*/
292292
$vgpr0, $vgpr1 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr6, 0, undef $vgpr300, 0, undef $vgpr4, 0, undef $vgpr5, 0, undef $vgpr301, implicit $mode, implicit $exec
293293
@@ -303,6 +303,10 @@ body: |
303303
; GCN-NEXT: v_dual_fmac_f32 v2 /*v514*/, v6 /*v518*/, v8 /*v776*/ :: v_dual_fma_f32 v3 /*v515*/, v4 /*v516*/, v7 /*v775*/, v3 /*v515*/
304304
$vgpr514, $vgpr515 = V_DUAL_FMAC_F32_e32_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr518, 0, undef $vgpr776, undef $vgpr514, 0, undef $vgpr516, 0, undef $vgpr775, 0, $vgpr515, implicit $mode, implicit $exec
305305
306+
; GCN-NEXT: s_set_vgpr_msb 0xae54
307+
; GCN-NEXT: v_dual_fmac_f32 v7 /*v263*/, v1, v1 /*v257*/ :: v_dual_fmamk_f32 v244 /*v500*/, v0, 0xa, v44 /*v300*/
308+
$vgpr263, $vgpr500 = V_DUAL_FMAC_F32_e32_X_FMAMK_F32_gfx1250 undef $vgpr1, undef $vgpr257, $vgpr263, undef $vgpr0, 10, undef $vgpr300, implicit $mode, implicit $exec
309+
306310
; ASM: NumVgprs: 777
307311
308312
...

llvm/test/CodeGen/AMDGPU/vopd-combine-gfx1250.mir

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4481,3 +4481,23 @@ body: |
44814481
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
44824482
$vgpr0 = V_ADD_F32_e64_dpp $vgpr0, 0, $vgpr2, 0, $vgpr1, 0, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
44834483
...
4484+
4485+
---
4486+
name: vopd_no_combine_fmamk_src1
4487+
tracksRegLiveness: true
4488+
body: |
4489+
bb.0:
4490+
; SCHED-LABEL: name: vopd_no_combine_fmamk_src1
4491+
; SCHED: $vgpr142 = V_FMAMK_F32 $vgpr377, 1069066811, $vgpr142, implicit $mode, implicit $exec
4492+
; SCHED-NEXT: $vgpr145 = V_FMAC_F32_e32 1069066811, $vgpr366, $vgpr145, implicit $mode, implicit $exec
4493+
;
4494+
; PAIR-LABEL: name: vopd_no_combine_fmamk_src1
4495+
; PAIR: $vgpr142, $vgpr145 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 $vgpr377, 1069066811, $vgpr142, 1069066811, $vgpr366, $vgpr145, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
4496+
;
4497+
; LOWER-LABEL: name: vopd_no_combine_fmamk_src1
4498+
; LOWER: S_SET_VGPR_MSB 5, implicit-def $mode
4499+
; LOWER-NEXT: $vgpr142, $vgpr145 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 $vgpr377, 1069066811, $vgpr142, 1069066811, $vgpr366, $vgpr145, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec
4500+
; LOWER-NEXT: S_SET_VGPR_MSB 1280, implicit-def $mode
4501+
$vgpr142 = V_FMAMK_F32 $vgpr377, 1069066811, $vgpr142, implicit $mode, implicit $exec
4502+
$vgpr145 = V_FMAC_F32_e32 1069066811, $vgpr366, $vgpr145, implicit $mode, implicit $exec
4503+
...

0 commit comments

Comments
 (0)