Skip to content

Commit 72a6ae6

Browse files
authored
[AMDGPU] Fix wrong MSB encoding for V_FMAMK instructions (#168107)
These instructions take their operands in the order `src0`, `imm`, `src1`. Fixes SWDEV-566579.
1 parent 6dad2c2 commit 72a6ae6

File tree

3 files changed

+89
-9
lines changed

3 files changed

+89
-9
lines changed

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3439,17 +3439,36 @@ getVGPRLoweringOperandTables(const MCInstrDesc &Desc) {
34393439
AMDGPU::OpName::src0Y, AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vsrc2Y,
34403440
AMDGPU::OpName::vdstY};
34413441

3442+
// VOP2 MADMK instructions use the src0, imm, src1 operand scheme.
3443+
static const AMDGPU::OpName VOP2MADMKOps[4] = {
3444+
AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,
3445+
AMDGPU::OpName::src1, AMDGPU::OpName::vdst};
3446+
34423447
unsigned TSFlags = Desc.TSFlags;
34433448

34443449
if (TSFlags &
34453450
(SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | SIInstrFlags::VOP3 |
34463451
SIInstrFlags::VOP3P | SIInstrFlags::VOPC | SIInstrFlags::DPP)) {
3452+
switch (Desc.getOpcode()) {
34473453
// LD_SCALE operands ignore MSB.
3448-
if (Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32 ||
3449-
Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250 ||
3450-
Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64 ||
3451-
Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250)
3454+
case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32:
3455+
case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250:
3456+
case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64:
3457+
case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250:
34523458
return {};
3459+
case AMDGPU::V_FMAMK_F16:
3460+
case AMDGPU::V_FMAMK_F16_t16:
3461+
case AMDGPU::V_FMAMK_F16_t16_gfx12:
3462+
case AMDGPU::V_FMAMK_F16_fake16:
3463+
case AMDGPU::V_FMAMK_F16_fake16_gfx12:
3464+
case AMDGPU::V_FMAMK_F32:
3465+
case AMDGPU::V_FMAMK_F32_gfx12:
3466+
case AMDGPU::V_FMAMK_F64:
3467+
case AMDGPU::V_FMAMK_F64_gfx1250:
3468+
return {VOP2MADMKOps, nullptr};
3469+
default:
3470+
break;
3471+
}
34533472
return {VOPOps, nullptr};
34543473
}
34553474

llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,35 @@ body: |
6464
; GCN-NEXT: v_add_f16_e64 v128.l /*v896.l*/, v129.l /*v897.l*/, v130.l /*v898.l*/
6565
$vgpr896_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr897_lo16, 0, undef $vgpr898_lo16, 0, 0, 0, implicit $exec, implicit $mode
6666
...
67+
68+
# ASM-LABEL: {{^}}fmaak_fmamk:
69+
# DIS-LABEL: <fmaak_fmamk>:
70+
---
71+
name: fmaak_fmamk
72+
tracksRegLiveness: true
73+
body: |
74+
bb.0:
75+
; ASM: %bb.0:
76+
77+
; We use an extra instruction to set the MSB, and then we expect it to be reset to 0 (lower 16-bit).
78+
79+
; GCN: s_set_vgpr_msb 0xcf
80+
; GCN-NEXT: v_add_f16_e64 v0.h /*v768.h*/, v1.h /*v769.h*/, v2.h /*v770.h*/
81+
$vgpr768_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr769_hi16, 0, undef $vgpr770_hi16, 0, 0, 0, implicit $exec, implicit $mode
82+
83+
; GCN-NEXT: s_set_vgpr_msb 0xcf00
84+
; GCN-NEXT: v_fmamk_f16 v26.l, v56.l, 0x1, v58.l
85+
$vgpr26_lo16 = V_FMAMK_F16_t16 undef $vgpr56_lo16, 1, undef $vgpr58_lo16, implicit $exec, implicit $mode
86+
87+
; GCN-NEXT: v_fmamk_f16 v0.l, v35.l, 0x1, v2.l
88+
$vgpr0_lo16 = V_FMAMK_F16_t16 undef $vgpr35_lo16, 1, undef $vgpr2_lo16, implicit $exec, implicit $mode
89+
90+
; GCN-NEXT: v_fmamk_f16 v0.l, v2.l, 0x1, v6.l
91+
$vgpr0_lo16 = V_FMAMK_F16_t16 undef $vgpr2_lo16, 1, undef $vgpr6_lo16, implicit $exec, implicit $mode
92+
93+
; GCN-NEXT: v_fmamk_f16 v5.l, v2.l, 0x1, v4.l
94+
$vgpr5_lo16 = V_FMAMK_F16_t16 undef $vgpr2_lo16, 1, undef $vgpr4_lo16, implicit $exec, implicit $mode
95+
96+
; ASM: NumVgprs: 771
97+
98+
...

llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -332,23 +332,52 @@ body: |
332332
; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1, v2 /*v258*/, 0x1
333333
$vgpr256 = V_FMAAK_F32 undef $vgpr1, undef $vgpr258, 1, implicit $exec, implicit $mode
334334
335-
; GCN-NEXT: s_set_vgpr_msb 0x4445
335+
; GCN-NEXT: s_set_vgpr_msb 0x4451
336336
; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1 /*v257*/, 0x1, v2 /*v258*/
337337
$vgpr256 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr258, implicit $exec, implicit $mode
338338
339-
; GCN-NEXT: s_set_vgpr_msb 0x4505
339+
; GCN-NEXT: s_set_vgpr_msb 0x5111
340340
; GCN-NEXT: v_fmamk_f32 v0, v1 /*v257*/, 0x1, v2 /*v258*/
341341
$vgpr0 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr258, implicit $exec, implicit $mode
342342
343-
; GCN-NEXT: s_set_vgpr_msb 0x541
343+
; GCN-NEXT: s_set_vgpr_msb 0x1141
344344
; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1 /*v257*/, 0x1, v2
345345
$vgpr256 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr2, implicit $exec, implicit $mode
346346
347-
; GCN-NEXT: s_set_vgpr_msb 0x4144
347+
; GCN-NEXT: s_set_vgpr_msb 0x4150
348348
; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1, 0x1, v2 /*v258*/
349349
$vgpr256 = V_FMAMK_F32 undef $vgpr1, 1, undef $vgpr258, implicit $exec, implicit $mode
350350
351-
; ASM: NumVgprs: 259
351+
; GCN-NEXT: s_set_vgpr_msb 0x5051
352+
; GCN-NEXT: v_fmamk_f64 v[4:5] /*v[260:261]*/, v[100:101] /*v[356:357]*/, 0x1, v[2:3] /*v[258:259]*/
353+
$vgpr260_vgpr261 = V_FMAMK_F64 undef $vgpr356_vgpr357, 1, undef $vgpr258_vgpr259, implicit $exec, implicit $mode
354+
355+
; GCN-NEXT: s_set_vgpr_msb 0x5101
356+
; GCN-NEXT: v_fmamk_f64 v[0:1], v[100:101] /*v[356:357]*/, 0x1, v[2:3]
357+
$vgpr0_vgpr1 = V_FMAMK_F64 undef $vgpr356_vgpr357, 1, undef $vgpr2_vgpr3, implicit $exec, implicit $mode
358+
359+
; GCN-NEXT: s_set_vgpr_msb 0x110
360+
; GCN-NEXT: v_fmamk_f64 v[0:1], v[2:3], 0x1, v[100:101] /*v[356:357]*/
361+
$vgpr0_vgpr1 = V_FMAMK_F64 undef $vgpr2_vgpr3, 1, undef $vgpr356_vgpr357, implicit $exec, implicit $mode
362+
363+
; GCN-NEXT: s_set_vgpr_msb 0x1040
364+
; GCN-NEXT: v_fmamk_f64 v[0:1] /*v[256:257]*/, v[2:3], 0x1, v[4:5]
365+
$vgpr256_vgpr257 = V_FMAMK_F64 undef $vgpr2_vgpr3, 1, undef $vgpr4_vgpr5, implicit $exec, implicit $mode
366+
367+
; GCN-NEXT: s_set_vgpr_msb 0x4000
368+
; GCN-NEXT: v_fmamk_f16 v26, v56, 0x1, v58
369+
$vgpr26 = V_FMAMK_F16_fake16 undef $vgpr56, 1, undef $vgpr58, implicit $exec, implicit $mode
370+
371+
; GCN-NEXT: v_fmamk_f16 v0, v35, 0x1, v2
372+
$vgpr0 = V_FMAMK_F16_fake16 undef $vgpr35, 1, undef $vgpr2, implicit $exec, implicit $mode
373+
374+
; GCN-NEXT: v_fmamk_f16 v0, v2, 0x1, v6
375+
$vgpr0 = V_FMAMK_F16_fake16 undef $vgpr2, 1, undef $vgpr6, implicit $exec, implicit $mode
376+
377+
; GCN-NEXT: v_fmamk_f16 v5, v2, 0x1, v4
378+
$vgpr5 = V_FMAMK_F16_fake16 undef $vgpr2, 1, undef $vgpr4, implicit $exec, implicit $mode
379+
380+
; ASM: NumVgprs: 358
352381
353382
...
354383

0 commit comments

Comments
 (0)