Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3439,17 +3439,36 @@ getVGPRLoweringOperandTables(const MCInstrDesc &Desc) {
AMDGPU::OpName::src0Y, AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vsrc2Y,
AMDGPU::OpName::vdstY};

// VOP2 MADMK instructions use src0, imm, src1 scheme.
static const AMDGPU::OpName VOP2MADMKOps[4] = {
AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,
AMDGPU::OpName::src1, AMDGPU::OpName::vdst};

unsigned TSFlags = Desc.TSFlags;

if (TSFlags &
(SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | SIInstrFlags::VOP3 |
SIInstrFlags::VOP3P | SIInstrFlags::VOPC | SIInstrFlags::DPP)) {
switch (Desc.getOpcode()) {
// LD_SCALE operands ignore MSB.
if (Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32 ||
Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250 ||
Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64 ||
Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250)
case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32:
case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250:
case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64:
case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250:
return {};
case AMDGPU::V_FMAMK_F16:
case AMDGPU::V_FMAMK_F16_t16:
case AMDGPU::V_FMAMK_F16_t16_gfx12:
case AMDGPU::V_FMAMK_F16_fake16:
case AMDGPU::V_FMAMK_F16_fake16_gfx12:
case AMDGPU::V_FMAMK_F32:
case AMDGPU::V_FMAMK_F32_gfx12:
case AMDGPU::V_FMAMK_F64:
case AMDGPU::V_FMAMK_F64_gfx1250:
return {VOP2MADMKOps, nullptr};
default:
break;
}
return {VOPOps, nullptr};
}

Expand Down
32 changes: 32 additions & 0 deletions llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,35 @@ body: |
; GCN-NEXT: v_add_f16_e64 v128.l /*v896.l*/, v129.l /*v897.l*/, v130.l /*v898.l*/
$vgpr896_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr897_lo16, 0, undef $vgpr898_lo16, 0, 0, 0, implicit $exec, implicit $mode
...

# ASM-LABEL: {{^}}fmaak_fmamk:
# DIS-LABEL: <fmaak_fmamk>:
---
# Exercises V_FMAMK_F16_t16 after an instruction that uses high VGPRs:
# the leading V_ADD_F16_t16_e64 references VGPRs 768-770, while the four
# V_FMAMK_F16_t16 instructions that follow reference only VGPRs below 256.
name: fmaak_fmamk
tracksRegLiveness: true
body: |
bb.0:
; ASM: %bb.0:
; We use an extra instruction to set the MSB, and then we expect it to be reset to 0 (lower 16-bit).
; GCN: s_set_vgpr_msb 0xcf
; GCN-NEXT: v_add_f16_e64 v0.h /*v768.h*/, v1.h /*v769.h*/, v2.h /*v770.h*/
$vgpr768_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr769_hi16, 0, undef $vgpr770_hi16, 0, 0, 0, implicit $exec, implicit $mode
; A single s_set_vgpr_msb is expected before the first FMAMK; the consecutive
; -NEXT checks below then require the remaining FMAMK instructions to follow
; one another with no further s_set_vgpr_msb in between.
; GCN-NEXT: s_set_vgpr_msb 0xcf00
; GCN-NEXT: v_fmamk_f16 v26.l, v56.l, 0x1, v58.l
$vgpr26_lo16 = V_FMAMK_F16_t16 undef $vgpr56_lo16, 1, undef $vgpr58_lo16, implicit $exec, implicit $mode
; GCN-NEXT: v_fmamk_f16 v0.l, v35.l, 0x1, v2.l
$vgpr0_lo16 = V_FMAMK_F16_t16 undef $vgpr35_lo16, 1, undef $vgpr2_lo16, implicit $exec, implicit $mode
; GCN-NEXT: v_fmamk_f16 v0.l, v2.l, 0x1, v6.l
$vgpr0_lo16 = V_FMAMK_F16_t16 undef $vgpr2_lo16, 1, undef $vgpr6_lo16, implicit $exec, implicit $mode
; GCN-NEXT: v_fmamk_f16 v5.l, v2.l, 0x1, v4.l
$vgpr5_lo16 = V_FMAMK_F16_t16 undef $vgpr2_lo16, 1, undef $vgpr4_lo16, implicit $exec, implicit $mode
; The highest VGPR referenced in this function is v770, matching the count of 771 below.
; ASM: NumVgprs: 771
...
39 changes: 34 additions & 5 deletions llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
Original file line number Diff line number Diff line change
Expand Up @@ -332,23 +332,52 @@ body: |
; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1, v2 /*v258*/, 0x1
$vgpr256 = V_FMAAK_F32 undef $vgpr1, undef $vgpr258, 1, implicit $exec, implicit $mode
; GCN-NEXT: s_set_vgpr_msb 0x4445
; GCN-NEXT: s_set_vgpr_msb 0x4451
; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1 /*v257*/, 0x1, v2 /*v258*/
$vgpr256 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr258, implicit $exec, implicit $mode
; GCN-NEXT: s_set_vgpr_msb 0x4505
; GCN-NEXT: s_set_vgpr_msb 0x5111
; GCN-NEXT: v_fmamk_f32 v0, v1 /*v257*/, 0x1, v2 /*v258*/
$vgpr0 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr258, implicit $exec, implicit $mode
; GCN-NEXT: s_set_vgpr_msb 0x541
; GCN-NEXT: s_set_vgpr_msb 0x1141
; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1 /*v257*/, 0x1, v2
$vgpr256 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr2, implicit $exec, implicit $mode
; GCN-NEXT: s_set_vgpr_msb 0x4144
; GCN-NEXT: s_set_vgpr_msb 0x4150
; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1, 0x1, v2 /*v258*/
$vgpr256 = V_FMAMK_F32 undef $vgpr1, 1, undef $vgpr258, implicit $exec, implicit $mode
; ASM: NumVgprs: 259
; GCN-NEXT: s_set_vgpr_msb 0x5051
; GCN-NEXT: v_fmamk_f64 v[4:5] /*v[260:261]*/, v[100:101] /*v[356:357]*/, 0x1, v[2:3] /*v[258:259]*/
$vgpr260_vgpr261 = V_FMAMK_F64 undef $vgpr356_vgpr357, 1, undef $vgpr258_vgpr259, implicit $exec, implicit $mode
; GCN-NEXT: s_set_vgpr_msb 0x5101
; GCN-NEXT: v_fmamk_f64 v[0:1], v[100:101] /*v[356:357]*/, 0x1, v[2:3]
$vgpr0_vgpr1 = V_FMAMK_F64 undef $vgpr356_vgpr357, 1, undef $vgpr2_vgpr3, implicit $exec, implicit $mode
; GCN-NEXT: s_set_vgpr_msb 0x110
; GCN-NEXT: v_fmamk_f64 v[0:1], v[2:3], 0x1, v[100:101] /*v[356:357]*/
$vgpr0_vgpr1 = V_FMAMK_F64 undef $vgpr2_vgpr3, 1, undef $vgpr356_vgpr357, implicit $exec, implicit $mode
; GCN-NEXT: s_set_vgpr_msb 0x1040
; GCN-NEXT: v_fmamk_f64 v[0:1] /*v[256:257]*/, v[2:3], 0x1, v[4:5]
$vgpr256_vgpr257 = V_FMAMK_F64 undef $vgpr2_vgpr3, 1, undef $vgpr4_vgpr5, implicit $exec, implicit $mode
; GCN-NEXT: s_set_vgpr_msb 0x4000
; GCN-NEXT: v_fmamk_f16 v26, v56, 0x1, v58
$vgpr26 = V_FMAMK_F16_fake16 undef $vgpr56, 1, undef $vgpr58, implicit $exec, implicit $mode
; GCN-NEXT: v_fmamk_f16 v0, v35, 0x1, v2
$vgpr0 = V_FMAMK_F16_fake16 undef $vgpr35, 1, undef $vgpr2, implicit $exec, implicit $mode
; GCN-NEXT: v_fmamk_f16 v0, v2, 0x1, v6
$vgpr0 = V_FMAMK_F16_fake16 undef $vgpr2, 1, undef $vgpr6, implicit $exec, implicit $mode
; GCN-NEXT: v_fmamk_f16 v5, v2, 0x1, v4
$vgpr5 = V_FMAMK_F16_fake16 undef $vgpr2, 1, undef $vgpr4, implicit $exec, implicit $mode
; ASM: NumVgprs: 358
...

Expand Down
Loading