From dfcc9d2bd6957250dfa78260ccb62381bf5f02e0 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Thu, 18 Sep 2025 12:03:33 -0700 Subject: [PATCH 1/3] [AMDGPU] gfx1251 VOP1 dpp support --- llvm/lib/Target/AMDGPU/VOP1Instructions.td | 65 +++++--- llvm/test/CodeGen/AMDGPU/dpp64_combine.mir | 1 + .../AMDGPU/llvm.amdgcn.mov.dpp.gfx1251.ll | 25 +++ llvm/test/MC/AMDGPU/gfx1251_asm_vop1_dpp16.s | 98 +++++++++++ llvm/test/MC/AMDGPU/gfx1251_asm_vop1_err.s | 156 ++++++++++++++++++ llvm/test/MC/AMDGPU/gfx1251_err.s | 6 + llvm/test/MC/AMDGPU/gfx9-asm-err.s | 2 +- .../AMDGPU/gfx1251_dasm_vop1_dpp16.txt | 49 ++++++ 8 files changed, 379 insertions(+), 23 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.gfx1251.ll create mode 100644 llvm/test/MC/AMDGPU/gfx1251_asm_vop1_dpp16.s create mode 100644 llvm/test/MC/AMDGPU/gfx1251_asm_vop1_err.s create mode 100644 llvm/test/MC/AMDGPU/gfx1251_err.s create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop1_dpp16.txt diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index f816d7de27ee4..6230c17e20804 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -212,6 +212,11 @@ def VOP_I16_F16_SPECIAL_OMOD_fake16 : VOPProfile_Fake16 { } +def VOP_F64_F64_NO_DPP : VOPProfile <[f64, f64, untyped, untyped]> { + let HasExtVOP3DPP = 0; + let HasExt64BitDPP = 0; +} + //===----------------------------------------------------------------------===// // VOP1 Instructions //===----------------------------------------------------------------------===// @@ -344,9 +349,9 @@ defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, int_amdgcn_sqrt>; } // End TRANS = 1, SchedRW = [WriteTrans32] let TRANS = 1, SchedRW = [WriteTrans64] in { -defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>; -defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>; -defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", 
VOP_F64_F64, int_amdgcn_sqrt>; +defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64_NO_DPP, AMDGPUrcp>; +defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64_NO_DPP, AMDGPUrsq>; +defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64_NO_DPP, int_amdgcn_sqrt>; } // End TRANS = 1, SchedRW = [WriteTrans64] let TRANS = 1, SchedRW = [WriteTrans32] in { @@ -1025,6 +1030,11 @@ multiclass VOP1_Real_FULL_with_name op, string opName, multiclass VOP1_Real_NO_DPP op> : VOP1_Real_e32, VOP1_Real_e64; +multiclass VOP1_Real_with_DPP16 op> : + VOP1_Real_NO_DPP, + VOP1_Real_dpp, + VOP3_Real_dpp_Base; + multiclass VOP1_Real_FULL_t16_gfx11_gfx12 op, string asmName, string opName = NAME> : VOP1_Real_FULL_with_name, @@ -1057,6 +1067,11 @@ multiclass VOP1_Real_FULL_t16_and_fake16_gfx1250< VOP1_Real_FULL_with_name; } +multiclass VOP1_Real_FULL_with_name_gfx11_gfx12_not_gfx1250 op, string opName, + string asmName> : + VOP1_Real_FULL_with_name, + VOP1_Real_FULL_with_name; + multiclass VOP1_Real_OpSelIsDPP_gfx1250 op> : VOP1_Real_e32 { defvar ps = !cast(NAME#"_e64"); def _e64_gfx1250 : @@ -1064,10 +1079,10 @@ multiclass VOP1_Real_OpSelIsDPP_gfx1250 op> : VOP1_Real_e32; } -defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name; -defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name; +defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name_gfx11_gfx12_not_gfx1250<0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">; +defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name; -defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name; +defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name; defm V_CVT_PK_F32_FP8_fake16 : VOP1_Real_e32_with_name; defm V_CVT_PK_F32_FP8_t16 : VOP1_Real_e32_with_name; @@ -1252,17 +1267,17 @@ let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in { multiclass VOP1_Real_gfx7 op> : VOP1_Real_e32_gfx7, VOP1_Real_e64_gfx7; -multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12 op> : +multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 op> : VOP1_Real_gfx7, VOP1_Real_gfx10, VOP1_Real_NO_DPP, - VOP1_Real_NO_DPP; + 
VOP1_Real_with_DPP16; defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>; defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>; -defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x017>; -defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x018>; -defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x019>; -defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x01a>; +defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x017>; +defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x018>; +defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x019>; +defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x01a>; //===----------------------------------------------------------------------===// // GFX6, GFX7, GFX10, GFX11, GFX12 @@ -1300,6 +1315,10 @@ multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12 op> : VOP1_Real_gfx6_gfx7_gfx10, VOP1_Real_NO_DPP, VOP1_Real_NO_DPP; +multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12 op> : + VOP1_Real_gfx6_gfx7_gfx10, VOP1_Real_NO_DPP, + VOP1_Real_with_DPP16; + multiclass VOP1Only_Real_gfx6_gfx7_gfx10_gfx11_gfx12 op> : VOP1Only_Real_gfx6_gfx7, VOP1Only_Real_gfx10_gfx11_gfx12; @@ -1314,8 +1333,8 @@ defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>; defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x000>; defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x001>; defm V_READFIRSTLANE_B32 : VOP1Only_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x002>; -defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x003>; -defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x004>; +defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x003>; +defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x004>; defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x005>; defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x006>; defm V_CVT_U32_F32 
: VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x007>; @@ -1325,14 +1344,14 @@ defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>; defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>; defm V_CVT_FLR_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00d>; defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x00e>; -defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x00f>; -defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x010>; +defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x00f>; +defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x010>; defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x011>; defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x012>; defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x013>; defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x014>; -defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x015>; -defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x016>; +defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x015>; +defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x016>; defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x020>; defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x021>; defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x022>; @@ -1354,9 +1373,9 @@ defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x038>; defm V_FFBH_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x039>; defm V_FFBL_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x03a>; defm V_FFBH_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x03b>; -defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03c>; -defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03d>; -defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03e>; +defm V_FREXP_EXP_I32_F64 : 
VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x03c>; +defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x03d>; +defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_with_DPP16_gfx12<0x03e>; defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x03f>; defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x040>; defm V_CLREXCP : VOP1_Real_gfx6_gfx7_gfx10<0x041>; @@ -1410,7 +1429,9 @@ multiclass VOP1_Real_vi op> { if !cast(NAME#"_e32").Pfl.HasExtDPP then def _dpp_vi : VOP_DPP_Real(NAME#"_dpp"), SIEncodingFamily.VI>, - VOP1_DPPe(NAME#"_dpp")>; + VOP1_DPPe(NAME#"_dpp")> { + let AssemblerPredicate = isGFX8GFX9; + } } defm V_NOP : VOP1_Real_vi <0x0>; diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir index 84da231c95a62..8094dbaf418b8 100644 --- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir +++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.mir @@ -1,5 +1,6 @@ # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN # RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=GCN +# RUN: llc -mtriple=amdgcn -mcpu=gfx1251 -run-pass=gcn-dpp-combine -o - %s | FileCheck %s --check-prefix=GCN --- # GCN-LABEL: name: dpp64_old_impdef diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.gfx1251.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.gfx1251.ll new file mode 100644 index 0000000000000..7a2f8faae9e89 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.gfx1251.ll @@ -0,0 +1,25 @@ +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GFX12 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1251 < %s | FileCheck -check-prefixes=GCN,GFX1251 %s + +; GCN-LABEL: {{^}}mov_dpp64_test: +; GCN: v_mov_b32_dpp 
v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 +; GCN: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 +define amdgpu_kernel void @mov_dpp64_test(ptr addrspace(1) %out, i64 %in1) { + %tmp0 = call i64 @llvm.amdgcn.mov.dpp.i64(i64 %in1, i32 1, i32 1, i32 1, i1 0) #0 + store i64 %tmp0, ptr addrspace(1) %out + ret void +} + +; GCN-LABEL: {{^}}mov_dpp64_row_share_test: +; GFX12-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_share:1 row_mask:0x1 bank_mask:0x1 +; GFX1251: v_mov_b64_dpp v[{{[0-9:]+}}], v[{{[0-9:]+}}] row_share:1 row_mask:0x1 bank_mask:0x1 +define amdgpu_kernel void @mov_dpp64_row_share_test(ptr addrspace(1) %out, i64 %in1) { + %tmp0 = call i64 @llvm.amdgcn.mov.dpp.i64(i64 %in1, i32 337, i32 1, i32 1, i1 0) #0 + store i64 %tmp0, ptr addrspace(1) %out + ret void +} + +declare i64 @llvm.amdgcn.mov.dpp.i64(i64, i32, i32, i32, i1) #0 + +attributes #0 = { nounwind readnone convergent } diff --git a/llvm/test/MC/AMDGPU/gfx1251_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1251_asm_vop1_dpp16.s new file mode 100644 index 0000000000000..bb1ccaf53ce32 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1251_asm_vop1_dpp16.s @@ -0,0 +1,98 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1251 -show-encoding %s | FileCheck --check-prefixes=GFX1251 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s + +v_mov_b64 v[4:5], v[2:3] row_share:0 row_mask:0xf bank_mask:0xf +// GFX1251: v_mov_b64_dpp v[4:5], v[2:3] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3a,0x08,0x7e,0x02,0x50,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1250-ERR-NEXT:{{^}}v_mov_b64 v[4:5], v[2:3] row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250-ERR-NEXT:{{^}} ^ + +v_mov_b64 v[4:5], v[2:3] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1251: v_mov_b64_dpp v[4:5], v[2:3] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x3a,0x08,0x7e,0x02,0x5f,0x01,0x01] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_mov_b64 v[4:5], v[2:3] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_mov_b64 v[254:255], v[254:255] row_share:3 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1251: v_mov_b64_dpp v[254:255], v[254:255] row_share:3 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3a,0xfc,0x7f,0xfe,0x53,0x05,0x30] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_mov_b64 v[254:255], v[254:255] row_share:3 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_cvt_i32_f64 v2, v[4:5] row_share:1 +// GFX1251: v_cvt_i32_f64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x06,0x04,0x7e,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_cvt_i32_f64 v2, v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_cvt_f64_i32 v[4:5], v2 row_share:1 +// GFX1251: v_cvt_f64_i32_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x7e,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_cvt_f64_i32 v[4:5], v2 row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_cvt_f32_f64 v2, v[4:5] row_share:1 +// GFX1251: v_cvt_f32_f64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x1e,0x04,0x7e,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1250-ERR-NEXT:{{^}}v_cvt_f32_f64 v2, v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_cvt_f64_f32 v[4:5], v2 row_share:1 +// GFX1251: v_cvt_f64_f32_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x20,0x08,0x7e,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_cvt_f64_f32 v[4:5], v2 row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_cvt_u32_f64 v2, v[4:5] row_share:1 +// GFX1251: v_cvt_u32_f64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x2a,0x04,0x7e,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_cvt_u32_f64 v2, v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_cvt_f64_u32 v[4:5], v2 row_share:1 +// GFX1251: v_cvt_f64_u32_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x2c,0x08,0x7e,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_cvt_f64_u32 v[4:5], v2 row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_trunc_f64 v[2:3], v[4:5] row_share:1 +// GFX1251: v_trunc_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x2e,0x04,0x7e,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_trunc_f64 v[2:3], v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_ceil_f64 v[2:3], v[4:5] row_share:1 +// GFX1251: v_ceil_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x30,0x04,0x7e,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1250-ERR-NEXT:{{^}}v_ceil_f64 v[2:3], v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_rndne_f64 v[2:3], v[4:5] row_share:1 +// GFX1251: v_rndne_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x32,0x04,0x7e,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_rndne_f64 v[2:3], v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_floor_f64 v[2:3], v[4:5] row_share:1 +// GFX1251: v_floor_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x34,0x04,0x7e,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_floor_f64 v[2:3], v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_frexp_exp_i32_f64 v2, v[4:5] row_share:1 +// GFX1251: v_frexp_exp_i32_f64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x78,0x04,0x7e,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_frexp_exp_i32_f64 v2, v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_frexp_mant_f64 v[2:3], v[4:5] row_share:1 +// GFX1251: v_frexp_mant_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x7a,0x04,0x7e,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_frexp_mant_f64 v[2:3], v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_fract_f64 v[2:3], v[4:5] row_share:1 +// GFX1251: v_fract_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x7c,0x04,0x7e,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1250-ERR-NEXT:{{^}}v_fract_f64 v[2:3], v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ diff --git a/llvm/test/MC/AMDGPU/gfx1251_asm_vop1_err.s b/llvm/test/MC/AMDGPU/gfx1251_asm_vop1_err.s new file mode 100644 index 0000000000000..1d88e9cb59c8e --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1251_asm_vop1_err.s @@ -0,0 +1,156 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1251 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1251-ERR --implicit-check-not=error: --strict-whitespace %s + +v_mov_b64 v[4:5], v[2:3] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_mov_b64 v[4:5], v[2:3] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_i32_f64 v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_cvt_i32_f64 v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_i32 v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_i32 v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f32_f64 v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_cvt_f32_f64 v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_f32 v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_f32 v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_u32_f64 v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_cvt_u32_f64 v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_u32 v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_u32 v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_trunc_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_trunc_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_ceil_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_ceil_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_rndne_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_rndne_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_floor_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_floor_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_frexp_exp_i32_f64 v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_frexp_exp_i32_f64 v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_frexp_mant_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_frexp_mant_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_fract_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1251-ERR-NEXT:{{^}}v_fract_f64 v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_mov_b64 v[4:5], v[2:3] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_mov_b64 v[4:5], v[2:3] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_i32_f64 v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_i32_f64 v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_i32 v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_i32 v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f32_f64 v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_f32_f64 v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_f32 v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_f32 v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_u32_f64 v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_u32_f64 v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_u32 v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_u32 v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_trunc_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_trunc_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_ceil_f64 v[2:3], v[4:5] 
quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_ceil_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_rndne_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_rndne_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_floor_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_floor_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_frexp_exp_i32_f64 v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_frexp_exp_i32_f64 v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_frexp_mant_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_frexp_mant_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_fract_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_fract_f64 v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_rcp_f64 v[4:5], v[2:3] row_share:1 +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_rcp_f64 v[4:5], v[2:3] row_share:1 +// GFX1251-ERR-NEXT:{{^}} ^ + +v_rsq_f64 v[4:5], v[2:3] row_share:1 +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_rsq_f64 v[4:5], v[2:3] row_share:1 +// GFX1251-ERR-NEXT:{{^}} ^ + +v_sqrt_f64 v[4:5], v[2:3] row_share:1 +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1251-ERR-NEXT:{{^}}v_sqrt_f64 v[4:5], v[2:3] row_share:1 +// GFX1251-ERR-NEXT:{{^}} ^ diff --git a/llvm/test/MC/AMDGPU/gfx1251_err.s b/llvm/test/MC/AMDGPU/gfx1251_err.s new file mode 100644 index 0000000000000..d4db1bf9bb780 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1251_err.s @@ -0,0 +1,6 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1251 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX1251-ERR --implicit-check-not=error: -strict-whitespace %s + +v_mov_b64 v[4:5], v[2:3] quad_perm:[1,1,1,1] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR: v_mov_b64 v[4:5], v[2:3] quad_perm:[1,1,1,1] +// GFX1251-ERR: ^ diff --git a/llvm/test/MC/AMDGPU/gfx9-asm-err.s b/llvm/test/MC/AMDGPU/gfx9-asm-err.s index 31e0d953b5bd8..eb1d7b0b90772 100644 --- a/llvm/test/MC/AMDGPU/gfx9-asm-err.s +++ b/llvm/test/MC/AMDGPU/gfx9-asm-err.s @@ -31,7 +31,7 @@ v_subrev_u16_e64 v5, v1, -4.2 // GFX9ERR: :[[@LINE-1]]:{{[0-9]+}}: error: literal operands are not supported v_cvt_u32_f64 v5, v[0:1] quad_perm:[0,2,1,1] row_mask:0xf bank_mask:0xf -// GFX9ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX9ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_lds_dword v[2:3], off // GFX9ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop1_dpp16.txt new file mode 100644 index 0000000000000..3380b77a27a5d --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop1_dpp16.txt @@ -0,0 +1,49 @@ +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1251 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX1251 %s + +# GFX1251: v_mov_b64_dpp v[254:255], v[254:255] row_share:3 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x3a,0xfc,0x7f,0xfe,0x53,0x05,0x30] +0xfa,0x3a,0xfc,0x7f,0xfe,0x53,0x05,0x30 + +# GFX1251: v_mov_b64_dpp v[4:5], v[2:3] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x3a,0x08,0x7e,0x02,0x50,0x01,0xff] +0xfa,0x3a,0x08,0x7e,0x02,0x50,0x01,0xff + +# GFX1251: v_mov_b64_dpp v[4:5], v[2:3] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x3a,0x08,0x7e,0x02,0x5f,0x01,0x01] +0xfa,0x3a,0x08,0x7e,0x02,0x5f,0x01,0x01 + +# GFX1251: v_ceil_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x30,0x04,0x7e,0x04,0x51,0x01,0xff] +0xfa,0x30,0x04,0x7e,0x04,0x51,0x01,0xff + +# GFX1251: v_cvt_f32_f64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x1e,0x04,0x7e,0x04,0x51,0x01,0xff] +0xfa,0x1e,0x04,0x7e,0x04,0x51,0x01,0xff + +# GFX1251: v_cvt_f64_f32_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x20,0x08,0x7e,0x02,0x51,0x01,0xff] +0xfa,0x20,0x08,0x7e,0x02,0x51,0x01,0xff + +# GFX1251: v_cvt_f64_i32_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x7e,0x02,0x51,0x01,0xff] +0xfa,0x08,0x08,0x7e,0x02,0x51,0x01,0xff + +# GFX1251: v_cvt_f64_u32_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x2c,0x08,0x7e,0x02,0x51,0x01,0xff] 
+0xfa,0x2c,0x08,0x7e,0x02,0x51,0x01,0xff + +# GFX1251: v_cvt_i32_f64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x06,0x04,0x7e,0x04,0x51,0x01,0xff] +0xfa,0x06,0x04,0x7e,0x04,0x51,0x01,0xff + +# GFX1251: v_cvt_u32_f64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x2a,0x04,0x7e,0x04,0x51,0x01,0xff] +0xfa,0x2a,0x04,0x7e,0x04,0x51,0x01,0xff + +# GFX1251: v_floor_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x34,0x04,0x7e,0x04,0x51,0x01,0xff] +0xfa,0x34,0x04,0x7e,0x04,0x51,0x01,0xff + +# GFX1251: v_fract_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x7c,0x04,0x7e,0x04,0x51,0x01,0xff] +0xfa,0x7c,0x04,0x7e,0x04,0x51,0x01,0xff + +# GFX1251: v_frexp_exp_i32_f64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x78,0x04,0x7e,0x04,0x51,0x01,0xff] +0xfa,0x78,0x04,0x7e,0x04,0x51,0x01,0xff + +# GFX1251: v_frexp_mant_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x7a,0x04,0x7e,0x04,0x51,0x01,0xff] +0xfa,0x7a,0x04,0x7e,0x04,0x51,0x01,0xff + +# GFX1251: v_rndne_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x32,0x04,0x7e,0x04,0x51,0x01,0xff] +0xfa,0x32,0x04,0x7e,0x04,0x51,0x01,0xff + +# GFX1251: v_trunc_f64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x2e,0x04,0x7e,0x04,0x51,0x01,0xff] +0xfa,0x2e,0x04,0x7e,0x04,0x51,0x01,0xff From 344bfe15f023e965348da4d92738b48683768887 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Thu, 18 Sep 2025 12:58:41 -0700 Subject: [PATCH 2/3] [AMDGPU] gfx1251 VOP2 dpp support --- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 79 +++++++------ llvm/test/CodeGen/AMDGPU/dpp_combine.ll | 6 +- llvm/test/MC/AMDGPU/gfx1251_asm_vop2_dpp16.s | 74 ++++++++++++ llvm/test/MC/AMDGPU/gfx1251_asm_vop2_err.s | 106 ++++++++++++++++++ .../AMDGPU/gfx1251_dasm_vop2_dpp16.txt | 37 ++++++ 5 files changed, 267 
insertions(+), 35 deletions(-) create mode 100644 llvm/test/MC/AMDGPU/gfx1251_asm_vop2_dpp16.s create mode 100644 llvm/test/MC/AMDGPU/gfx1251_asm_vop2_err.s create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop2_dpp16.txt diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 46a1a4bf1ab4a..37d92bc5076de 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -287,10 +287,14 @@ multiclass VOP2bInst , Commutable_REV; - let SubtargetPredicate = isGFX11Plus in { - if P.HasExtVOP3DPP then - def _e64_dpp : VOP3_DPP_Pseudo ; - } // End SubtargetPredicate = isGFX11Plus + if P.HasExtVOP3DPP then + def _e64_dpp : VOP3_DPP_Pseudo { + let SubtargetPredicate = isGFX11Plus; + } + else if P.HasExt64BitDPP then + def _e64_dpp : VOP3_DPP_Pseudo { + let OtherPredicates = [HasDPALU_DPP]; + } } } @@ -345,10 +349,14 @@ multiclass VOPD_Component; } - let SubtargetPredicate = isGFX11Plus in { - if P.HasExtVOP3DPP then - def _e64_dpp : VOP3_DPP_Pseudo ; - } // End SubtargetPredicate = isGFX11Plus + if P.HasExtVOP3DPP then + def _e64_dpp : VOP3_DPP_Pseudo { + let SubtargetPredicate = isGFX11Plus; + } + else if P.HasExt64BitDPP then + def _e64_dpp : VOP3_DPP_Pseudo { + let OtherPredicates = [HasDPALU_DPP]; + } } } @@ -1607,8 +1615,9 @@ multiclass VOP2_Real_dpp op> { } multiclass VOP2_Real_dpp8 op> { - if !cast(NAME#"_e32").Pfl.HasExtDPP then - def _dpp8#Gen.Suffix : VOP2_DPP8_Gen(NAME#"_e32"), Gen>; + defvar ps = !cast(NAME#"_e32"); + if !and(ps.Pfl.HasExtDPP, !not(ps.Pfl.HasExt64BitDPP)) then + def _dpp8#Gen.Suffix : VOP2_DPP8_Gen; } //===------------------------- VOP2 (with name) -------------------------===// @@ -1643,10 +1652,10 @@ multiclass VOP2_Real_dpp_with_name op, string opName, multiclass VOP2_Real_dpp8_with_name op, string opName, string asmName> { defvar ps = !cast(opName#"_e32"); - if ps.Pfl.HasExtDPP then - def _dpp8#Gen.Suffix : VOP2_DPP8_Gen { - let 
AsmString = asmName # ps.Pfl.AsmDPP8; - } + if !and(ps.Pfl.HasExtDPP, !not(ps.Pfl.HasExt64BitDPP)) then + def _dpp8#Gen.Suffix : VOP2_DPP8_Gen { + let AsmString = asmName # ps.Pfl.AsmDPP8; + } } //===------------------------------ VOP2be ------------------------------===// @@ -1687,32 +1696,32 @@ multiclass VOP2be_Real_dpp op, string opName, string asmName } } multiclass VOP2be_Real_dpp8 op, string opName, string asmName> { - if !cast(opName#"_e32").Pfl.HasExtDPP then + defvar ps = !cast(opName#"_e32"); + if !and(ps.Pfl.HasExtDPP, !not(ps.Pfl.HasExt64BitDPP)) then { def _dpp8#Gen.Suffix : - VOP2_DPP8_Gen(opName#"_e32"), Gen> { - string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; + VOP2_DPP8_Gen { + string AsmDPP8 = ps.Pfl.AsmDPP8; let AsmString = asmName # !subst(", vcc", "", AsmDPP8); } - if !cast(opName#"_e32").Pfl.HasExtDPP then def _dpp8_w32#Gen.Suffix : - VOP2_DPP8(opName#"_e32")> { - string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; + VOP2_DPP8 { + string AsmDPP8 = ps.Pfl.AsmDPP8; let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8); let isAsmParserOnly = 1; let WaveSizePredicate = isWave32; let AssemblerPredicate = Gen.AssemblerPredicate; let DecoderNamespace = Gen.DecoderNamespace; } - if !cast(opName#"_e32").Pfl.HasExtDPP then def _dpp8_w64#Gen.Suffix : - VOP2_DPP8(opName#"_e32")> { - string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; + VOP2_DPP8 { + string AsmDPP8 = ps.Pfl.AsmDPP8; let AsmString = asmName # AsmDPP8; let isAsmParserOnly = 1; let WaveSizePredicate = isWave64; let AssemblerPredicate = Gen.AssemblerPredicate; let DecoderNamespace = Gen.DecoderNamespace; } + } } // We don't want to override separate decoderNamespaces within these @@ -1777,9 +1786,11 @@ multiclass VOP2_Real_NO_DPP_with_name op, string opName, } } -multiclass VOP2_Real_NO_DPP_with_alias op, string alias> { +multiclass VOP2_Real_with_DPP16_with_alias op, string alias> { defm NAME : VOP2_Real_e32, - VOP2_Real_e64; + VOP2_Real_dpp, + VOP2_Real_e64, + VOP3_Real_dpp_Base; def 
Gen.Suffix#"_alias" : AMDGPUMnemonicAlias { let AssemblerPredicate = Gen.AssemblerPredicate; } @@ -1808,6 +1819,9 @@ multiclass VOP2_Real_FULL_t16_gfx12 op, string opName, } } +multiclass VOP2_Real_with_DPP16_with_alias_gfx12 op, string alias> : + VOP2_Real_with_DPP16_with_alias; + multiclass VOP2_Real_FULL_t16_and_fake16_gfx12 op, string opName, string asmName, string alias> { defm _t16: VOP2_Real_FULL_t16_gfx12; @@ -1818,14 +1832,11 @@ multiclass VOP2_Real_NO_DPP_with_name_gfx12 op, string opName, string asmName> : VOP2_Real_NO_DPP_with_name; -multiclass VOP2_Real_NO_DPP_with_alias_gfx12 op, string alias> : - VOP2_Real_NO_DPP_with_alias; - -defm V_ADD_F64 : VOP2_Real_NO_DPP_with_name_gfx12<0x002, "V_ADD_F64_pseudo", "v_add_f64">; -defm V_MUL_F64 : VOP2_Real_NO_DPP_with_name_gfx12<0x006, "V_MUL_F64_pseudo", "v_mul_f64">; -defm V_LSHLREV_B64 : VOP2_Real_NO_DPP_with_name_gfx12<0x01f, "V_LSHLREV_B64_pseudo", "v_lshlrev_b64">; -defm V_MIN_NUM_F64 : VOP2_Real_NO_DPP_with_alias_gfx12<0x00d, "v_min_f64">; -defm V_MAX_NUM_F64 : VOP2_Real_NO_DPP_with_alias_gfx12<0x00e, "v_max_f64">; +defm V_ADD_F64 : VOP2_Real_FULL_with_name_gfx12<0x002, "V_ADD_F64_pseudo", "v_add_f64">; +defm V_MUL_F64 : VOP2_Real_FULL_with_name_gfx12<0x006, "V_MUL_F64_pseudo", "v_mul_f64">; +defm V_LSHLREV_B64 : VOP2_Real_FULL_with_name_gfx12<0x01f, "V_LSHLREV_B64_pseudo", "v_lshlrev_b64">; +defm V_MIN_NUM_F64 : VOP2_Real_with_DPP16_with_alias_gfx12<0x00d, "v_min_f64">; +defm V_MAX_NUM_F64 : VOP2_Real_with_DPP16_with_alias_gfx12<0x00e, "v_max_f64">; defm V_CNDMASK_B32 : VOP2e_Real_gfx12<0x001, "V_CNDMASK_B32", "v_cndmask_b32">; defm V_ADD_CO_CI_U32 : @@ -2776,7 +2787,7 @@ let DecoderNamespace = "GFX90A" in { } } // End AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A" -let SubtargetPredicate = HasFmacF64Inst in { +let SubtargetPredicate = HasFmacF64Inst, OtherPredicates = [isGFX9Only] in { defm V_FMAC_F64 : VOP2_Real_e32e64_gfx90a <0x4>; } // End SubtargetPredicate = HasFmacF64Inst diff 
--git a/llvm/test/CodeGen/AMDGPU/dpp_combine.ll b/llvm/test/CodeGen/AMDGPU/dpp_combine.ll index 539485d19a2b9..a3251bdfafebf 100644 --- a/llvm/test/CodeGen/AMDGPU/dpp_combine.ll +++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.ll @@ -4,6 +4,8 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck %s -check-prefixes=GCN,GFX11-FAKE16 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=+real-true16 < %s | FileCheck %s -check-prefixes=GCN,GFX11-TRUE16 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-real-true16 < %s | FileCheck %s -check-prefixes=GCN,GFX11-FAKE16 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1251 -mattr=+real-true16 < %s | FileCheck %s -check-prefixes=GCN,GFX11-TRUE16 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1251 -mattr=-real-true16 < %s | FileCheck %s -check-prefixes=GCN,GFX11-FAKE16 ; GCN-LABEL: {{^}}dpp_add: ; GCN: global_load_{{dword|b32}} [[V:v[0-9]+]], @@ -49,7 +51,9 @@ define amdgpu_kernel void @dpp_fadd(ptr addrspace(1) %arg) { ret void } -; Fails to combine because v_mul_lo_u32 has no e32 or dpp form. +; Fails to combine prior to gfx1251 because v_mul_lo_u32 has no e32 or dpp form. +; Fails to combine on gfx1251 because DPP control value is invalid for DP DPP and v_mul_lo_u32 is +; classified as DP DPP. 
; GCN-LABEL: {{^}}dpp_mul: ; GCN: global_load_{{dword|b32}} [[V:v[0-9]+]], ; GCN: v_mov_b32_e32 [[V2:v[0-9]+]], [[V]] diff --git a/llvm/test/MC/AMDGPU/gfx1251_asm_vop2_dpp16.s b/llvm/test/MC/AMDGPU/gfx1251_asm_vop2_dpp16.s new file mode 100644 index 0000000000000..38bbc69fb3a72 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1251_asm_vop2_dpp16.s @@ -0,0 +1,74 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1251 -show-encoding < %s | FileCheck --check-prefix=GFX1251 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s + +v_add_nc_u64 v[4:5], v[2:3], v[4:5] row_share:3 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX1251: v_add_nc_u64_dpp v[4:5], v[2:3], v[4:5] row_share:3 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x08,0x08,0x50,0x02,0x53,0x05,0x30] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_add_nc_u64 v[4:5], v[2:3], v[4:5] row_share:3 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_add_nc_u64 v[4:5], v[2:3], v[4:5] row_share:0 row_mask:0xf bank_mask:0xf +// GFX1251: v_add_nc_u64_dpp v[4:5], v[2:3], v[4:5] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x50,0x02,0x50,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_add_nc_u64 v[4:5], v[2:3], v[4:5] row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250-ERR-NEXT:{{^}} ^ + +v_add_nc_u64 v[4:5], v[2:3], v[4:5] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1251: v_add_nc_u64_dpp v[4:5], v[2:3], v[4:5] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x08,0x08,0x50,0x02,0x5f,0x01,0x01] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1250-ERR-NEXT:{{^}}v_add_nc_u64 v[4:5], v[2:3], v[4:5] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_sub_nc_u64 v[4:5], v[2:3], v[4:5] row_share:3 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX1251: v_sub_nc_u64_dpp v[4:5], v[2:3], v[4:5] row_share:3 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x08,0x08,0x52,0x02,0x53,0x05,0x30] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_sub_nc_u64 v[4:5], v[2:3], v[4:5] row_share:3 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_sub_nc_u64 v[4:5], v[2:3], v[4:5] row_share:0 row_mask:0xf bank_mask:0xf +// GFX1251: v_sub_nc_u64_dpp v[4:5], v[2:3], v[4:5] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x52,0x02,0x50,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_sub_nc_u64 v[4:5], v[2:3], v[4:5] row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250-ERR-NEXT:{{^}} ^ + +v_sub_nc_u64 v[4:5], v[2:3], v[4:5] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1251: v_sub_nc_u64_dpp v[4:5], v[2:3], v[4:5] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x08,0x08,0x52,0x02,0x5f,0x01,0x01] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_sub_nc_u64 v[4:5], v[2:3], v[4:5] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_fmac_f64 v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1251: v_fmac_f64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x2e,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1250-ERR-NEXT:{{^}}v_fmac_f64 v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_add_f64 v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1251: v_add_f64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x04,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_add_f64 v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_mul_f64 v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1251: v_mul_f64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x0c,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_mul_f64 v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_max_num_f64 v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1251: v_max_num_f64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x1c,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_max_num_f64 v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_min_num_f64 v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1251: v_min_num_f64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x1a,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_min_num_f64 v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_lshlrev_b64 v[4:5], v2, v[4:5] row_share:1 +// GFX1251: v_lshlrev_b64_dpp v[4:5], v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x3e,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1250-ERR-NEXT:{{^}}v_lshlrev_b64 v[4:5], v2, v[4:5] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ diff --git a/llvm/test/MC/AMDGPU/gfx1251_asm_vop2_err.s b/llvm/test/MC/AMDGPU/gfx1251_asm_vop2_err.s new file mode 100644 index 0000000000000..99d781d1e0fa1 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1251_asm_vop2_err.s @@ -0,0 +1,106 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1251 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1251-ERR --implicit-check-not=error: --strict-whitespace %s + +v_add_nc_u64 v[2:3], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_add_nc_u64 v[2:3], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_sub_nc_u64 v[2:3], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_sub_nc_u64 v[2:3], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_fmac_f64 v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_fmac_f64 v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_add_f64 v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_add_f64 v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_mul_f64 v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_mul_f64 v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_max_num_f64 v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1251-ERR-NEXT:{{^}}v_max_num_f64 v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_min_num_f64 v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_min_num_f64 v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_lshlrev_b64 v[4:5], v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_lshlrev_b64 v[4:5], v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_fmamk_f64 v[4:5], v[2:3], 123.0, v[6:7] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_fmamk_f64 v[4:5], v[2:3], 123.0, v[6:7] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_fmaak_f64 v[4:5], v[2:3], v[6:7], 123.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_fmaak_f64 v[4:5], v[2:3], v[6:7], 123.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_add_nc_u64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_add_nc_u64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_sub_nc_u64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_sub_nc_u64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_fmac_f64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_fmac_f64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_add_f64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only 
supports row_share +// GFX1251-ERR-NEXT:{{^}}v_add_f64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_mul_f64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_mul_f64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_max_num_f64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_max_num_f64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_min_num_f64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_min_num_f64 v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_lshlrev_b64 v[4:5], v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_lshlrev_b64 v[4:5], v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_mul_u64 v[2:3], v[4:5], v[6:7] row_share:1 +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_mul_u64 v[2:3], v[4:5], v[6:7] row_share:1 +// GFX1251-ERR-NEXT:{{^}} ^ + +v_fmamk_f64 v[4:5], v[2:3], 123.0, v[6:7] row_share:1 +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_fmamk_f64 v[4:5], v[2:3], 123.0, v[6:7] row_share:1 +// GFX1251-ERR-NEXT:{{^}} ^ + +v_fmaak_f64 v[4:5], v[2:3], v[6:7], 123.0 row_share:1 +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1251-ERR-NEXT:{{^}}v_fmaak_f64 v[4:5], v[2:3], v[6:7], 123.0 row_share:1 +// GFX1251-ERR-NEXT:{{^}} ^ diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop2_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop2_dpp16.txt new file mode 100644 index 0000000000000..a92b81b9bb486 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop2_dpp16.txt @@ -0,0 +1,37 @@ +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1251 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1251 %s + +# GFX1251: v_add_nc_u64_dpp v[4:5], v[2:3], v[4:5] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x50,0x02,0x50,0x01,0xff] +0xfa,0x08,0x08,0x50,0x02,0x50,0x01,0xff + +# GFX1251: v_add_nc_u64_dpp v[4:5], v[2:3], v[4:5] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x08,0x08,0x50,0x02,0x5f,0x01,0x01] +0xfa,0x08,0x08,0x50,0x02,0x5f,0x01,0x01 + +# GFX1251: v_add_nc_u64_dpp v[4:5], v[2:3], v[4:5] row_share:3 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x08,0x08,0x50,0x02,0x53,0x05,0x30] +0xfa,0x08,0x08,0x50,0x02,0x53,0x05,0x30 + +# GFX1251: v_sub_nc_u64_dpp v[4:5], v[2:3], v[4:5] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x52,0x02,0x50,0x01,0xff] +0xfa,0x08,0x08,0x52,0x02,0x50,0x01,0xff + +# GFX1251: v_sub_nc_u64_dpp v[4:5], v[2:3], v[4:5] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x08,0x08,0x52,0x02,0x5f,0x01,0x01] +0xfa,0x08,0x08,0x52,0x02,0x5f,0x01,0x01 + +# GFX1251: v_sub_nc_u64_dpp v[4:5], v[2:3], v[4:5] row_share:3 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x08,0x08,0x52,0x02,0x53,0x05,0x30] +0xfa,0x08,0x08,0x52,0x02,0x53,0x05,0x30 + +# GFX1251: v_add_f64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x04,0x02,0x51,0x01,0xff] +0xfa,0x08,0x08,0x04,0x02,0x51,0x01,0xff + +# GFX1251: v_fmac_f64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x2e,0x02,0x51,0x01,0xff] 
+0xfa,0x08,0x08,0x2e,0x02,0x51,0x01,0xff + +# GFX1251: v_lshlrev_b64_dpp v[4:5], v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x3e,0x02,0x51,0x01,0xff] +0xfa,0x08,0x08,0x3e,0x02,0x51,0x01,0xff + +# GFX1251: v_max_num_f64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x1c,0x02,0x51,0x01,0xff] +0xfa,0x08,0x08,0x1c,0x02,0x51,0x01,0xff + +# GFX1251: v_min_num_f64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x1a,0x02,0x51,0x01,0xff] +0xfa,0x08,0x08,0x1a,0x02,0x51,0x01,0xff + +# GFX1251: v_mul_f64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x08,0x08,0x0c,0x02,0x51,0x01,0xff] +0xfa,0x08,0x08,0x0c,0x02,0x51,0x01,0xff From b83405b879b471da983f885bfdffb3d1f58130de Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Thu, 18 Sep 2025 14:30:20 -0700 Subject: [PATCH 3/3] [AMDGPU] gfx1251 VOP3 dpp support --- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 1 + llvm/lib/Target/AMDGPU/VOP3Instructions.td | 64 ++++++-- llvm/lib/Target/AMDGPU/VOPInstructions.td | 78 +++++---- llvm/test/CodeGen/AMDGPU/dpp64_combine.ll | 4 + llvm/test/MC/AMDGPU/gfx1251_asm_vop3_dpp16.s | 150 ++++++++++++++++++ .../AMDGPU/gfx1251_asm_vop3_from_vop1_dpp16.s | 58 +++++++ .../AMDGPU/gfx1251_asm_vop3_from_vop1_err.s | 150 ++++++++++++++++++ .../AMDGPU/gfx1251_asm_vop3_from_vop2_dpp16.s | 34 ++++ .../AMDGPU/gfx1251_asm_vop3_from_vop2_err.s | 93 +++++++++++ llvm/test/MC/AMDGPU/vop3-gfx9.s | 4 +- .../AMDGPU/gfx1251_dasm_vop3_dpp16.txt | 94 +++++++++++ .../gfx1251_dasm_vop3_from_vop1_dpp16.txt | 43 +++++ .../gfx1251_dasm_vop3_from_vop2_dpp16.txt | 25 +++ 13 files changed, 745 insertions(+), 53 deletions(-) create mode 100644 llvm/test/MC/AMDGPU/gfx1251_asm_vop3_dpp16.s create mode 100644 llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop1_dpp16.s create mode 100644 llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop1_err.s create mode 100644 
llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop2_dpp16.s create mode 100644 llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop2_err.s create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_dpp16.txt create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_from_vop1_dpp16.txt create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_from_vop2_dpp16.txt diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index c49f1930705aa..18fae6cfc7ed9 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1969,6 +1969,7 @@ class getVOP3DPPSrcForVT { RegisterOperand ret = !cond(!eq(VT, i1) : SSrc_i1, !eq(VT, i16) : !if (IsFake16, VCSrc_b16, VCSrcT_b16), + !eq(VT, i64) : VCSrc_b64, !eq(VT, f16) : !if (IsFake16, VCSrc_f16, VCSrcT_f16), !eq(VT, bf16) : !if (IsFake16, VCSrc_bf16, VCSrcT_bf16), !eq(VT, v2i16) : VCSrc_v2b16, diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 582a353632436..e6a7c35dce0be 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -24,6 +24,7 @@ def VOP_F32_F32_F32_F32_VCC : VOPProfile<[f32, f32, f32, f32]> { } def VOP_F64_F64_F64_F64_VCC : VOPProfile<[f64, f64, f64, f64]> { let Outs64 = (outs DstRC.RegClass:$vdst); + let HasExt64BitDPP = 1; let IsSingle = 1; } } @@ -51,7 +52,24 @@ def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> { let HasExt64BitDPP = 1 in { def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile; -def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile; +def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile { + let OutsVOP3DPP = Outs64; + let AsmVOP3DPP = getAsmVOP3DPP.ret; + let AsmVOP3DPP16 = getAsmVOP3DPP16.ret; + let AsmVOP3DPP8 = getAsmVOP3DPP8.ret; +} + +def VOP3b_I64_I1_I32_I32_I64_DPP : VOPProfile<[i64, i32, i32, i64]> { + let HasClamp = 1; + + let IsSingle = 1; + let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst); + let 
OutsVOP3DPP = Outs64; + let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp"; + let AsmVOP3DPP = getAsmVOP3DPP.ret; + let AsmVOP3DPP16 = getAsmVOP3DPP16.ret; + let AsmVOP3DPP8 = getAsmVOP3DPP8.ret; +} class V_MUL_PROF : VOP3_Profile

{ let HasExtVOP3DPP = 0; @@ -229,7 +247,7 @@ defm V_DIV_FMAS_F32 : VOP3Inst_Pseudo_Wrapper <"v_div_fmas_f32", VOP_F32_F32_F32 // result *= 2^64 // let SchedRW = [WriteDouble], FPDPRounding = 1 in -defm V_DIV_FMAS_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC, []>; +defm V_DIV_FMAS_F64 : VOP3Inst <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC>; } // End Uses = [MODE, VCC, EXEC] } // End isCommutable = 1 @@ -294,7 +312,7 @@ defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile; let SchedRW = [WriteDoubleAdd], FPDPRounding = 1 in { - defm V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile, AMDGPUdiv_fixup>; + defm V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP_F64_F64_F64_F64_DPP_PROF, AMDGPUdiv_fixup>; defm V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile, any_fldexp>; } // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1 } // End isReMaterializable = 1 @@ -335,7 +353,7 @@ let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it d // Double precision division pre-scale. 
let SchedRW = [WriteDouble, WriteSALU], FPDPRounding = 1 in - defm V_DIV_SCALE_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64>; + defm V_DIV_SCALE_F64 : VOP3Inst <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64>; } // End mayRaiseFPException = 0 let isReMaterializable = 1 in @@ -408,9 +426,9 @@ defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>; } // End SubtargetPredicate = isGFX7Plus let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU] in { - let SubtargetPredicate = isGFX7Plus, OtherPredicates = [HasNotMADIntraFwdBug] in { - defm V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>; - defm V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>; + let SubtargetPredicate = isGFX7Plus in { + defm V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64_DPP, null_frag, [HasNotMADIntraFwdBug]>; + defm V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64_DPP, null_frag, [HasNotMADIntraFwdBug]>; } let SubtargetPredicate = isGFX11Only, OtherPredicates = [HasMADIntraFwdBug], Constraints = "@earlyclobber $vdst" in { @@ -2054,8 +2072,8 @@ defm V_S_SQRT_F32 : VOP3Only_Real_Base_gfx12<0x288>; defm V_S_SQRT_F16 : VOP3Only_Real_Base_gfx12<0x289>; defm V_MAD_CO_U64_U32 : VOP3be_Real_with_name_gfx12<0x2fe, "V_MAD_U64_U32", "v_mad_co_u64_u32">; defm V_MAD_CO_I64_I32 : VOP3be_Real_with_name_gfx12<0x2ff, "V_MAD_I64_I32", "v_mad_co_i64_i32">; -defm V_MINIMUM_F64 : VOP3Only_Real_Base_gfx12<0x341>; -defm V_MAXIMUM_F64 : VOP3Only_Real_Base_gfx12<0x342>; +defm V_MINIMUM_F64 : VOP3Only_Realtriple_gfx11_gfx12<0x341>; +defm V_MAXIMUM_F64 : VOP3Only_Realtriple_gfx11_gfx12<0x342>; defm V_MINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x365>; defm V_MAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x366>; defm V_MINIMUM_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x367, "v_minimum_f16">; @@ -2127,6 +2145,13 @@ multiclass VOP3be_Real_gfx11_gfx12 op, string opName, string asmName> : VOP3be_Real, VOP3be_Real; 
+multiclass VOP3be_Real_gfx11_gfx12_not_gfx1250 op, string opName, string asmName> : + VOP3be_Real, + VOP3be_Real; + +multiclass VOP3be_Realtriple_gfx1250 op> : + VOP3be_Realtriple; + multiclass VOP3_Real_No_Suffix_gfx11_gfx12 op> : VOP3_Real_No_Suffix, VOP3_Real_No_Suffix; @@ -2141,7 +2166,7 @@ defm V_BFE_U32 : VOP3_Realtriple_gfx11_gfx12<0x210>; defm V_BFE_I32 : VOP3_Realtriple_gfx11_gfx12<0x211>; defm V_BFI_B32 : VOP3_Realtriple_gfx11_gfx12<0x212>; defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>; -defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>; +defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x214>; defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>; defm V_ALIGNBIT_B32 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x216, "v_alignbit_b32">; defm V_ALIGNBYTE_B32 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x217, "v_alignbyte_b32">; @@ -2161,9 +2186,9 @@ defm V_SAD_U16 : VOP3_Realtriple_gfx11_gfx12<0x224>; defm V_SAD_U32 : VOP3_Realtriple_gfx11_gfx12<0x225>; defm V_CVT_PK_U8_F32 : VOP3_Realtriple_gfx11_gfx12<0x226>; defm V_DIV_FIXUP_F32 : VOP3_Real_Base_gfx11_gfx12<0x227>; -defm V_DIV_FIXUP_F64 : VOP3_Real_Base_gfx11_gfx12<0x228>; +defm V_DIV_FIXUP_F64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x228>; defm V_DIV_FMAS_F32 : VOP3_Real_Base_gfx11_gfx12<0x237>; -defm V_DIV_FMAS_F64 : VOP3_Real_Base_gfx11_gfx12<0x238>; +defm V_DIV_FMAS_F64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x238>; defm V_MSAD_U8 : VOP3_Realtriple_gfx11_gfx12<0x239>; defm V_QSAD_PK_U16_U8 : VOP3_Real_Base_gfx11_gfx12<0x23a>; defm V_MQSAD_PK_U16_U8 : VOP3_Real_Base_gfx11_gfx12<0x23b>; @@ -2205,7 +2230,7 @@ defm V_MINMAX_I32 : VOP3_Realtriple_gfx11_gfx12<0x265>; defm V_DOT2_F16_F16 : VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<0x266, "v_dot2_f16_f16">; defm V_DOT2_BF16_BF16 : VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<0x267, "v_dot2_bf16_bf16">; defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11_gfx12<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">; -defm V_DIV_SCALE_F64 : 
VOP3be_Real_gfx11_gfx12<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">; +defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11_gfx12_not_gfx1250<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">; defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">; defm V_MAD_I64_I32_gfx11 : VOP3be_Real_gfx11<0x2ff, "V_MAD_I64_I32_gfx11", "v_mad_i64_i32">; defm V_ADD_NC_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x303, "v_add_nc_u16">; @@ -2228,7 +2253,7 @@ defm V_ADD_F64 : VOP3_Real_Base_gfx11<0x327>; defm V_MUL_F64 : VOP3_Real_Base_gfx11<0x328>; defm V_MIN_F64 : VOP3_Real_Base_gfx11<0x329>; defm V_MAX_F64 : VOP3_Real_Base_gfx11<0x32a>; -defm V_LDEXP_F64 : VOP3_Real_Base_gfx11_gfx12<0x32b>; +defm V_LDEXP_F64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32b>; defm V_MUL_LO_U32 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32c>; defm V_MUL_HI_U32 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32d>; defm V_MUL_HI_I32 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x32e>; @@ -2237,8 +2262,8 @@ defm V_LSHLREV_B16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x33 defm V_LSHRREV_B16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x339, "v_lshrrev_b16">; defm V_ASHRREV_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x33a, "v_ashrrev_i16">; defm V_LSHLREV_B64 : VOP3_Real_Base_gfx11<0x33c>; -defm V_LSHRREV_B64 : VOP3_Real_Base_gfx11_gfx12<0x33d>; -defm V_ASHRREV_I64 : VOP3_Real_Base_gfx11_gfx12<0x33e>; +defm V_LSHRREV_B64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x33d>; +defm V_ASHRREV_I64 : VOP3_Real_Base_gfx11_gfx12_not_gfx1250<0x33e>; defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx11_gfx12<0x360>; // Pseudo in VOP2 let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in { defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx11_gfx12<0x361>; // Pseudo in VOP2 @@ -2260,9 +2285,16 @@ let AssemblerPredicate = isGFX11Plus in { } // These instructions differ from GFX12 variant by supporting DPP: +defm V_FMA_F64 : VOP3Only_Realtriple_gfx1250<0x214>; 
+defm V_DIV_FIXUP_F64 : VOP3Only_Realtriple_gfx1250<0x228>; +defm V_DIV_FMAS_F64 : VOP3Only_Realtriple_gfx1250<0x238>; +defm V_DIV_SCALE_F64 : VOP3be_Realtriple_gfx1250<0x2fd>; +defm V_LDEXP_F64 : VOP3Only_Realtriple_gfx1250<0x32b>; defm V_MUL_LO_U32 : VOP3Only_Realtriple_gfx1250<0x32c>; defm V_MUL_HI_U32 : VOP3Only_Realtriple_gfx1250<0x32d>; defm V_MUL_HI_I32 : VOP3Only_Realtriple_gfx1250<0x32e>; +defm V_LSHRREV_B64 : VOP3Only_Realtriple_gfx1250<0x33d>; +defm V_ASHRREV_I64 : VOP3Only_Realtriple_gfx1250<0x33e>; defm V_PERM_PK16_B4_U4 : VOP3Only_Real_Base_gfx1250<0x23f>; defm V_PERM_PK16_B6_U4 : VOP3Only_Real_Base_gfx1250<0x242>; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index b900510d7622a..631f0f3318cd1 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -1041,8 +1041,9 @@ class VOP3_DPP_Pseudo : let Size = 12; let VOP3 = 1; let AsmMatchConverter = "cvtVOP3DPP"; - let AsmVariantName = !if(P.HasExtVOP3DPP, AMDGPUAsmVariants.VOP3_DPP, - AMDGPUAsmVariants.Disable); + let AsmVariantName = !if(!or(P.HasExtVOP3DPP, P.HasExt64BitDPP), + AMDGPUAsmVariants.VOP3_DPP, + AMDGPUAsmVariants.Disable); } class VOP_DPP_Real : @@ -1115,8 +1116,9 @@ class VOP3_DPP_Base op, string opName, } } } - def Gen.Suffix#"_VOP3_alias" : LetDummies, - AMDGPUMnemonicAlias { - let AssemblerPredicate = Gen.AssemblerPredicate; + if !ne(ps.Mnemonic, asmName) then { + def Gen.Suffix#"_VOP3_alias" : LetDummies, + AMDGPUMnemonicAlias { + let AssemblerPredicate = Gen.AssemblerPredicate; + } } } @@ -1902,33 +1906,36 @@ multiclass VOP3_Real_dpp_with_name op, string opName, multiclass VOP3_Real_dpp8_Base op, string opName = NAME> { defvar ps = !cast(opName#"_e64"); - def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8 { - let DecoderNamespace = Gen.DecoderNamespace; - let AssemblerPredicate = Gen.AssemblerPredicate; - } + if !not(ps.Pfl.HasExt64BitDPP) then + def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8 { + 
let DecoderNamespace = Gen.DecoderNamespace; + let AssemblerPredicate = Gen.AssemblerPredicate; + } } multiclass VOP3Dot_Real_dpp8_Base op, string asmName, string opName = NAME> { defvar ps = !cast(opName#"_e64"); - def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8_t16 { - let Inst{11} = ?; - let Inst{12} = ?; - let AsmString = asmName # ps.Pfl.AsmVOP3DPP8; - let DecoderNamespace = Gen.DecoderNamespace - # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); - let AssemblerPredicate = Gen.AssemblerPredicate; - } + if !not(ps.Pfl.HasExt64BitDPP) then + def _e64_dpp8#Gen.Suffix : Base_VOP3_DPP8 { + let Inst{11} = ?; + let Inst{12} = ?; + let AsmString = asmName # ps.Pfl.AsmVOP3DPP8; + let DecoderNamespace = Gen.DecoderNamespace + # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); + let AssemblerPredicate = Gen.AssemblerPredicate; + } } multiclass VOP3_Real_dpp8_with_name op, string opName, string asmName> { defvar ps = !cast(opName#"_e64"); - let AsmString = asmName # ps.Pfl.AsmVOP3DPP8, - DecoderNamespace = Gen.DecoderNamespace# - !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"), - True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, - NoTrue16Predicate) in { - defm NAME : VOP3_Real_dpp8_Base; + if !not(ps.Pfl.HasExt64BitDPP) then + let AsmString = asmName # ps.Pfl.AsmVOP3DPP8, + DecoderNamespace = Gen.DecoderNamespace# + !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"), + True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, + NoTrue16Predicate) in { + defm NAME : VOP3_Real_dpp8_Base; } } @@ -1955,10 +1962,11 @@ multiclass VOP3be_Real_dpp op, string opName, multiclass VOP3be_Real_dpp8 op, string opName, string asmName> { defvar ps = !cast(opName #"_e64"); - def _e64_dpp8#Gen.Suffix : VOP3b_DPP8_Base { - let DecoderNamespace = Gen.DecoderNamespace; - let AssemblerPredicate = Gen.AssemblerPredicate; - } + if !not(ps.Pfl.HasExt64BitDPP) then + def _e64_dpp8#Gen.Suffix : VOP3b_DPP8_Base { + let DecoderNamespace = Gen.DecoderNamespace; + let AssemblerPredicate = Gen.AssemblerPredicate; + } } // 
VOP1 and VOP2 depend on these triple defs @@ -2105,6 +2113,9 @@ multiclass VOP3Only_Real_Base_gfx1250 op> : multiclass VOP3Only_Realtriple_gfx1250 op, bit isSingle = 0> : VOP3_Realtriple; +multiclass VOP3Only_Realtriple_gfx12_not_gfx1250 op, bit isSingle = 0> : + VOP3_Realtriple; + multiclass VOP3Only_Realtriple_with_name_gfx1250 op, string opName, string asmName, string pseudo_mnemonic = "", bit isSingle = 0> : @@ -2144,11 +2155,8 @@ multiclass VOP3Only_Realtriple_t16_and_fake16_gfx1250 op, multiclass VOP3be_Real_with_name_gfx12 op, string opName, string asmName, bit isSingle = 0> { defvar ps = !cast(opName#"_e64"); - let AsmString = asmName # ps.AsmOperands, - IsSingle = !or(isSingle, ps.Pfl.IsSingle) in - def _e64_gfx12 : - VOP3_Real_Gen, - VOP3be_gfx11_gfx12; + defm NAME : VOP3be_Realtriple; def : AMDGPUMnemonicAlias { let AssemblerPredicate = GFX12Gen.AssemblerPredicate; } diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll b/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll index 43f6def22d981..6c226bd12d79c 100644 --- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll +++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll @@ -3,6 +3,7 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS,GFX10 -DCTL=row_share ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX10PLUS,GFX11 -DCTL=row_share ; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck %s -check-prefixes=GCN,DPP32,GFX1250 -DCTL=row_share +; RUN: llc -mtriple=amdgcn -mcpu=gfx1251 < %s | FileCheck %s -check-prefixes=GCN,DPP64,DPPMOV64,DPP64-GFX1251 -DCTL=row_share ; GCN-LABEL: {{^}}dpp64_ceil: ; GCN: global_load_{{dwordx2|b64}} [[V:v\[[0-9:]+\]]], @@ -23,6 +24,8 @@ define amdgpu_kernel void @dpp64_ceil(ptr addrspace(1) %arg, i64 %in1) { ; GCN-LABEL: {{^}}dpp64_rcp: ; GCN: global_load_{{dwordx2|b64}} [[V:v\[[0-9:]+\]]], ; DPP64-GFX9: v_rcp_f64_dpp [[V]], [[V]] [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} +; DPP64-GFX1251: 
v_mov_b64_dpp v[{{[0-9:]+}}], [[V]] [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} +; DPP64-GFX1251: v_rcp_f64_e32 ; DPP32-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} [[CTL]]:1 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} define amdgpu_kernel void @dpp64_rcp(ptr addrspace(1) %arg, i64 %in1) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -79,6 +82,7 @@ define amdgpu_kernel void @dpp64_div(ptr addrspace(1) %arg, i64 %in1) { ; GFX1250: v_mov_b32_e32 [[V2:v[0-9]+]], [[V]] ; GFX1250: v_mov_b32_dpp [[V2]], [[V2]] {{row_share|row_newbcast}}:0 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} ; GFX1250: v_mul_lo_u32 [[V]], [[V2]], [[V]]{{$}} +; DPP64-GFX1251: v_mul_lo_u32_e64_dpp [[V]], [[V]], [[V]] [[CTL]]:0 row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} define amdgpu_kernel void @dpp_mul_row_share(ptr addrspace(1) %arg) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %id diff --git a/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_dpp16.s new file mode 100644 index 0000000000000..d3a22a995673e --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_dpp16.s @@ -0,0 +1,150 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1251 -show-encoding < %s | FileCheck --check-prefix=GFX1251 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s + +v_lshl_add_u64 v[2:3], v[4:5], v7, v[8:9] row_share:3 +// GFX1251: v_lshl_add_u64_e64_dpp v[2:3], v[4:5], v7, v[8:9] row_share:3 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x52,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. 
+ +v_lshl_add_u64 v[2:3], v[4:5], v4, v[2:3] row_share:0 row_mask:0xf bank_mask:0xf +// GFX1251: v_lshl_add_u64_e64_dpp v[2:3], v[4:5], v4, v[2:3] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x52,0xd6,0xfa,0x08,0x0a,0x04,0x04,0x50,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_fma_f64 v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 +// GFX1251: v_fma_f64_e64_dpp v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x14,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_fma_f64 v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_div_fixup_f64 v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 +// GFX1251: v_div_fixup_f64_e64_dpp v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x28,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_div_fixup_f64 v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_div_fmas_f64 v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 +// GFX1251: v_div_fmas_f64_e64_dpp v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x38,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_div_fmas_f64 v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_div_scale_f64 v[4:5], s2, v[2:3], v[6:7], v[8:9] row_share:1 +// GFX1251: v_div_scale_f64_e64_dpp v[4:5], s2, v[2:3], v[6:7], v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x02,0xfd,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1250-ERR-NEXT:{{^}}v_div_scale_f64 v[4:5], s2, v[2:3], v[6:7], v[8:9] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_mad_co_u64_u32 v[4:5], s2, v2, v6, v[8:9] row_share:1 +// GFX1251: v_mad_co_u64_u32_e64_dpp v[4:5], s2, v2, v6, v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x02,0xfe,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_mad_co_u64_u32 v[4:5], s2, v2, v6, v[8:9] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_mad_co_i64_i32 v[4:5], s2, v2, v6, v[8:9] row_share:1 +// GFX1251: v_mad_co_i64_i32_e64_dpp v[4:5], s2, v2, v6, v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x02,0xff,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_mad_co_i64_i32 v[4:5], s2, v2, v6, v[8:9] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_minimum_f64 v[4:5], v[2:3], v[6:7] row_share:1 +// GFX1251: v_minimum_f64_e64_dpp v[4:5], v[2:3], v[6:7] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x41,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_minimum_f64 v[4:5], v[2:3], v[6:7] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_maximum_f64 v[4:5], v[2:3], v[6:7] row_share:1 +// GFX1251: v_maximum_f64_e64_dpp v[4:5], v[2:3], v[6:7] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x42,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1250-ERR-NEXT:{{^}}v_maximum_f64 v[4:5], v[2:3], v[6:7] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_ldexp_f64 v[4:5], v[2:3], v6 row_share:1 +// GFX1251: v_ldexp_f64_e64_dpp v[4:5], v[2:3], v6 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x2b,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_ldexp_f64 v[4:5], v[2:3], v6 row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_mul_lo_u32 v4, v2, v6 row_share:1 +// GFX1251: v_mul_lo_u32_e64_dpp v4, v2, v6 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x2c,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_mul_lo_u32 v4, v2, v6 row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_mul_hi_u32 v4, v2, v6 row_share:1 +// GFX1251: v_mul_hi_u32_e64_dpp v4, v2, v6 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x2d,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_mul_hi_u32 v4, v2, v6 row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_mul_hi_i32 v4, v2, v6 row_share:1 +// GFX1251: v_mul_hi_i32_e64_dpp v4, v2, v6 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x2e,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_mul_hi_i32 v4, v2, v6 row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_lshrrev_b64 v[4:5], v2, v[6:7] row_share:1 +// GFX1251: v_lshrrev_b64_e64_dpp v[4:5], v2, v[6:7] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x3d,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1250-ERR-NEXT:{{^}}v_lshrrev_b64 v[4:5], v2, v[6:7] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_ashrrev_i64 v[4:5], v2, v[6:7] row_share:1 +// GFX1251: v_ashrrev_i64_e64_dpp v[4:5], v2, v[6:7] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x3e,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. +// GFX1250-ERR-NEXT:{{^}}v_ashrrev_i64 v[4:5], v2, v[6:7] row_share:1 +// GFX1250-ERR-NEXT:{{^}} ^ + +v_mad_u32 v2, v4, v7, v8 row_share:3 fi:1 +// GFX1251: v_mad_u32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x35,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_mad_u32 v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1251: v_mad_u32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x35,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_max_i64 v[2:3], v[4:5], v[6:7] row_share:3 fi:1 +// GFX1251: v_max_i64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x1b,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_max_i64 v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf +// GFX1251: v_max_i64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x1b,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_max_u64 v[2:3], v[4:5], v[6:7] row_share:3 fi:1 +// GFX1251: v_max_u64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x19,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. 
+ +v_max_u64 v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf +// GFX1251: v_max_u64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x19,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_min_i64 v[2:3], v[4:5], v[6:7] row_share:3 fi:1 +// GFX1251: v_min_i64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x1a,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_min_i64 v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf +// GFX1251: v_min_i64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x1a,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_min_u64 v[2:3], v[4:5], v[6:7] row_share:3 fi:1 +// GFX1251: v_min_u64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x18,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_min_u64 v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf +// GFX1251: v_min_u64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x18,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_mad_nc_u64_u32 v[2:3], v4, v7, v[8:9] row_share:3 fi:1 +// GFX1251: v_mad_nc_u64_u32_e64_dpp v[2:3], v4, v7, v[8:9] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0xfa,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. 
+ +v_mad_nc_u64_u32 v[2:3], v4, v5, 1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1251: v_mad_nc_u64_u32_e64_dpp v[2:3], v4, v5, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0xfa,0xd6,0xfa,0x0a,0x06,0x02,0x04,0x50,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_mad_nc_i64_i32 v[2:3], v4, v7, v[8:9] row_share:3 fi:1 +// GFX1251: v_mad_nc_i64_i32_e64_dpp v[2:3], v4, v7, v[8:9] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0xfb,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. + +v_mad_nc_i64_i32 v[2:3], v4, v5, 1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1251: v_mad_nc_i64_i32_e64_dpp v[2:3], v4, v5, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0xfb,0xd6,0xfa,0x0a,0x06,0x02,0x04,0x50,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand. diff --git a/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop1_dpp16.s new file mode 100644 index 0000000000000..19b138e63bb93 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop1_dpp16.s @@ -0,0 +1,58 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1251 -show-encoding < %s | FileCheck --check-prefix=GFX1251 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s + +v_mov_b64_e64_dpp v[4:5], v[2:3] row_share:1 +// GFX1251: v_mov_b64_e64_dpp v[4:5], v[2:3] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x9d,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_cvt_i32_f64_e64_dpp v2, v[4:5] row_share:1 +// GFX1251: v_cvt_i32_f64_e64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x83,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] 
+// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_cvt_f64_i32_e64_dpp v[4:5], v2 row_share:1 +// GFX1251: v_cvt_f64_i32_e64_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x84,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_cvt_f32_f64_e64_dpp v2, v[4:5] row_share:1 +// GFX1251: v_cvt_f32_f64_e64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x8f,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_cvt_f64_f32_e64_dpp v[4:5], v2 row_share:1 +// GFX1251: v_cvt_f64_f32_e64_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x90,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_cvt_u32_f64_e64_dpp v2, v[4:5] row_share:1 +// GFX1251: v_cvt_u32_f64_e64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x95,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_cvt_f64_u32_e64_dpp v[4:5], v2 row_share:1 +// GFX1251: v_cvt_f64_u32_e64_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x96,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_trunc_f64_e64_dpp v[2:3], v[4:5] row_share:1 +// GFX1251: v_trunc_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x97,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_ceil_f64_e64_dpp v[2:3], v[4:5] 
row_share:1 +// GFX1251: v_ceil_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x98,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_rndne_f64_e64_dpp v[2:3], v[4:5] row_share:1 +// GFX1251: v_rndne_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x99,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_floor_f64_e64_dpp v[2:3], v[4:5] row_share:1 +// GFX1251: v_floor_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x9a,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_frexp_exp_i32_f64_e64_dpp v2, v[4:5] row_share:1 +// GFX1251: v_frexp_exp_i32_f64_e64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0xbc,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_frexp_mant_f64_e64_dpp v[2:3], v[4:5] row_share:1 +// GFX1251: v_frexp_mant_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0xbd,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_fract_f64_e64_dpp v[2:3], v[4:5] row_share:1 +// GFX1251: v_fract_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0xbe,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported diff --git a/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop1_err.s b/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop1_err.s new file mode 100644 
index 0000000000000..8b30278c7820c --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop1_err.s @@ -0,0 +1,150 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1251 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1251-ERR --implicit-check-not=error: --strict-whitespace %s + +v_mov_b64_e64_dpp v[4:5], v[2:3] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_mov_b64_e64_dpp v[4:5], v[2:3] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_i32_f64_e64_dpp v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_cvt_i32_f64_e64_dpp v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_i32_e64_dpp v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_i32_e64_dpp v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f32_f64_e64_dpp v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_cvt_f32_f64_e64_dpp v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_f32_e64_dpp v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_f32_e64_dpp v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_u32_f64_e64_dpp v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_cvt_u32_f64_e64_dpp v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_u32_e64_dpp v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_u32_e64_dpp v[4:5], v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_trunc_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_trunc_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_ceil_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_ceil_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_rndne_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_rndne_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_floor_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_floor_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_frexp_exp_i32_f64_e64_dpp v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_frexp_exp_i32_f64_e64_dpp v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_frexp_mant_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_frexp_mant_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_fract_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1251-ERR-NEXT:{{^}}v_fract_f64_e64_dpp v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_mov_b64_e64_dpp v[4:5], v[2:3] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_mov_b64_e64_dpp v[4:5], v[2:3] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_i32_f64_e64_dpp v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_i32_f64_e64_dpp v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_i32_e64_dpp v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_i32_e64_dpp v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f32_f64_e64_dpp v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_f32_f64_e64_dpp v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_f32_e64_dpp v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_f32_e64_dpp v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_u32_f64_e64_dpp v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_u32_f64_e64_dpp v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_cvt_f64_u32_e64_dpp v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_cvt_f64_u32_e64_dpp v[4:5], v2 quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_trunc_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// 
GFX1251-ERR-NEXT:{{^}}v_trunc_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_ceil_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_ceil_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_rndne_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_rndne_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_floor_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_floor_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_frexp_exp_i32_f64_e64_dpp v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_frexp_exp_i32_f64_e64_dpp v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_frexp_mant_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_frexp_mant_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_fract_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_fract_f64_e64_dpp v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_rcp_f64_e64_dpp v[4:5], v[2:3] row_share:1 +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_rsq_f64_e64_dpp v[4:5], v[2:3] row_share:1 +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_sqrt_f64_e64_dpp v[4:5], v[2:3] row_share:1 +// GFX1251-ERR: 
:[[@LINE-1]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported diff --git a/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop2_dpp16.s b/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop2_dpp16.s new file mode 100644 index 0000000000000..f4c7f6ac336dd --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop2_dpp16.s @@ -0,0 +1,34 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1251 -show-encoding < %s | FileCheck --check-prefix=GFX1251 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s + +v_add_nc_u64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1251: v_add_nc_u64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x28,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_sub_nc_u64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1251: v_sub_nc_u64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x29,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_fmac_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1251: v_fmac_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x17,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_add_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1251: v_add_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x02,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_mul_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 +// 
GFX1251: v_mul_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x06,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_max_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1251: v_max_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x0e,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_min_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 +// GFX1251: v_min_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x0d,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_lshlrev_b64_e64_dpp v[4:5], v2, v[4:5] row_share:1 +// GFX1251: v_lshlrev_b64_e64_dpp v[4:5], v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x1f,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +// GFX1250-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported diff --git a/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop2_err.s b/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop2_err.s new file mode 100644 index 0000000000000..0ff0d4288d984 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1251_asm_vop3_from_vop2_err.s @@ -0,0 +1,93 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1251 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1251-ERR --implicit-check-not=error: --strict-whitespace %s + +v_add_nc_u64_e64_dpp v[2:3], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1251-ERR-NEXT:{{^}}v_add_nc_u64_e64_dpp v[2:3], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_sub_nc_u64_e64_dpp v[2:3], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_sub_nc_u64_e64_dpp v[2:3], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_fmac_f64_e64_dpp v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_fmac_f64_e64_dpp v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_add_f64_e64_dpp v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_add_f64_e64_dpp v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_mul_f64_e64_dpp v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_mul_f64_e64_dpp v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_max_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_max_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_min_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. +// GFX1251-ERR-NEXT:{{^}}v_min_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_lshlrev_b64_e64_dpp v[4:5], v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
+// GFX1251-ERR-NEXT:{{^}}v_lshlrev_b64_e64_dpp v[4:5], v2, v[4:5] dpp8:[7,6,5,4,3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_add_nc_u64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_add_nc_u64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_sub_nc_u64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_sub_nc_u64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_fmac_f64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_fmac_f64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_add_f64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_add_f64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_mul_f64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_mul_f64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_max_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_max_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_min_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_min_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + 
+v_lshlrev_b64_e64_dpp v[4:5], v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: DP ALU dpp only supports row_share +// GFX1251-ERR-NEXT:{{^}}v_lshlrev_b64_e64_dpp v[4:5], v2, v[4:5] quad_perm:[3,2,1,0] +// GFX1251-ERR-NEXT:{{^}} ^ + +v_fmaak_f32_e64_dpp v4, v2, v6, 3 row_share:1 +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_fmamk_f32_e64_dpp v4, v2, 3, v6 row_share:1 +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_fmaak_f16_e64_dpp v4, v2, v6, 3 row_share:1 +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported + +v_fmamk_f16_e64_dpp v4, v2, 3, v6 row_share:1 +// GFX1251-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: e64_dpp variant of this instruction is not supported diff --git a/llvm/test/MC/AMDGPU/vop3-gfx9.s b/llvm/test/MC/AMDGPU/vop3-gfx9.s index a61b0c87e199f..f98f33a979bc5 100644 --- a/llvm/test/MC/AMDGPU/vop3-gfx9.s +++ b/llvm/test/MC/AMDGPU/vop3-gfx9.s @@ -723,8 +723,8 @@ v_add_f64 v[0:1], s0, v[0:1] // NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction v_add_f64 v[0:1], s[0:3], v[0:1] -// NOGCN: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOGCN: :[[@LINE+2]]:{{[0-9]+}}: error: too few operands for instruction +// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: too few operands for instruction v_add_f64 v[0:1], v0, v[0:1] // NOGCN: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_dpp16.txt new file mode 100644 index 0000000000000..056250d2dd436 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_dpp16.txt @@ -0,0 +1,94 @@ +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1251 -disassemble -show-encoding < %s | 
FileCheck -check-prefixes=GFX1251,W32 %s + +# GFX1251: v_lshl_add_u64_e64_dpp v[2:3], v[4:5], v4, v[2:3] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x52,0xd6,0xfa,0x08,0x0a,0x04,0x04,0x50,0x01,0xff] +0x02,0x00,0x52,0xd6,0xfa,0x08,0x0a,0x04,0x04,0x50,0x01,0xff + +# GFX1251: v_lshl_add_u64_e64_dpp v[2:3], v[4:5], v7, v[8:9] row_share:3 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x52,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x01,0xff] +0x02,0x00,0x52,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x01,0xff + +# GFX1251: v_ashrrev_i64_e64_dpp v[4:5], v2, v[6:7] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x3e,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x3e,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_div_fixup_f64_e64_dpp v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x28,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +0x04,0x00,0x28,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff + +# GFX1251: v_div_fmas_f64_e64_dpp v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x38,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +0x04,0x00,0x38,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff + +# W32: v_div_scale_f64_e64_dpp v[4:5], s2, v[2:3], v[6:7], v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x02,0xfd,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +# W64: v_div_scale_f64_e64_dpp v[4:5], s[2:3], v[2:3], v[6:7], v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x02,0xfd,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +0x04,0x02,0xfd,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff + +# GFX1251: v_fma_f64_e64_dpp v[4:5], v[2:3], v[6:7], v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x14,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +0x04,0x00,0x14,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff + +# GFX1251: v_ldexp_f64_e64_dpp v[4:5], v[2:3], v6 row_share:1 row_mask:0xf bank_mask:0xf ; encoding:
[0x04,0x00,0x2b,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x2b,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_lshrrev_b64_e64_dpp v[4:5], v2, v[6:7] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x3d,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x3d,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff + +# W32: v_mad_co_i64_i32_e64_dpp v[4:5], s2, v2, v6, v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x02,0xff,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +# W64: v_mad_co_i64_i32_e64_dpp v[4:5], s[2:3], v2, v6, v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x02,0xff,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +0x04,0x02,0xff,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff + +# W32: v_mad_co_u64_u32_e64_dpp v[4:5], s2, v2, v6, v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x02,0xfe,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +# W64: v_mad_co_u64_u32_e64_dpp v[4:5], s[2:3], v2, v6, v[8:9] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x02,0xfe,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff] +0x04,0x02,0xfe,0xd6,0xfa,0x0c,0x22,0x04,0x02,0x51,0x01,0xff + +# GFX1251: v_maximum_f64_e64_dpp v[4:5], v[2:3], v[6:7] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x42,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x42,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_minimum_f64_e64_dpp v[4:5], v[2:3], v[6:7] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x41,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x41,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_mul_hi_i32_e64_dpp v4, v2, v6 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x2e,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x2e,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_mul_hi_u32_e64_dpp v4, v2, v6 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x04,0x00,0x2d,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x2d,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_mul_lo_u32_e64_dpp v4, v2, v6 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x2c,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x2c,0xd7,0xfa,0x0c,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_mad_u32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x35,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff] +0x02,0x00,0x35,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff + +# GFX1251: v_mad_u32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x35,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff] +0x02,0x00,0x35,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff + +# GFX1251: v_max_i64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x1b,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff] +0x02,0x00,0x1b,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff + +# GFX1251: v_max_i64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x1b,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff] +0x02,0x00,0x1b,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff + +# GFX1251: v_max_u64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x19,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff] +0x02,0x00,0x19,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff + +# GFX1251: v_max_u64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x19,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff] +0x02,0x00,0x19,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff + +# GFX1251: v_min_i64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x1a,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff] +0x02,0x00,0x1a,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff + +# GFX1251: v_min_i64_e64_dpp v[2:3], v[4:5], v[6:7] 
row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x1a,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff] +0x02,0x00,0x1a,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff + +# GFX1251: v_min_u64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x18,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff] +0x02,0x00,0x18,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x50,0x01,0xff + +# GFX1251: v_min_u64_e64_dpp v[2:3], v[4:5], v[6:7] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x18,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff] +0x02,0x00,0x18,0xd7,0xfa,0x0c,0x02,0x00,0x04,0x53,0x05,0xff + +# GFX1251: v_mad_nc_u64_u32_e64_dpp v[2:3], v4, v5, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0xfa,0xd6,0xfa,0x0a,0x06,0x02,0x04,0x50,0x01,0xff] +0x02,0x00,0xfa,0xd6,0xfa,0x0a,0x06,0x02,0x04,0x50,0x01,0xff + +# GFX1251: v_mad_nc_u64_u32_e64_dpp v[2:3], v4, v7, v[8:9] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0xfa,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff] +0x02,0x00,0xfa,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff + +# GFX1251: v_mad_nc_i64_i32_e64_dpp v[2:3], v4, v5, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0xfb,0xd6,0xfa,0x0a,0x06,0x02,0x04,0x50,0x01,0xff] +0x02,0x00,0xfb,0xd6,0xfa,0x0a,0x06,0x02,0x04,0x50,0x01,0xff + +# GFX1251: v_mad_nc_i64_i32_e64_dpp v[2:3], v4, v7, v[8:9] row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0xfb,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff] +0x02,0x00,0xfb,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_from_vop1_dpp16.txt new file mode 100644 index 0000000000000..3bc7b01740061 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_from_vop1_dpp16.txt @@ -0,0 +1,43 @@ +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1251 -disassemble -show-encoding < %s | FileCheck 
-check-prefix=GFX1251 %s + +# GFX1251: v_ceil_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x98,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +0x02,0x00,0x98,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff + +# GFX1251: v_cvt_f32_f64_e64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x8f,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +0x02,0x00,0x8f,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff + +# GFX1251: v_cvt_f64_f32_e64_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x90,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x90,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_cvt_f64_i32_e64_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x84,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x84,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_cvt_f64_u32_e64_dpp v[4:5], v2 row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x96,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x96,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_cvt_i32_f64_e64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x83,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +0x02,0x00,0x83,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff + +# GFX1251: v_cvt_u32_f64_e64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x95,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +0x02,0x00,0x95,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff + +# GFX1251: v_floor_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x9a,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +0x02,0x00,0x9a,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff + +# GFX1251: v_fract_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0xbe,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] 
+0x02,0x00,0xbe,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff + +# GFX1251: v_frexp_exp_i32_f64_e64_dpp v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0xbc,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +0x02,0x00,0xbc,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff + +# GFX1251: v_frexp_mant_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0xbd,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +0x02,0x00,0xbd,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff + +# GFX1251: v_mov_b64_e64_dpp v[4:5], v[2:3] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x9d,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x9d,0xd5,0xfa,0x00,0x00,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_rndne_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x99,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +0x02,0x00,0x99,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff + +# GFX1251: v_trunc_f64_e64_dpp v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x97,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff] +0x02,0x00,0x97,0xd5,0xfa,0x00,0x00,0x00,0x04,0x51,0x01,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_from_vop2_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_from_vop2_dpp16.txt new file mode 100644 index 0000000000000..d379f6968eaf6 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1251_dasm_vop3_from_vop2_dpp16.txt @@ -0,0 +1,25 @@ +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1251 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX1251 %s + +# GFX1251: v_add_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x02,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x02,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_add_nc_u64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x04,0x00,0x28,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x28,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_fmac_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x17,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x17,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_lshlrev_b64_e64_dpp v[4:5], v2, v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x1f,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x1f,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_max_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x0e,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x0e,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_min_num_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x0d,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x0d,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_mul_f64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x06,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x06,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff + +# GFX1251: v_sub_nc_u64_e64_dpp v[4:5], v[2:3], v[4:5] row_share:1 row_mask:0xf bank_mask:0xf ; encoding: [0x04,0x00,0x29,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff] +0x04,0x00,0x29,0xd5,0xfa,0x08,0x02,0x00,0x02,0x51,0x01,0xff