diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 0c7e20fc1ebf3..19c13804dd330 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -1945,6 +1945,14 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")>; } + multiclass VOP2Only_Real_e32_gfx10<bits<6> op> { + let IsSingle = 1 in + defm NAME: VOP2_Real_e32_gfx10<op>; + } + multiclass VOP2_Real_e32_dpp_dpp8_gfx10<bits<6> op> : + VOP2Only_Real_e32_gfx10<op>, + VOP2_Real_dpp_gfx10<op>, + VOP2_Real_dpp8_gfx10<op>; //===------------------------- VOP2 (with name) -------------------------===// multiclass VOP2_Real_e32_gfx10_with_name<bits<6> op, string opName, @@ -2168,10 +2176,7 @@ defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10<0x038>; defm V_MAX_F16 : VOP2_Real_gfx10<0x039>; defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>; defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>; - -let IsSingle = 1 in { - defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>; -} +defm V_PK_FMAC_F16 : VOP2_Real_e32_dpp_dpp8_gfx10<0x03c>; // VOP2 no carry-in, carry-out. 
defm V_ADD_NC_U32 : @@ -2560,6 +2565,7 @@ defm V_SUBBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_s defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>; defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>; defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>; +defm V_PK_FMAC_F16 : VOP2_Real_e32e64_gfx9<0x03c>; } // End AssemblerPredicate = isGFX9Only defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>; diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s index 3dcf288bbbaa5..bbd36a9140b96 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop2.s @@ -13185,3 +13185,15 @@ v_pk_fmac_f16 v5, -4.0, v2 v_pk_fmac_f16 v5, v1, v255 // GFX10: encoding: [0x01,0xff,0x0b,0x78] + +v_pk_fmac_f16 v5, v1, v2 +// GFX10: encoding: [0x01,0x05,0x0a,0x78] + +v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff] + +v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX10: encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00] + +v_pk_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX10: encoding: [0xe9,0x04,0x0a,0x78,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx10_unsupported_sdwa.s b/llvm/test/MC/AMDGPU/gfx10_unsupported_sdwa.s index 88db110ad9c20..681b34e4c1c56 100644 --- a/llvm/test/MC/AMDGPU/gfx10_unsupported_sdwa.s +++ b/llvm/test/MC/AMDGPU/gfx10_unsupported_sdwa.s @@ -32,6 +32,9 @@ v_min_u16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD s v_mul_lo_u16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD // CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported +v_pk_fmac_f16_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported + v_sub_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD 
src1_sel:BYTE_0 // CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_vop2_features.s b/llvm/test/MC/AMDGPU/gfx9_asm_vop2_features.s new file mode 100644 index 0000000000000..4b5efd00a7adf --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx9_asm_vop2_features.s @@ -0,0 +1,91 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx908 -show-encoding %s | FileCheck --check-prefix=CHECK-MI %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx90a -show-encoding %s | FileCheck --check-prefix=CHECK-MI %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx942 -show-encoding %s | FileCheck --check-prefix=CHECK-MI %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=CHECK-MI %s + +v_pk_fmac_f16 v5, v1, v2 +// CHECK-MI: [0x01,0x05,0x0a,0x78] + +v_pk_fmac_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// CHECK-MI: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff] + +v_pk_fmac_f16 v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// CHECK-MI: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x00,0x06,0x06] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x01,0x06,0x06] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:BYTE_2 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x02,0x06,0x06] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x03,0x06,0x06] + 
+v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x04,0x06,0x06] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x05,0x06,0x06] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:DWORD src1_sel:DWORD +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x0e,0x06,0x06] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x16,0x06,0x06] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x16,0x06,0x06] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x00,0x06] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x01,0x06] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:DWORD +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x02,0x06] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x03,0x06] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x04,0x06] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x05,0x06] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK-MI: 
[0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x00] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x01] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x02] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x03] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x04] + +v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x05] + +v_pk_fmac_f16_sdwa v5, v1, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// CHECK-MI: [0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x16] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp16.txt index 1774efa4a65c7..4a7471e6c6f98 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp16.txt @@ -2476,3 +2476,10 @@ # W32: v_cndmask_b32_dpp v5, -|v1|, -|v2|, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0xff] # W64: v_cndmask_b32_dpp v5, -|v1|, -|v2|, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0xff] 0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0xff + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff + +# GFX10: v_pk_fmac_f16_dpp 
v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00] +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00 + diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp8.txt index 40b8f24e4d72f..233f93a5b8e7d 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop2_dpp8.txt @@ -222,3 +222,6 @@ # W32: v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa] # W64: v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa] 0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa + +# GFX10: v_pk_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x78,0x01,0x77,0x39,0x05] +0xe9,0x04,0x0a,0x78,0x01,0x77,0x39,0x05 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop2_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop2_features.txt new file mode 100644 index 0000000000000..2b8d58853847b --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop2_features.txt @@ -0,0 +1,92 @@ +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx908 -disassemble -show-encoding < %s | FileCheck -check-prefix=CHECK-MI %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx90a -disassemble -show-encoding < %s | FileCheck -check-prefix=CHECK-MI %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx942 -disassemble -show-encoding < %s | FileCheck -check-prefix=CHECK-MI %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -disassemble -show-encoding < %s | FileCheck -check-prefix=CHECK-MI %s + +# CHECK-MI: v_pk_fmac_f16_e32 v5, v1, v2 +0x01,0x05,0x0a,0x78 + +# CHECK-MI: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0xff + +# CHECK-MI: v_pk_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +0xfa,0x04,0x0a,0x78,0x01,0xe4,0x00,0x00 + +# CHECK-MI: 
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +0xf9,0x04,0x0a,0x78,0x01,0x00,0x06,0x06 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +0xf9,0x04,0x0a,0x78,0x01,0x01,0x06,0x06 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:BYTE_2 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +0xf9,0x04,0x0a,0x78,0x01,0x02,0x06,0x06 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +0xf9,0x04,0x0a,0x78,0x01,0x03,0x06,0x06 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +0xf9,0x04,0x0a,0x78,0x01,0x04,0x06,0x06 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +0xf9,0x04,0x0a,0x78,0x01,0x05,0x06,0x06 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:DWORD src1_sel:DWORD +0xf9,0x04,0x0a,0x78,0x01,0x0e,0x06,0x06 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD +0xf9,0x04,0x0a,0x78,0x01,0x16,0x06,0x06 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD +0xf9,0x04,0x0a,0x78,0x01,0x16,0x06,0x06 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +0xf9,0x04,0x0a,0x78,0x01,0x06,0x00,0x06 + +# CHECK-MI: 
v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD +0xf9,0x04,0x0a,0x78,0x01,0x06,0x01,0x06 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:DWORD +0xf9,0x04,0x0a,0x78,0x01,0x06,0x02,0x06 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +0xf9,0x04,0x0a,0x78,0x01,0x06,0x03,0x06 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +0xf9,0x04,0x0a,0x78,0x01,0x06,0x04,0x06 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +0xf9,0x04,0x0a,0x78,0x01,0x06,0x05,0x06 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x06 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x00 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x01 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x02 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 +0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x03 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x04 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x05 + +# CHECK-MI: v_pk_fmac_f16_sdwa v5, v1, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +0xf9,0x04,0x0a,0x78,0x01,0x06,0x06,0x16 +