From 625ae52fa01bbde1b9ee61b36d92f2922fb58bee Mon Sep 17 00:00:00 2001 From: guochen2 Date: Thu, 19 Dec 2024 22:59:17 -0500 Subject: [PATCH] true16 for v_sat_pk_u8_i16 --- llvm/lib/Target/AMDGPU/VOP1Instructions.td | 2 +- llvm/test/MC/AMDGPU/gfx11_asm_vop1.s | 69 +++++++++++-------- llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s | 65 +++++++++-------- llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s | 21 ++++-- llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s | 18 +++++ .../MC/AMDGPU/gfx11_asm_vop1_t16_promote.s | 21 ++++-- .../AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s | 59 ++++++++-------- .../MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s | 15 ++-- .../test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s | 63 +++++++++-------- llvm/test/MC/AMDGPU/gfx12_asm_vop1.s | 51 ++++++++++---- llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s | 6 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s | 6 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_err.s | 18 +++++ .../MC/AMDGPU/gfx12_asm_vop1_t16_promote.s | 9 +++ .../test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s | 3 + .../AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s | 3 + .../MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s | 3 + .../Disassembler/AMDGPU/gfx11_dasm_vop1.txt | 55 +++++++++++---- .../AMDGPU/gfx11_dasm_vop1_dpp16.txt | 54 +++++++++++---- .../AMDGPU/gfx11_dasm_vop1_dpp8.txt | 17 ++++- .../gfx11_dasm_vop3_dpp16_from_vop1.txt | 46 +++++++++---- .../AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt | 10 ++- .../AMDGPU/gfx11_dasm_vop3_from_vop1.txt | 49 +++++++++---- .../AMDGPU/gfx12_dasm_vop1_dpp16.txt | 50 ++++++++++---- .../AMDGPU/gfx12_dasm_vop1_dpp8.txt | 13 +++- .../AMDGPU/gfx12_dasm_vop3_from_vop1.txt | 49 +++++++++---- .../gfx12_dasm_vop3_from_vop1_dpp16.txt | 46 +++++++++---- .../AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt | 10 ++- 28 files changed, 572 insertions(+), 259 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 1dd39be9e8d9c..bbb456ab739ab 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -1047,7 +1047,7 @@ defm V_RNDNE_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f1 defm V_FRACT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f16">; defm V_SIN_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">; defm V_COS_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">; -defm V_SAT_PK_U8_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">; +defm V_SAT_PK_U8_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">; defm V_CVT_NORM_I16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">; defm V_CVT_NORM_U16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">; diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s index 4e4dc6647daeb..4448720e6f79f 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s @@ -3236,50 +3236,59 @@ v_rsq_f64 v[5:6], src_scc v_rsq_f64 v[254:255], 0xaf123456 // GFX11: v_rsq_f64_e32 v[254:255], 0xaf123456 ; encoding: [0xff,0x62,0xfc,0x7f,0x56,0x34,0x12,0xaf] -v_sat_pk_u8_i16 v5, v1 -// GFX11: v_sat_pk_u8_i16_e32 v5, v1 ; encoding: [0x01,0xc5,0x0a,0x7e] +v_sat_pk_u8_i16 v5.l, v1 +// GFX11: v_sat_pk_u8_i16_e32 v5.l, v1 ; encoding: [0x01,0xc5,0x0a,0x7e] -v_sat_pk_u8_i16 v5, v255 -// GFX11: v_sat_pk_u8_i16_e32 v5, v255 ; encoding: [0xff,0xc5,0x0a,0x7e] +v_sat_pk_u8_i16 v5.l, v255 +// GFX11: v_sat_pk_u8_i16_e32 v5.l, v255 ; encoding: [0xff,0xc5,0x0a,0x7e] -v_sat_pk_u8_i16 v5, s1 -// GFX11: v_sat_pk_u8_i16_e32 v5, s1 ; encoding: [0x01,0xc4,0x0a,0x7e] +v_sat_pk_u8_i16 v5.l, s1 +// GFX11: v_sat_pk_u8_i16_e32 v5.l, s1 ; encoding: [0x01,0xc4,0x0a,0x7e] -v_sat_pk_u8_i16 v5, s105 -// GFX11: v_sat_pk_u8_i16_e32 v5, s105 ; encoding: [0x69,0xc4,0x0a,0x7e] +v_sat_pk_u8_i16 v5.l, s105 +// GFX11: v_sat_pk_u8_i16_e32 v5.l, s105 ; encoding: [0x69,0xc4,0x0a,0x7e] -v_sat_pk_u8_i16 v5, vcc_lo -// GFX11: v_sat_pk_u8_i16_e32 v5, vcc_lo ; encoding: [0x6a,0xc4,0x0a,0x7e] +v_sat_pk_u8_i16 v5.l, vcc_lo +// GFX11: v_sat_pk_u8_i16_e32 v5.l, vcc_lo ; encoding: [0x6a,0xc4,0x0a,0x7e] -v_sat_pk_u8_i16 v5, vcc_hi -// GFX11: v_sat_pk_u8_i16_e32 v5, vcc_hi ; encoding: [0x6b,0xc4,0x0a,0x7e] +v_sat_pk_u8_i16 v5.l, vcc_hi +// GFX11: v_sat_pk_u8_i16_e32 v5.l, vcc_hi ; encoding: [0x6b,0xc4,0x0a,0x7e] -v_sat_pk_u8_i16 v5, ttmp15 -// GFX11: v_sat_pk_u8_i16_e32 v5, ttmp15 ; encoding: [0x7b,0xc4,0x0a,0x7e] +v_sat_pk_u8_i16 v5.l, ttmp15 +// GFX11: v_sat_pk_u8_i16_e32 v5.l, ttmp15 ; encoding: [0x7b,0xc4,0x0a,0x7e] -v_sat_pk_u8_i16 v5, m0 -// GFX11: v_sat_pk_u8_i16_e32 v5, m0 ; encoding: [0x7d,0xc4,0x0a,0x7e] +v_sat_pk_u8_i16 v5.l, m0 +// GFX11: v_sat_pk_u8_i16_e32 v5.l, m0 ; encoding: [0x7d,0xc4,0x0a,0x7e] -v_sat_pk_u8_i16 v5, exec_lo -// GFX11: v_sat_pk_u8_i16_e32 v5, exec_lo ; encoding: [0x7e,0xc4,0x0a,0x7e] +v_sat_pk_u8_i16 v5.l, exec_lo +// GFX11: v_sat_pk_u8_i16_e32 v5.l, exec_lo ; encoding: [0x7e,0xc4,0x0a,0x7e] -v_sat_pk_u8_i16 v5, exec_hi -// GFX11: v_sat_pk_u8_i16_e32 v5, exec_hi ; encoding: [0x7f,0xc4,0x0a,0x7e] +v_sat_pk_u8_i16 v5.l, exec_hi +// GFX11: v_sat_pk_u8_i16_e32 v5.l, exec_hi ; encoding: [0x7f,0xc4,0x0a,0x7e] -v_sat_pk_u8_i16 v5, null -// GFX11: v_sat_pk_u8_i16_e32 v5, null ; encoding: [0x7c,0xc4,0x0a,0x7e] +v_sat_pk_u8_i16 v5.l, null +// GFX11: v_sat_pk_u8_i16_e32 v5.l, null ; encoding: [0x7c,0xc4,0x0a,0x7e] -v_sat_pk_u8_i16 v5, -1 -// GFX11: v_sat_pk_u8_i16_e32 v5, -1 ; encoding: [0xc1,0xc4,0x0a,0x7e] +v_sat_pk_u8_i16 v5.l, -1 +// GFX11: v_sat_pk_u8_i16_e32 v5.l, -1 ; encoding: [0xc1,0xc4,0x0a,0x7e] -v_sat_pk_u8_i16 v5, 0.5 -// GFX11: v_sat_pk_u8_i16_e32 v5, 0.5 ; encoding: [0xf0,0xc4,0x0a,0x7e] +v_sat_pk_u8_i16 v5.l, 0.5 +// GFX11: v_sat_pk_u8_i16_e32 v5.l, 0.5 ; encoding: [0xf0,0xc4,0x0a,0x7e] -v_sat_pk_u8_i16 v5, src_scc -// GFX11: v_sat_pk_u8_i16_e32 v5, src_scc ; encoding: [0xfd,0xc4,0x0a,0x7e] +v_sat_pk_u8_i16 v5.l, src_scc +// GFX11: v_sat_pk_u8_i16_e32 v5.l, src_scc ; encoding: [0xfd,0xc4,0x0a,0x7e] -v_sat_pk_u8_i16 v127, 0xfe0b -// GFX11: v_sat_pk_u8_i16_e32 v127, 0xfe0b ; encoding: [0xff,0xc4,0xfe,0x7e,0x0b,0xfe,0x00,0x00] +v_sat_pk_u8_i16 v127.l, 0xfe0b +// GFX11: v_sat_pk_u8_i16_e32 v127.l, 0xfe0b ; encoding: [0xff,0xc4,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_sat_pk_u8_i16 v127.l, 0.5 +// GFX11: v_sat_pk_u8_i16_e32 v127.l, 0.5 ; encoding: [0xf0,0xc4,0xfe,0x7e] + +v_sat_pk_u8_i16 v5.h, src_scc +// GFX11: v_sat_pk_u8_i16_e32 v5.h, src_scc ; encoding: [0xfd,0xc4,0x0a,0x7f] + +v_sat_pk_u8_i16 v127.h, 0xfe0b +// GFX11: v_sat_pk_u8_i16_e32 v127.h, 0xfe0b ; encoding: [0xff,0xc4,0xfe,0x7f,0x0b,0xfe,0x00,0x00] v_sin_f16 v5, v1 // GFX11: v_sin_f16_e32 v5, v1 ; encoding: [0x01,0xc1,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s index 98e4b29b25666..da2a3615360a4 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s @@ -2522,47 +2522,56 @@ v_rsq_f32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 v_rsq_f32 v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_rsq_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x5c,0xfe,0x7f,0xff,0x6f,0x35,0x30] -v_sat_pk_u8_i16 v5, v1 quad_perm:[3,2,1,0] -// GFX11: v_sat_pk_u8_i16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1b,0x00,0xff] +v_sat_pk_u8_i16 v5.l, v1 quad_perm:[3,2,1,0] +// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1b,0x00,0xff] -v_sat_pk_u8_i16 v5, v1 quad_perm:[0,1,2,3] -// GFX11: v_sat_pk_u8_i16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0xe4,0x00,0xff] +v_sat_pk_u8_i16 v5.l, v1 quad_perm:[0,1,2,3] +// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0xe4,0x00,0xff] -v_sat_pk_u8_i16 v5, v1 row_mirror -// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x40,0x01,0xff] +v_sat_pk_u8_i16 v5.l, v1 row_mirror +// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x40,0x01,0xff] -v_sat_pk_u8_i16 v5, v1 row_half_mirror -// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x41,0x01,0xff] +v_sat_pk_u8_i16 v5.l, v1 row_half_mirror +// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x41,0x01,0xff] -v_sat_pk_u8_i16 v5, v1 row_shl:1 -// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x01,0x01,0xff] +v_sat_pk_u8_i16 v5.l, v1 row_shl:1 +// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x01,0x01,0xff] -v_sat_pk_u8_i16 v5, v1 row_shl:15 -// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x0f,0x01,0xff] +v_sat_pk_u8_i16 v5.l, v1 row_shl:15 +// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x0f,0x01,0xff] -v_sat_pk_u8_i16 v5, v1 row_shr:1 -// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x11,0x01,0xff] +v_sat_pk_u8_i16 v5.l, v1 row_shr:1 +// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x11,0x01,0xff] -v_sat_pk_u8_i16 v5, v1 row_shr:15 -// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1f,0x01,0xff] +v_sat_pk_u8_i16 v5.l, v1 row_shr:15 +// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1f,0x01,0xff] -v_sat_pk_u8_i16 v5, v1 row_ror:1 -// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x21,0x01,0xff] +v_sat_pk_u8_i16 v5.l, v1 row_ror:1 +// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x21,0x01,0xff] -v_sat_pk_u8_i16 v5, v1 row_ror:15 -// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x2f,0x01,0xff] +v_sat_pk_u8_i16 v5.l, v1 row_ror:15 +// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x2f,0x01,0xff] -v_sat_pk_u8_i16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x50,0x01,0xff] +v_sat_pk_u8_i16 v5.l, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x50,0x01,0xff] -v_sat_pk_u8_i16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x5f,0x01,0x01] +v_sat_pk_u8_i16 v5.l, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x5f,0x01,0x01] -v_sat_pk_u8_i16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x60,0x09,0x13] +v_sat_pk_u8_i16 v5.l, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x60,0x09,0x13] -v_sat_pk_u8_i16 v127, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_sat_pk_u8_i16_dpp v127, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xc4,0xfe,0x7e,0xff,0x6f,0x05,0x30] +v_sat_pk_u8_i16 v127.l, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: v_sat_pk_u8_i16_dpp v127.l, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xc4,0xfe,0x7e,0xff,0x6f,0x05,0x30] + +v_sat_pk_u8_i16 v127.l, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_sat_pk_u8_i16_dpp v127.l, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xc4,0xfe,0x7e,0x01,0x5f,0x01,0x01] + +v_sat_pk_u8_i16 v5.h, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_sat_pk_u8_i16_dpp v5.h, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xc4,0x0a,0x7f,0x01,0x60,0x09,0x13] + +v_sat_pk_u8_i16 v127.h, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_sat_pk_u8_i16_dpp v127.h, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xc4,0xfe,0x7f,0xff,0x6f,0x05,0x30] v_sin_f16 v5, v1 quad_perm:[3,2,1,0] // GFX11: v_sin_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s index ab4606af2bb35..34cb2d097b7a7 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s @@ -587,14 +587,23 @@ v_rsq_f32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_rsq_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_rsq_f32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x5c,0xfe,0x7f,0xff,0x00,0x00,0x00] -v_sat_pk_u8_i16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_sat_pk_u8_i16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc4,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_sat_pk_u8_i16 v5.l, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc4,0x0a,0x7e,0x01,0x77,0x39,0x05] -v_sat_pk_u8_i16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_sat_pk_u8_i16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xc4,0x0a,0x7e,0x01,0x77,0x39,0x05] +v_sat_pk_u8_i16 v5.l, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_sat_pk_u8_i16_dpp v5.l, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xc4,0x0a,0x7e,0x01,0x77,0x39,0x05] -v_sat_pk_u8_i16 v127, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_sat_pk_u8_i16_dpp v127, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xc4,0xfe,0x7e,0xff,0x00,0x00,0x00] +v_sat_pk_u8_i16 v127.l, v255 dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_sat_pk_u8_i16_dpp v127.l, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xc4,0xfe,0x7e,0xff,0x00,0x00,0x00] + +v_sat_pk_u8_i16 v127.l, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sat_pk_u8_i16_dpp v127.l, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc4,0xfe,0x7e,0x01,0x77,0x39,0x05] + +v_sat_pk_u8_i16 v5.h, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_sat_pk_u8_i16_dpp v5.h, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xc4,0x0a,0x7f,0x01,0x77,0x39,0x05] + +v_sat_pk_u8_i16 v127.h, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_sat_pk_u8_i16_dpp v127.h, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xc4,0xfe,0x7f,0xff,0x00,0x00,0x00] v_sin_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_sin_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc0,0x0a,0x7e,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s index 4ae91340386b6..9c5693de3d8b1 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s @@ -716,6 +716,24 @@ v_sat_pk_u8_i16_e32 v199, v5 dpp8:[7,6,5,4,3,2,1,0] v_sat_pk_u8_i16_e32 v199, v5 quad_perm:[3,2,1,0] // GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction +v_sat_pk_u8_i16_e32 v199.h, v5.h +// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction + +v_sat_pk_u8_i16_e32 v199.h, v5.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction + +v_sat_pk_u8_i16_e32 v199.h, v5.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction + +v_sat_pk_u8_i16_e32 v199.l, v5.l +// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction + +v_sat_pk_u8_i16_e32 v199.l, v5.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction + +v_sat_pk_u8_i16_e32 v199.l, v5.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction + v_sin_f16_e32 v128, 0xfe0b // GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s index 1d441720280ca..fa6ab407f87c7 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s @@ -1802,14 +1802,23 @@ v_rsq_f16 v5, v199 dpp8:[7,6,5,4,3,2,1,0] v_rsq_f16 v5, v199 quad_perm:[3,2,1,0] // GFX11: v_rsq_f16_e64_dpp v5, v199 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0xc7,0x1b,0x00,0xff] -v_sat_pk_u8_i16 v199, v5 -// GFX11: v_sat_pk_u8_i16_e64 v199, v5 ; encoding: [0xc7,0x00,0xe2,0xd5,0x05,0x01,0x00,0x00] +v_sat_pk_u8_i16 v199.h, v5 +// GFX11: v_sat_pk_u8_i16_e64 v199.h, v5 op_sel:[0,1] ; encoding: [0xc7,0x40,0xe2,0xd5,0x05,0x01,0x00,0x00] -v_sat_pk_u8_i16 v199, v5 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_sat_pk_u8_i16_e64_dpp v199, v5 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xc7,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x05,0x77,0x39,0x05] +v_sat_pk_u8_i16 v199.h, v5 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sat_pk_u8_i16_e64_dpp v199.h, v5 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xc7,0x40,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x05,0x77,0x39,0x05] -v_sat_pk_u8_i16 v199, v5 quad_perm:[3,2,1,0] -// GFX11: v_sat_pk_u8_i16_e64_dpp v199, v5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xc7,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x05,0x1b,0x00,0xff] +v_sat_pk_u8_i16 v199.h, v5 quad_perm:[3,2,1,0] +// GFX11: v_sat_pk_u8_i16_e64_dpp v199.h, v5 op_sel:[0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xc7,0x40,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x05,0x1b,0x00,0xff] + +v_sat_pk_u8_i16 v199.l, v5 +// GFX11: v_sat_pk_u8_i16_e64 v199.l, v5 ; encoding: [0xc7,0x00,0xe2,0xd5,0x05,0x01,0x00,0x00] + +v_sat_pk_u8_i16 v199.l, v5 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sat_pk_u8_i16_e64_dpp v199.l, v5 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xc7,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x05,0x77,0x39,0x05] + +v_sat_pk_u8_i16 v199.l, v5 quad_perm:[3,2,1,0] +// GFX11: v_sat_pk_u8_i16_e64_dpp v199.l, v5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xc7,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x05,0x1b,0x00,0xff] v_sin_f16 v128, 0xfe0b // GFX11: v_sin_f16_e64 v128, 0xfe0b ; encoding: [0x80,0x00,0xe0,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s index f38ff6a2fdd7d..1bd1a5c5695bc 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s @@ -2644,47 +2644,50 @@ v_rsq_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0xae,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x05,0x30] -v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +v_sat_pk_u8_i16_e64_dpp v5.l, v1 quad_perm:[3,2,1,0] +// GFX11: v_sat_pk_u8_i16_e64_dpp v5.l, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +v_sat_pk_u8_i16_e64_dpp v5.l, v1 quad_perm:[0,1,2,3] +// GFX11: v_sat_pk_u8_i16_e64_dpp v5.l, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -v_sat_pk_u8_i16_e64_dpp v5, v1 row_mirror -// GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_mirror +// GFX11: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -v_sat_pk_u8_i16_e64_dpp v5, v1 row_half_mirror -// GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_half_mirror +// GFX11: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:1 -// GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_shl:1 +// GFX11: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:15 -// GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_shl:15 +// GFX11: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:1 -// GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_shr:1 +// GFX11: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:15 -// GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_shr:15 +// GFX11: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:1 -// GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_ror:1 +// GFX11: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:15 -// GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_ror:15 +// GFX11: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -v_sat_pk_u8_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] +v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] -v_sat_pk_u8_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_sat_pk_u8_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30] +v_sat_pk_u8_i16_e64_dpp v255.l, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_sat_pk_u8_i16_e64_dpp v255.l, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30] + +v_sat_pk_u8_i16_e64_dpp v255.h, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x40,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30] v_sin_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] // GFX11: v_sin_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s index 95407886ccba1..65af1c1829902 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s @@ -760,14 +760,17 @@ v_rsq_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0xae,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -v_sat_pk_u8_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +v_sat_pk_u8_i16_e64_dpp v5.l, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sat_pk_u8_i16_e64_dpp v5.l, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -v_sat_pk_u8_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +v_sat_pk_u8_i16_e64_dpp v5.l, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_sat_pk_u8_i16_e64_dpp v5.l, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -v_sat_pk_u8_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_sat_pk_u8_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +v_sat_pk_u8_i16_e64_dpp v255.l, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_sat_pk_u8_i16_e64_dpp v255.l, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64_dpp v255.h, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x40,0xe2,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] v_sin_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_sin_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s index 3850f0254a7f1..1108887c26ed4 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s @@ -3190,50 +3190,53 @@ v_rsq_f64_e64 v[5:6], -|src_scc| mul:4 v_rsq_f64_e64 v[254:255], 0xaf123456 clamp div:2 // GFX11: v_rsq_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xb1,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -v_sat_pk_u8_i16_e64 v5, v1 -// GFX11: v_sat_pk_u8_i16_e64 v5, v1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00] +v_sat_pk_u8_i16_e64 v5.l, v1 +// GFX11: v_sat_pk_u8_i16_e64 v5.l, v1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00] -v_sat_pk_u8_i16_e64 v5, v255 -// GFX11: v_sat_pk_u8_i16_e64 v5, v255 ; encoding: [0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00] +v_sat_pk_u8_i16_e64 v5.l, v255 +// GFX11: v_sat_pk_u8_i16_e64 v5.l, v255 ; encoding: [0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00] -v_sat_pk_u8_i16_e64 v5, s1 -// GFX11: v_sat_pk_u8_i16_e64 v5, s1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00] +v_sat_pk_u8_i16_e64 v5.l, s1 +// GFX11: v_sat_pk_u8_i16_e64 v5.l, s1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00] -v_sat_pk_u8_i16_e64 v5, s105 -// GFX11: v_sat_pk_u8_i16_e64 v5, s105 ; encoding: [0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00] +v_sat_pk_u8_i16_e64 v5.l, s105 +// GFX11: v_sat_pk_u8_i16_e64 v5.l, s105 ; encoding: [0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00] -v_sat_pk_u8_i16_e64 v5, vcc_lo -// GFX11: v_sat_pk_u8_i16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00] +v_sat_pk_u8_i16_e64 v5.l, vcc_lo +// GFX11: v_sat_pk_u8_i16_e64 v5.l, vcc_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00] -v_sat_pk_u8_i16_e64 v5, vcc_hi -// GFX11: v_sat_pk_u8_i16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00] +v_sat_pk_u8_i16_e64 v5.l, vcc_hi +// GFX11: v_sat_pk_u8_i16_e64 v5.l, vcc_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00] -v_sat_pk_u8_i16_e64 v5, ttmp15 -// GFX11: v_sat_pk_u8_i16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00] +v_sat_pk_u8_i16_e64 v5.l, ttmp15 +// GFX11: v_sat_pk_u8_i16_e64 v5.l, ttmp15 ; encoding: [0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00] -v_sat_pk_u8_i16_e64 v5, m0 -// GFX11: v_sat_pk_u8_i16_e64 v5, m0 ; encoding: [0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00] +v_sat_pk_u8_i16_e64 v5.l, m0 +// GFX11: v_sat_pk_u8_i16_e64 v5.l, m0 ; encoding: [0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00] -v_sat_pk_u8_i16_e64 v5, exec_lo -// GFX11: v_sat_pk_u8_i16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00] +v_sat_pk_u8_i16_e64 v5.l, exec_lo +// GFX11: v_sat_pk_u8_i16_e64 v5.l, exec_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00] -v_sat_pk_u8_i16_e64 v5, exec_hi -// GFX11: v_sat_pk_u8_i16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00] +v_sat_pk_u8_i16_e64 v5.l, exec_hi +// GFX11: v_sat_pk_u8_i16_e64 v5.l, exec_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00] -v_sat_pk_u8_i16_e64 v5, null -// GFX11: v_sat_pk_u8_i16_e64 v5, null ; encoding: [0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00] +v_sat_pk_u8_i16_e64 v5.l, null +// GFX11: v_sat_pk_u8_i16_e64 v5.l, null ; encoding: [0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00] -v_sat_pk_u8_i16_e64 v5, -1 -// GFX11: v_sat_pk_u8_i16_e64 v5, -1 ; encoding: [0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00] +v_sat_pk_u8_i16_e64 v5.l, -1 +// GFX11: v_sat_pk_u8_i16_e64 v5.l, -1 ; encoding: [0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00] -v_sat_pk_u8_i16_e64 v5, 0.5 -// GFX11: v_sat_pk_u8_i16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00] +v_sat_pk_u8_i16_e64 v5.l, 0.5 +// GFX11: v_sat_pk_u8_i16_e64 v5.l, 0.5 ; encoding: [0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00] -v_sat_pk_u8_i16_e64 v5, src_scc -// GFX11: v_sat_pk_u8_i16_e64 v5, src_scc ; encoding: [0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00] +v_sat_pk_u8_i16_e64 v5.l, src_scc +// GFX11: v_sat_pk_u8_i16_e64 v5.l, src_scc ; encoding: [0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00] -v_sat_pk_u8_i16_e64 v255, 0xfe0b -// GFX11: v_sat_pk_u8_i16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +v_sat_pk_u8_i16_e64 v255.l, 0xfe0b +// GFX11: v_sat_pk_u8_i16_e64 v255.l, 0xfe0b ; encoding: [0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_sat_pk_u8_i16_e64 v255.h, 0xfe0b +// GFX11: [0xff,0x40,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] v_sin_f16_e64 v5, v1 // GFX11: v_sin_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xe0,0xd5,0x01,0x01,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s index edd3b916f4e5f..086356fbca25a 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s @@ -3301,49 +3301,70 @@ v_rsq_f64 v[254:255], 0xaf123456 // GFX12: v_rsq_f64_e32 v[254:255], 0xaf123456 ; encoding: [0xff,0x62,0xfc,0x7f,0x56,0x34,0x12,0xaf] v_sat_pk_u8_i16 v5, v1 -// GFX12: v_sat_pk_u8_i16_e32 v5, v1 ; encoding: [0x01,0xc5,0x0a,0x7e] +// GFX12-ASM: v_sat_pk_u8_i16_e32 v5, v1 ; encoding: [0x01,0xc5,0x0a,0x7e] +// GFX12-DIS: v_sat_pk_u8_i16_e32 v5.l, v1 ; encoding: [0x01,0xc5,0x0a,0x7e] v_sat_pk_u8_i16 v5, v255 -// GFX12: v_sat_pk_u8_i16_e32 v5, v255 ; encoding: [0xff,0xc5,0x0a,0x7e] +// GFX12-ASM: v_sat_pk_u8_i16_e32 v5, v255 ; encoding: [0xff,0xc5,0x0a,0x7e] +// GFX12-DIS: v_sat_pk_u8_i16_e32 v5.l, v255 ; encoding: [0xff,0xc5,0x0a,0x7e] v_sat_pk_u8_i16 v5, s1 -// GFX12: v_sat_pk_u8_i16_e32 v5, s1 ; encoding: [0x01,0xc4,0x0a,0x7e] +// GFX12-ASM: v_sat_pk_u8_i16_e32 v5, s1 ; encoding: [0x01,0xc4,0x0a,0x7e] +// GFX12-DIS: v_sat_pk_u8_i16_e32 v5.l, s1 ; encoding: [0x01,0xc4,0x0a,0x7e] v_sat_pk_u8_i16 v5, s105 -// GFX12: v_sat_pk_u8_i16_e32 v5, s105 ; encoding: [0x69,0xc4,0x0a,0x7e] +// GFX12-ASM: v_sat_pk_u8_i16_e32 v5, s105 ; encoding: [0x69,0xc4,0x0a,0x7e] +// GFX12-DIS: v_sat_pk_u8_i16_e32 v5.l, s105 ; encoding: [0x69,0xc4,0x0a,0x7e] v_sat_pk_u8_i16 v5, vcc_lo -// GFX12: v_sat_pk_u8_i16_e32 v5, vcc_lo ; encoding: [0x6a,0xc4,0x0a,0x7e] +// GFX12-ASM: v_sat_pk_u8_i16_e32 v5, vcc_lo ; encoding: [0x6a,0xc4,0x0a,0x7e] +// GFX12-DIS: v_sat_pk_u8_i16_e32 v5.l, vcc_lo ; encoding: [0x6a,0xc4,0x0a,0x7e] v_sat_pk_u8_i16 v5, vcc_hi -// GFX12: v_sat_pk_u8_i16_e32 v5, vcc_hi ; encoding: [0x6b,0xc4,0x0a,0x7e] +// GFX12-ASM: v_sat_pk_u8_i16_e32 v5, vcc_hi ; encoding: [0x6b,0xc4,0x0a,0x7e] +// GFX12-DIS: v_sat_pk_u8_i16_e32 v5.l, vcc_hi ; encoding: [0x6b,0xc4,0x0a,0x7e] v_sat_pk_u8_i16 v5, ttmp15 -// GFX12: v_sat_pk_u8_i16_e32 v5, ttmp15 ; encoding: [0x7b,0xc4,0x0a,0x7e] +// GFX12-ASM: v_sat_pk_u8_i16_e32 v5, ttmp15 ; encoding: [0x7b,0xc4,0x0a,0x7e] +// GFX12-DIS: v_sat_pk_u8_i16_e32 v5.l, ttmp15 ; encoding: [0x7b,0xc4,0x0a,0x7e] v_sat_pk_u8_i16 v5, m0 -// GFX12: v_sat_pk_u8_i16_e32 v5, m0 ; encoding: [0x7d,0xc4,0x0a,0x7e] +// GFX12-ASM: v_sat_pk_u8_i16_e32 v5, m0 ; encoding: [0x7d,0xc4,0x0a,0x7e] +// GFX12-DIS: v_sat_pk_u8_i16_e32 v5.l, m0 ; encoding: [0x7d,0xc4,0x0a,0x7e] v_sat_pk_u8_i16 v5, exec_lo -// GFX12: v_sat_pk_u8_i16_e32 v5, exec_lo ; encoding: [0x7e,0xc4,0x0a,0x7e] +// GFX12-ASM: v_sat_pk_u8_i16_e32 v5, exec_lo ; encoding: [0x7e,0xc4,0x0a,0x7e] +// GFX12-DIS: v_sat_pk_u8_i16_e32 v5.l, exec_lo ; encoding: [0x7e,0xc4,0x0a,0x7e] v_sat_pk_u8_i16 v5, exec_hi -// GFX12: v_sat_pk_u8_i16_e32 v5, exec_hi ; encoding: [0x7f,0xc4,0x0a,0x7e] +// GFX12-ASM: v_sat_pk_u8_i16_e32 v5, exec_hi ; encoding: [0x7f,0xc4,0x0a,0x7e] +// GFX12-DIS: v_sat_pk_u8_i16_e32 v5.l, exec_hi ; encoding: [0x7f,0xc4,0x0a,0x7e] v_sat_pk_u8_i16 v5, null -// GFX12: v_sat_pk_u8_i16_e32 v5, null ; encoding: [0x7c,0xc4,0x0a,0x7e] +// GFX12-ASM: v_sat_pk_u8_i16_e32 v5, null ; encoding: [0x7c,0xc4,0x0a,0x7e] +// GFX12-DIS: v_sat_pk_u8_i16_e32 v5.l, null ; encoding: [0x7c,0xc4,0x0a,0x7e] v_sat_pk_u8_i16 v5, -1 -// GFX12: v_sat_pk_u8_i16_e32 v5, -1 ; encoding: [0xc1,0xc4,0x0a,0x7e] +// GFX12-ASM: v_sat_pk_u8_i16_e32 v5, -1 ; encoding: [0xc1,0xc4,0x0a,0x7e] +// GFX12-DIS: v_sat_pk_u8_i16_e32 v5.l, -1 ; encoding: [0xc1,0xc4,0x0a,0x7e] v_sat_pk_u8_i16 v5, 0.5 -// GFX12: v_sat_pk_u8_i16_e32 v5, 0.5 ; encoding: [0xf0,0xc4,0x0a,0x7e] +// GFX12-ASM: v_sat_pk_u8_i16_e32 v5, 0.5 ; encoding: [0xf0,0xc4,0x0a,0x7e] +// GFX12-DIS: v_sat_pk_u8_i16_e32 v5.l, 0.5 ; encoding: [0xf0,0xc4,0x0a,0x7e] v_sat_pk_u8_i16 v5, src_scc -// GFX12: v_sat_pk_u8_i16_e32 v5, src_scc ; encoding: [0xfd,0xc4,0x0a,0x7e] +// GFX12-ASM: v_sat_pk_u8_i16_e32 v5, src_scc ; encoding: [0xfd,0xc4,0x0a,0x7e] +// GFX12-DIS: v_sat_pk_u8_i16_e32 v5.l, src_scc ; encoding: [0xfd,0xc4,0x0a,0x7e] v_sat_pk_u8_i16 v127, 0xfe0b -// GFX12: v_sat_pk_u8_i16_e32 v127, 0xfe0b ; encoding: [0xff,0xc4,0xfe,0x7e,0x0b,0xfe,0x00,0x00] +// GFX12-ASM: v_sat_pk_u8_i16_e32 v127, 0xfe0b ; encoding: [0xff,0xc4,0xfe,0x7e,0x0b,0xfe,0x00,0x00] +// GFX12-DIS: v_sat_pk_u8_i16_e32 v127.l, 0xfe0b ; encoding: [0xff,0xc4,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +v_sat_pk_u8_i16 v5.h, src_scc +// GFX12: v_sat_pk_u8_i16_e32 v5.h, src_scc ; encoding: [0xfd,0xc4,0x0a,0x7f] + +v_sat_pk_u8_i16 v127.h, 0xfe0b +// GFX12: v_sat_pk_u8_i16_e32 v127.h, 0xfe0b ; encoding: [0xff,0xc4,0xfe,0x7f,0x0b,0xfe,0x00,0x00] v_sin_f16 v5, v1 // GFX12: v_sin_f16_e32 v5, v1 ; encoding: [0x01,0xc1,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s index 56b42f19db38a..26e7162206aed 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s @@ -2632,6 +2632,12 @@ v_sat_pk_u8_i16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 v_sat_pk_u8_i16 v127, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_sat_pk_u8_i16_dpp v127, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xc4,0xfe,0x7e,0xff,0x6f,0x05,0x30] +v_sat_pk_u8_i16 v5.h, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_sat_pk_u8_i16_dpp v5.h, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xc4,0x0a,0x7f,0x01,0x60,0x09,0x13] + +v_sat_pk_u8_i16 v127.h, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_sat_pk_u8_i16_dpp v127.h, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xc4,0xfe,0x7f,0xff,0x6f,0x05,0x30] + v_sin_f16 v5, v1 quad_perm:[3,2,1,0] // GFX12: v_sin_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s index 09f3069114d4a..a54ae771fab40 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s @@ -619,6 +619,12 @@ v_sat_pk_u8_i16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_sat_pk_u8_i16 v127, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_sat_pk_u8_i16_dpp v127, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xc4,0xfe,0x7e,0xff,0x00,0x00,0x00] +v_sat_pk_u8_i16 v5.h, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_sat_pk_u8_i16_dpp v5.h, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xc4,0x0a,0x7f,0x01,0x77,0x39,0x05] + +v_sat_pk_u8_i16 v127.h, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_sat_pk_u8_i16_dpp v127.h, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xc4,0xfe,0x7f,0xff,0x00,0x00,0x00] + v_sin_f16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_sin_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc0,0x0a,0x7e,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_err.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_err.s index 0ccad9c673079..01aa7a44bbc23 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_err.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_err.s @@ -625,6 +625,24 @@ v_sat_pk_u8_i16_e32 v199, v5 dpp8:[7,6,5,4,3,2,1,0] v_sat_pk_u8_i16_e32 v199, v5 quad_perm:[3,2,1,0] // GFX12: :[[@LINE-1]]:30: error: invalid operand for instruction +v_sat_pk_u8_i16_e32 v199.h, v5 +// GFX12: :[[@LINE-1]]:21: error: invalid operand for instruction + +v_sat_pk_u8_i16_e32 v199.h, v5 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:21: error: invalid operand for instruction + +v_sat_pk_u8_i16_e32 v199.h, v5 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:21: error: invalid operand for instruction + +v_sat_pk_u8_i16_e32 v199.l, v5 +// GFX12: :[[@LINE-1]]:21: error: invalid operand for instruction + +v_sat_pk_u8_i16_e32 v199.l, v5 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:21: error: invalid operand for instruction + +v_sat_pk_u8_i16_e32 v199.l, v5 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:21: error: invalid operand for instruction + v_sin_f16_e32 v128, 0xfe0b // GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_promote.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_promote.s index f220ec2b7d1e5..4c983af094561 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_promote.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_t16_promote.s @@ -1771,6 +1771,15 @@ v_sat_pk_u8_i16 v199, v5 dpp8:[7,6,5,4,3,2,1,0] v_sat_pk_u8_i16 v199, v5 quad_perm:[3,2,1,0] // GFX12: v_sat_pk_u8_i16_e64_dpp v199, v5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xc7,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x05,0x1b,0x00,0xff] +v_sat_pk_u8_i16 v199.h, v5 +// GFX12: v_sat_pk_u8_i16_e64 v199.h, v5 op_sel:[0,1] ; encoding: [0xc7,0x40,0xe2,0xd5,0x05,0x01,0x00,0x00] + +v_sat_pk_u8_i16 v199.h, v5 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_sat_pk_u8_i16_e64_dpp v199.h, v5 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xc7,0x40,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x05,0x77,0x39,0x05] + +v_sat_pk_u8_i16 v199.h, v5 quad_perm:[3,2,1,0] +// GFX12: v_sat_pk_u8_i16_e64_dpp v199.h, v5 op_sel:[0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xc7,0x40,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x05,0x1b,0x00,0xff] + v_sin_f16 v128, 0xfe0b // GFX12: v_sin_f16_e64 v128, 0xfe0b ; encoding: [0x80,0x00,0xe0,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s index 015619d31504b..ea4a58d9d0f7e 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1.s @@ -3385,6 +3385,9 @@ v_sat_pk_u8_i16_e64 v5, src_scc v_sat_pk_u8_i16_e64 v255, 0xfe0b // GFX12: v_sat_pk_u8_i16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +v_sat_pk_u8_i16_e64 v255.h, 0xfe0b +// GFX12: v_sat_pk_u8_i16_e64 v255.h, 0xfe0b op_sel:[0,1] ; encoding: [0xff,0x40,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] + v_sin_f16_e64 v5, v1 // GFX12: v_sin_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xe0,0xd5,0x01,0x01,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s index 160bc3fc6afc7..a9b933e639abb 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp16.s @@ -2548,6 +2548,9 @@ v_sat_pk_u8_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl v_sat_pk_u8_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_sat_pk_u8_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30] +v_sat_pk_u8_i16_e64_dpp v255.h, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_sat_pk_u8_i16_e64_dpp v255.h, v255 op_sel:[0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x05,0x30] + v_sin_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] // GFX12: v_sin_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s index c9ea7cdf1512e..af335f2e0b586 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1_dpp8.s @@ -730,6 +730,9 @@ v_sat_pk_u8_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_sat_pk_u8_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_sat_pk_u8_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +v_sat_pk_u8_i16_e64_dpp v255.h, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_sat_pk_u8_i16_e64_dpp v255.h, v255 op_sel:[0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0xe2,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + v_sin_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_sin_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt index 61e529abf4455..f02f0206acd2f 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1.txt @@ -3220,49 +3220,74 @@ # GFX11: v_rsq_f64_e32 v[254:255], 0xaf123456 ; encoding: [0xff,0x62,0xfc,0x7f,0x56,0x34,0x12,0xaf] 0x01,0xc5,0x0a,0x7e -# GFX11: v_sat_pk_u8_i16_e32 v5, v1 ; encoding: [0x01,0xc5,0x0a,0x7e] +# GFX11-REAL16: v_sat_pk_u8_i16_e32 v5.l, v1 ; encoding: [0x01,0xc5,0x0a,0x7e] +# GFX11-FAKE16: v_sat_pk_u8_i16_e32 v5, v1 ; encoding: [0x01,0xc5,0x0a,0x7e] 0xff,0xc5,0x0a,0x7e -# GFX11: v_sat_pk_u8_i16_e32 v5, v255 ; encoding: [0xff,0xc5,0x0a,0x7e] +# GFX11-REAL16: v_sat_pk_u8_i16_e32 v5.l, v255 ; encoding: [0xff,0xc5,0x0a,0x7e] +# GFX11-FAKE16: v_sat_pk_u8_i16_e32 v5, v255 ; encoding: [0xff,0xc5,0x0a,0x7e] 0x01,0xc4,0x0a,0x7e -# GFX11: v_sat_pk_u8_i16_e32 v5, s1 ; encoding: [0x01,0xc4,0x0a,0x7e] +# GFX11-REAL16: v_sat_pk_u8_i16_e32 v5.l, s1 ; encoding: [0x01,0xc4,0x0a,0x7e] +# GFX11-FAKE16: v_sat_pk_u8_i16_e32 v5, s1 ; encoding: [0x01,0xc4,0x0a,0x7e] 0x69,0xc4,0x0a,0x7e -# GFX11: v_sat_pk_u8_i16_e32 v5, s105 ; encoding: [0x69,0xc4,0x0a,0x7e] +# GFX11-REAL16: v_sat_pk_u8_i16_e32 v5.l, s105 ; encoding: [0x69,0xc4,0x0a,0x7e] +# GFX11-FAKE16: v_sat_pk_u8_i16_e32 v5, s105 ; encoding: [0x69,0xc4,0x0a,0x7e] 0x6a,0xc4,0x0a,0x7e -# GFX11: v_sat_pk_u8_i16_e32 v5, vcc_lo ; encoding: [0x6a,0xc4,0x0a,0x7e] +# GFX11-REAL16: v_sat_pk_u8_i16_e32 v5.l, vcc_lo ; encoding: [0x6a,0xc4,0x0a,0x7e] +# GFX11-FAKE16: v_sat_pk_u8_i16_e32 v5, vcc_lo ; encoding: [0x6a,0xc4,0x0a,0x7e] 0x6b,0xc4,0x0a,0x7e -# GFX11: v_sat_pk_u8_i16_e32 v5, vcc_hi ; encoding: [0x6b,0xc4,0x0a,0x7e] +# GFX11-REAL16: v_sat_pk_u8_i16_e32 v5.l, vcc_hi ; encoding: [0x6b,0xc4,0x0a,0x7e] +# GFX11-FAKE16: v_sat_pk_u8_i16_e32 v5, vcc_hi ; encoding: [0x6b,0xc4,0x0a,0x7e] 0x7b,0xc4,0x0a,0x7e -# GFX11: v_sat_pk_u8_i16_e32 v5, ttmp15 ; encoding: [0x7b,0xc4,0x0a,0x7e] +# GFX11-REAL16: v_sat_pk_u8_i16_e32 v5.l, ttmp15 ; encoding: [0x7b,0xc4,0x0a,0x7e] +# GFX11-FAKE16: v_sat_pk_u8_i16_e32 v5, ttmp15 ; encoding: [0x7b,0xc4,0x0a,0x7e] 0x7d,0xc4,0x0a,0x7e -# GFX11: v_sat_pk_u8_i16_e32 v5, m0 ; encoding: [0x7d,0xc4,0x0a,0x7e] +# GFX11-REAL16: v_sat_pk_u8_i16_e32 v5.l, m0 ; encoding: [0x7d,0xc4,0x0a,0x7e] +# GFX11-FAKE16: v_sat_pk_u8_i16_e32 v5, m0 ; encoding: [0x7d,0xc4,0x0a,0x7e] 0x7e,0xc4,0x0a,0x7e -# GFX11: v_sat_pk_u8_i16_e32 v5, exec_lo ; encoding: [0x7e,0xc4,0x0a,0x7e] +# GFX11-REAL16: v_sat_pk_u8_i16_e32 v5.l, exec_lo ; encoding: [0x7e,0xc4,0x0a,0x7e] +# GFX11-FAKE16: v_sat_pk_u8_i16_e32 v5, exec_lo ; encoding: [0x7e,0xc4,0x0a,0x7e] 0x7f,0xc4,0x0a,0x7e -# GFX11: v_sat_pk_u8_i16_e32 v5, exec_hi ; encoding: [0x7f,0xc4,0x0a,0x7e] +# GFX11-REAL16: v_sat_pk_u8_i16_e32 v5.l, exec_hi ; encoding: [0x7f,0xc4,0x0a,0x7e] +# GFX11-FAKE16: v_sat_pk_u8_i16_e32 v5, exec_hi ; encoding: [0x7f,0xc4,0x0a,0x7e] 0x7c,0xc4,0x0a,0x7e -# GFX11: v_sat_pk_u8_i16_e32 v5, null ; encoding: [0x7c,0xc4,0x0a,0x7e] +# GFX11-REAL16: v_sat_pk_u8_i16_e32 v5.l, null ; encoding: [0x7c,0xc4,0x0a,0x7e] +# GFX11-FAKE16: v_sat_pk_u8_i16_e32 v5, null ; encoding: [0x7c,0xc4,0x0a,0x7e] 0xc1,0xc4,0x0a,0x7e -# GFX11: v_sat_pk_u8_i16_e32 v5, -1 ; encoding: [0xc1,0xc4,0x0a,0x7e] +# GFX11-REAL16: v_sat_pk_u8_i16_e32 v5.l, -1 ; encoding: [0xc1,0xc4,0x0a,0x7e] +# GFX11-FAKE16: v_sat_pk_u8_i16_e32 v5, -1 ; encoding: [0xc1,0xc4,0x0a,0x7e] 0xf0,0xc4,0x0a,0x7e -# GFX11: v_sat_pk_u8_i16_e32 v5, 0.5 ; encoding: [0xf0,0xc4,0x0a,0x7e] +# GFX11-REAL16: v_sat_pk_u8_i16_e32 v5.l, 0.5 ; encoding: [0xf0,0xc4,0x0a,0x7e] +# GFX11-FAKE16: v_sat_pk_u8_i16_e32 v5, 0.5 ; encoding: [0xf0,0xc4,0x0a,0x7e] 0xfd,0xc4,0x0a,0x7e -# GFX11: v_sat_pk_u8_i16_e32 v5, src_scc ; encoding: [0xfd,0xc4,0x0a,0x7e] +# GFX11-REAL16: v_sat_pk_u8_i16_e32 v5.l, src_scc ; encoding: [0xfd,0xc4,0x0a,0x7e] +# GFX11-FAKE16: v_sat_pk_u8_i16_e32 v5, src_scc ; encoding: [0xfd,0xc4,0x0a,0x7e] 0xff,0xc4,0xfe,0x7e,0x0b,0xfe,0x00,0x00 -# GFX11: v_sat_pk_u8_i16_e32 v127, 0xfe0b ; encoding: [0xff,0xc4,0xfe,0x7e,0x0b,0xfe,0x00,0x00] +# GFX11-REAL16: v_sat_pk_u8_i16_e32 v127.l, 0xfe0b ; encoding: [0xff,0xc4,0xfe,0x7e,0x0b,0xfe,0x00,0x00] +# GFX11-FAKE16: v_sat_pk_u8_i16_e32 v127, 0xfe0b ; encoding: [0xff,0xc4,0xfe,0x7e,0x0b,0xfe,0x00,0x00] + +0xf0,0xc4,0xfe,0x7e +# GFX11-REAL16: v_sat_pk_u8_i16_e32 v127.l, 0.5 ; encoding: [0xf0,0xc4,0xfe,0x7e] +# GFX11-FAKE16: v_sat_pk_u8_i16_e32 v127, 0.5 ; encoding: [0xf0,0xc4,0xfe,0x7e] + +0xfd,0xc4,0x0a,0x7f +# GFX11-REAL16: v_sat_pk_u8_i16_e32 v5.h, src_scc ; encoding: [0xfd,0xc4,0x0a,0x7f] + +0xff,0xc4,0xfe,0x7f,0x0b,0xfe,0x00,0x00 +# GFX11-REAL16: v_sat_pk_u8_i16_e32 v127.h, 0xfe0b ; encoding: [0xff,0xc4,0xfe,0x7f,0x0b,0xfe,0x00,0x00] 0x01,0xc1,0x0a,0x7e # GFX11: v_sin_f16_e32 v5, v1 ; encoding: [0x01,0xc1,0x0a,0x7e] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt index 1075a3eecd540..a4491e02abf05 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp16.txt @@ -2525,46 +2525,72 @@ # GFX11: v_rsq_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x5c,0xfe,0x7f,0xff,0x6f,0x3d,0x30] 0xfa,0xc4,0x0a,0x7e,0x01,0x1b,0x00,0xff -# GFX11: v_sat_pk_u8_i16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1b,0x00,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1b,0x00,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0xe4,0x00,0xff -# GFX11: v_sat_pk_u8_i16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0xe4,0x00,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0xe4,0x00,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0x40,0x01,0xff -# GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x40,0x01,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x40,0x01,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0x41,0x01,0xff -# GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x41,0x01,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x41,0x01,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0x01,0x01,0xff -# GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x01,0x01,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x01,0x01,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0x0f,0x01,0xff -# GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x0f,0x01,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x0f,0x01,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0x11,0x01,0xff -# GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x11,0x01,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x11,0x01,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0x1f,0x01,0xff -# GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1f,0x01,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1f,0x01,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0x21,0x01,0xff -# GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x21,0x01,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x21,0x01,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0x2f,0x01,0xff -# GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x2f,0x01,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x2f,0x01,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x2f,0x01,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0x50,0x01,0xff -# GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x50,0x01,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x50,0x01,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0x5f,0x01,0x01 -# GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x5f,0x01,0x01] +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x5f,0x01,0x01] 0xfa,0xc4,0x0a,0x7e,0x01,0x60,0x01,0x13 -# GFX11: v_sat_pk_u8_i16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x60,0x01,0x13] +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x60,0x01,0x13] 0xfa,0xc4,0xfe,0x7e,0xff,0x6f,0x0d,0x30 -# GFX11: v_sat_pk_u8_i16_dpp v127, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xc4,0xfe,0x7e,0xff,0x6f,0x0d,0x30] +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v127.l, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xc4,0xfe,0x7e,0xff,0x6f,0x0d,0x30] +# GFX11-FAKE16: v_sat_pk_u8_i16_dpp v127, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xc4,0xfe,0x7e,0xff,0x6f,0x0d,0x30] + +0xfa,0xc4,0xfe,0x7e,0x01,0x5f,0x01,0x01 +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v127.l, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xc4,0xfe,0x7e,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_sat_pk_u8_i16_dpp v127, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xc4,0xfe,0x7e,0x01,0x5f,0x01,0x01] + +0xfa,0xc4,0x0a,0x7f,0x01,0x60,0x01,0x13 +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v5.h, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xc4,0x0a,0x7f,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_mul_i32_i24_e32 v128, s1, v176 ; encoding: [0x01,0x60,0x01,0x13] + +0xfa,0xc4,0xfe,0x7f,0xff,0x6f,0x0d,0x30 +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v127.h, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xc4,0xfe,0x7f,0xff,0x6f,0x0d,0x30] +# GFX11-FAKE16: v_lshlrev_b32_e32 v6, v255, v183 ; encoding: [0xff,0x6f,0x0d,0x30] 0xfa,0xc0,0x0a,0x7e,0x01,0x1b,0x00,0xff # GFX11: v_sin_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt index 051dd348e9a38..4e15731203168 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop1_dpp8.txt @@ -470,10 +470,23 @@ # GFX11: v_rsq_f32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x5c,0xfe,0x7f,0xff,0x00,0x00,0x00] 0xe9,0xc4,0x0a,0x7e,0x01,0x77,0x39,0x05 -# GFX11: v_sat_pk_u8_i16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc4,0x0a,0x7e,0x01,0x77,0x39,0x05] +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc4,0x0a,0x7e,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc4,0x0a,0x7e,0x01,0x77,0x39,0x05] 0xea,0xc4,0xfe,0x7e,0xff,0x00,0x00,0x00 -# GFX11: v_sat_pk_u8_i16_dpp v127, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xc4,0xfe,0x7e,0xff,0x00,0x00,0x00] +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v127.l, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xc4,0xfe,0x7e,0xff,0x00,0x00,0x00] +# GFX11-FAKE16: v_sat_pk_u8_i16_dpp v127, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xc4,0xfe,0x7e,0xff,0x00,0x00,0x00] + +0xe9,0xc4,0xfe,0x7e,0x01,0x77,0x39,0x05 +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v127.l, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc4,0xfe,0x7e,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_sat_pk_u8_i16_dpp v127, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc4,0xfe,0x7e,0x01,0x77,0x39,0x05] + +0xe9,0xc4,0x0a,0x7f,0x01,0x77,0x39,0x05 +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v5.h, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc4,0x0a,0x7f,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_dot2acc_f32_f16 v156, v1, v187 ; encoding: [0x01,0x77,0x39,0x05] + +0xea,0xc4,0xfe,0x7f,0xff,0x00,0x00,0x00 +# GFX11-REAL16: v_sat_pk_u8_i16_dpp v127.h, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xc4,0xfe,0x7f,0xff,0x00,0x00,0x00] 0xe9,0xc0,0x0a,0x7e,0x01,0x77,0x39,0x05 # GFX11: v_sin_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc0,0x0a,0x7e,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt index 2666b758344c6..f97c678e6a90a 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt @@ -2687,46 +2687,64 @@ # GFX11: v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xae,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +# GFX11-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +# GFX11-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +# GFX11-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] 0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_sat_pk_u8_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +# GFX11-REAL16: v_sat_pk_u8_i16_e64_dpp v255.l, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +0xff,0x40,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 +# GFX11-REAL16: v_sat_pk_u8_i16_e64_dpp v255.h, v255 op_sel:[0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] 0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff # GFX11: v_sin_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt index c19947c4bd6ff..3cad28d888202 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt @@ -727,10 +727,16 @@ # GFX11: v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xae,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +# GFX11-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] 0xff,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_sat_pk_u8_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +# GFX11-REAL16: v_sat_pk_u8_i16_e64_dpp v255.l, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +0xff,0x40,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 +# GFX11-REAL16: v_sat_pk_u8_i16_e64_dpp v255.h, v255 op_sel:[0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] 0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 # GFX11: v_sin_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt index 3df206ccf522e..8b2bc97c5de1f 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt @@ -3204,49 +3204,68 @@ # GFX11: v_rsq_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xb1,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] 0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00 -# GFX11: v_sat_pk_u8_i16_e64 v5, v1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00] +# GFX11-REAL16: v_sat_pk_u8_i16_e64 v5.l, v1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64 v5, v1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00] 0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00 -# GFX11: v_sat_pk_u8_i16_e64 v5, v255 ; encoding: [0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00] +# GFX11-REAL16: v_sat_pk_u8_i16_e64 v5.l, v255 ; encoding: [0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64 v5, v255 ; encoding: [0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00] 0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00 -# GFX11: v_sat_pk_u8_i16_e64 v5, s1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00] +# GFX11-REAL16: v_sat_pk_u8_i16_e64 v5.l, s1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64 v5, s1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00 -# GFX11: v_sat_pk_u8_i16_e64 v5, s105 ; encoding: [0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00] +# GFX11-REAL16: v_sat_pk_u8_i16_e64 v5.l, s105 ; encoding: [0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64 v5, s105 ; encoding: [0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00 -# GFX11: v_sat_pk_u8_i16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00] +# GFX11-REAL16: v_sat_pk_u8_i16_e64 v5.l, vcc_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00 -# GFX11: v_sat_pk_u8_i16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00] +# GFX11-REAL16: v_sat_pk_u8_i16_e64 v5.l, vcc_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00 -# GFX11: v_sat_pk_u8_i16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00] +# GFX11-REAL16: v_sat_pk_u8_i16_e64 v5.l, ttmp15 ; encoding: [0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00 -# GFX11: v_sat_pk_u8_i16_e64 v5, m0 ; encoding: [0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00] +# GFX11-REAL16: v_sat_pk_u8_i16_e64 v5.l, m0 ; encoding: [0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64 v5, m0 ; encoding: [0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00 -# GFX11: v_sat_pk_u8_i16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00] +# GFX11-REAL16: v_sat_pk_u8_i16_e64 v5.l, exec_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00 -# GFX11: v_sat_pk_u8_i16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00] +# GFX11-REAL16: v_sat_pk_u8_i16_e64 v5.l, exec_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00 -# GFX11: v_sat_pk_u8_i16_e64 v5, null ; encoding: [0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00] +# GFX11-REAL16: v_sat_pk_u8_i16_e64 v5.l, null ; encoding: [0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64 v5, null ; encoding: [0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00 -# GFX11: v_sat_pk_u8_i16_e64 v5, -1 ; encoding: [0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00] +# GFX11-REAL16: v_sat_pk_u8_i16_e64 v5.l, -1 ; encoding: [0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64 v5, -1 ; encoding: [0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00 -# GFX11: v_sat_pk_u8_i16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00] +# GFX11-REAL16: v_sat_pk_u8_i16_e64 v5.l, 0.5 ; encoding: [0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00 -# GFX11: v_sat_pk_u8_i16_e64 v5, src_scc ; encoding: [0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00] +# GFX11-REAL16: v_sat_pk_u8_i16_e64 v5.l, src_scc ; encoding: [0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64 v5, src_scc ; encoding: [0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00] 0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_sat_pk_u8_i16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +# GFX11-REAL16: v_sat_pk_u8_i16_e64 v255.l, 0xfe0b ; encoding: [0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] + +0xff,0x40,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 +# GFX11-REAL16: v_sat_pk_u8_i16_e64 v255.h, 0xfe0b op_sel:[0,1] ; encoding: [0xff,0x40,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +# GFX11-FAKE16: v_sat_pk_u8_i16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] 0x05,0x00,0xe0,0xd5,0x01,0x01,0x00,0x00 # GFX11: v_sin_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xe0,0xd5,0x01,0x01,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp16.txt index a1291b2e34f34..aa60378da9ab0 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp16.txt @@ -2661,46 +2661,68 @@ # GFX12: v_rsq_f32_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0x5c,0xfe,0x7f,0xff,0x6f,0x3d,0x30] 0xfa,0xc4,0x0a,0x7e,0x01,0x1b,0x00,0xff -# GFX12: v_sat_pk_u8_i16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1b,0x00,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1b,0x00,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1b,0x00,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0xe4,0x00,0xff -# GFX12: v_sat_pk_u8_i16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0xe4,0x00,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0xe4,0x00,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0xe4,0x00,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0x40,0x01,0xff -# GFX12: v_sat_pk_u8_i16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x40,0x01,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x40,0x01,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x40,0x01,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0x41,0x01,0xff -# GFX12: v_sat_pk_u8_i16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x41,0x01,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x41,0x01,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x41,0x01,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0x01,0x01,0xff -# GFX12: v_sat_pk_u8_i16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x01,0x01,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x01,0x01,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x01,0x01,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0x0f,0x01,0xff -# GFX12: v_sat_pk_u8_i16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x0f,0x01,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x0f,0x01,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x0f,0x01,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0x11,0x01,0xff -# GFX12: v_sat_pk_u8_i16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x11,0x01,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x11,0x01,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x11,0x01,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0x1f,0x01,0xff -# GFX12: v_sat_pk_u8_i16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1f,0x01,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1f,0x01,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x1f,0x01,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0x21,0x01,0xff -# GFX12: v_sat_pk_u8_i16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x21,0x01,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x21,0x01,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x21,0x01,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0x2f,0x01,0xff -# GFX12: v_sat_pk_u8_i16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x2f,0x01,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x2f,0x01,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x2f,0x01,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0x50,0x01,0xff -# GFX12: v_sat_pk_u8_i16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x50,0x01,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x50,0x01,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x50,0x01,0xff] 0xfa,0xc4,0x0a,0x7e,0x01,0x5f,0x01,0x01 -# GFX12: v_sat_pk_u8_i16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x5f,0x01,0x01] +# GFX12-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x5f,0x01,0x01] +# GFX12-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x5f,0x01,0x01] 0xfa,0xc4,0x0a,0x7e,0x01,0x60,0x01,0x13 -# GFX12: v_sat_pk_u8_i16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x60,0x01,0x13] +# GFX12-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xc4,0x0a,0x7e,0x01,0x60,0x01,0x13] 0xfa,0xc4,0xfe,0x7e,0xff,0x6f,0x0d,0x30 -# GFX12: v_sat_pk_u8_i16_dpp v127, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xc4,0xfe,0x7e,0xff,0x6f,0x0d,0x30] +# GFX12-REAL16: v_sat_pk_u8_i16_dpp v127.l, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xc4,0xfe,0x7e,0xff,0x6f,0x0d,0x30] +# GFX12-FAKE16: v_sat_pk_u8_i16_dpp v127, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xc4,0xfe,0x7e,0xff,0x6f,0x0d,0x30] + +0xfa,0xc4,0x0a,0x7f,0x01,0x60,0x01,0x13 +# GFX12-REAL16: v_sat_pk_u8_i16_dpp v5.h, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0xc4,0x0a,0x7f,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_mul_i32_i24_e32 v128, s1, v176 ; encoding: [0x01,0x60,0x01,0x13] + +0xfa,0xc4,0xfe,0x7f,0xff,0x6f,0x0d,0x30 +# GFX12-REAL16: v_sat_pk_u8_i16_dpp v127.h, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xc4,0xfe,0x7f,0xff,0x6f,0x0d,0x30] +# GFX12-FAKE16: v_lshlrev_b32_e32 v6, v255, v183 ; encoding: [0xff,0x6f,0x0d,0x30] 0xfa,0xc0,0x0a,0x7e,0x01,0x1b,0x00,0xff # GFX12: v_sin_f16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xc0,0x0a,0x7e,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt index 05008bfabc45a..99985e09d7432 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt @@ -483,10 +483,19 @@ # GFX12: v_rsq_f32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x5c,0xfe,0x7f,0xff,0x00,0x00,0x00] 0xe9,0xc4,0x0a,0x7e,0x01,0x77,0x39,0x05 -# GFX12: v_sat_pk_u8_i16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc4,0x0a,0x7e,0x01,0x77,0x39,0x05] +# GFX12-REAL16: v_sat_pk_u8_i16_dpp v5.l, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc4,0x0a,0x7e,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_sat_pk_u8_i16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc4,0x0a,0x7e,0x01,0x77,0x39,0x05] 0xea,0xc4,0xfe,0x7e,0xff,0x00,0x00,0x00 -# GFX12: v_sat_pk_u8_i16_dpp v127, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xc4,0xfe,0x7e,0xff,0x00,0x00,0x00] +# GFX12-REAL16: v_sat_pk_u8_i16_dpp v127.l, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xc4,0xfe,0x7e,0xff,0x00,0x00,0x00] +# GFX12-FAKE16: v_sat_pk_u8_i16_dpp v127, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xc4,0xfe,0x7e,0xff,0x00,0x00,0x00] + +0xe9,0xc4,0x0a,0x7f,0x01,0x77,0x39,0x05 +# GFX12-REAL16: v_sat_pk_u8_i16_dpp v5.h, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc4,0x0a,0x7f,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[1:2], v[187:188] ; encoding: [0x01,0x77,0x39,0x05] + +0xea,0xc4,0xfe,0x7f,0xff,0x00,0x00,0x00 +# GFX12-REAL16: v_sat_pk_u8_i16_dpp v127.h, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xc4,0xfe,0x7f,0xff,0x00,0x00,0x00] 0xe9,0xc0,0x0a,0x7e,0x01,0x77,0x39,0x05 # GFX12: v_sin_f16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc0,0x0a,0x7e,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt index bb9f607b6ece6..8ba4f58b787f5 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt @@ -3250,49 +3250,68 @@ # GFX12: v_rsq_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xb1,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] 0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00 -# GFX12: v_sat_pk_u8_i16_e64 v5, v1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00] +# GFX12-REAL16: v_sat_pk_u8_i16_e64 v5.l, v1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64 v5, v1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00] 0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00 -# GFX12: v_sat_pk_u8_i16_e64 v5, v255 ; encoding: [0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00] +# GFX12-REAL16: v_sat_pk_u8_i16_e64 v5.l, v255 ; encoding: [0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64 v5, v255 ; encoding: [0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00] 0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00 -# GFX12: v_sat_pk_u8_i16_e64 v5, s1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00] +# GFX12-REAL16: v_sat_pk_u8_i16_e64 v5.l, s1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64 v5, s1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00 -# GFX12: v_sat_pk_u8_i16_e64 v5, s105 ; encoding: [0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00] +# GFX12-REAL16: v_sat_pk_u8_i16_e64 v5.l, s105 ; encoding: [0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64 v5, s105 ; encoding: [0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00 -# GFX12: v_sat_pk_u8_i16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00] +# GFX12-REAL16: v_sat_pk_u8_i16_e64 v5.l, vcc_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00 -# GFX12: v_sat_pk_u8_i16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00] +# GFX12-REAL16: v_sat_pk_u8_i16_e64 v5.l, vcc_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00 -# GFX12: v_sat_pk_u8_i16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00] +# GFX12-REAL16: v_sat_pk_u8_i16_e64 v5.l, ttmp15 ; encoding: [0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00 -# GFX12: v_sat_pk_u8_i16_e64 v5, m0 ; encoding: [0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00] +# GFX12-REAL16: v_sat_pk_u8_i16_e64 v5.l, m0 ; encoding: [0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64 v5, m0 ; encoding: [0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00 -# GFX12: v_sat_pk_u8_i16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00] +# GFX12-REAL16: v_sat_pk_u8_i16_e64 v5.l, exec_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00 -# GFX12: v_sat_pk_u8_i16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00] +# GFX12-REAL16: v_sat_pk_u8_i16_e64 v5.l, exec_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00 -# GFX12: v_sat_pk_u8_i16_e64 v5, null ; encoding: [0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00] +# GFX12-REAL16: v_sat_pk_u8_i16_e64 v5.l, null ; encoding: [0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64 v5, null ; encoding: [0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00 -# GFX12: v_sat_pk_u8_i16_e64 v5, -1 ; encoding: [0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00] +# GFX12-REAL16: v_sat_pk_u8_i16_e64 v5.l, -1 ; encoding: [0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64 v5, -1 ; encoding: [0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00 -# GFX12: v_sat_pk_u8_i16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00] +# GFX12-REAL16: v_sat_pk_u8_i16_e64 v5.l, 0.5 ; encoding: [0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00 -# GFX12: v_sat_pk_u8_i16_e64 v5, src_scc ; encoding: [0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00] +# GFX12-REAL16: v_sat_pk_u8_i16_e64 v5.l, src_scc ; encoding: [0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64 v5, src_scc ; encoding: [0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00] 0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX12: v_sat_pk_u8_i16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +# GFX12-REAL16: v_sat_pk_u8_i16_e64 v255.l, 0xfe0b ; encoding: [0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] + +0xff,0x40,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 +# GFX12-REAL16: v_sat_pk_u8_i16_e64 v255.h, 0xfe0b op_sel:[0,1] ; encoding: [0xff,0x40,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] 0x05,0x00,0xe0,0xd5,0x01,0x01,0x00,0x00 # GFX12: v_sin_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xe0,0xd5,0x01,0x01,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt index be9f069322da8..98da7c8c54508 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt @@ -2531,46 +2531,64 @@ # GFX12: v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xae,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff -# GFX12: v_sat_pk_u8_i16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff -# GFX12: v_sat_pk_u8_i16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff -# GFX12: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff -# GFX12: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff -# GFX12: v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff -# GFX12: v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +# GFX12-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 -# GFX12: v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +# GFX12-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] 0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 -# GFX12: v_sat_pk_u8_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +# GFX12-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] 0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 -# GFX12: v_sat_pk_u8_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +# GFX12-REAL16: v_sat_pk_u8_i16_e64_dpp v255.l, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +0xff,0x40,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 +# GFX12-REAL16: v_sat_pk_u8_i16_e64_dpp v255.h, v255 op_sel:[0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] 0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff # GFX12: v_sin_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt index 87115b962a808..8213237ada1e2 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt @@ -691,10 +691,16 @@ # GFX12: v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xae,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] 0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_sat_pk_u8_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +# GFX12-REAL16: v_sat_pk_u8_i16_e64_dpp v5.l, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] 0xff,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 -# GFX12: v_sat_pk_u8_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +# GFX12-REAL16: v_sat_pk_u8_i16_e64_dpp v255.l, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +0xff,0x40,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 +# GFX12-REAL16: v_sat_pk_u8_i16_e64_dpp v255.h, v255 op_sel:[0,1] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x40,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +# GFX12-FAKE16: v_sat_pk_u8_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] 0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 # GFX12: v_sin_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]