From aa6b182999ca694223f6f7bb95c49569c87c4c31 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Mon, 9 Dec 2024 18:11:27 -0500 Subject: [PATCH] VOP2 test change for v_max_f16/v_min_f16 --- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 6 +- llvm/test/MC/AMDGPU/gfx11_asm_vop2.s | 150 +++++++++++------- llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s | 130 ++++++++------- llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s | 42 +++-- llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s | 126 ++++++++++----- .../MC/AMDGPU/gfx11_asm_vop2_t16_promote.s | 126 ++++++++++----- .../AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s | 136 +++++++++------- .../MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s | 56 +++++-- .../test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s | 138 +++++++++------- 9 files changed, 574 insertions(+), 336 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 384fec0079a5d..c70663e4a43cb 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -1827,10 +1827,8 @@ defm V_MUL_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">; defm V_FMAC_F16 : VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<0x036, "v_fmac_f16">; defm V_LDEXP_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x03b, "v_ldexp_f16">; defm V_LDEXP_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x03b, "v_ldexp_f16">; -defm V_MAX_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">; -defm V_MAX_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">; -defm V_MIN_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">; -defm V_MIN_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">; +defm V_MAX_F16 : VOP2_Real_FULL_t16_and_fake16_gfx11<0x039, "v_max_f16">; +defm V_MIN_F16 : VOP2_Real_FULL_t16_and_fake16_gfx11<0x03a, "v_min_f16">; defm V_FMAMK_F16 : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x037, "v_fmamk_f16">; defm V_FMAAK_F16 : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x038, "v_fmaak_f16">; diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s index cb7b71935e22a..5f84ef22b73e6 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s @@ -1177,50 +1177,65 @@ v_lshrrev_b32 v5, src_scc, v2 v_lshrrev_b32 v255, 0xaf123456, v255 // GFX11: v_lshrrev_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x33,0x56,0x34,0x12,0xaf] -v_max_f16 v5, v1, v2 -// GFX11: v_max_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x72] +v_max_f16 v5.l, v1.l, v2.l +// GFX11: v_max_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x72] -v_max_f16 v5, v127, v2 -// GFX11: v_max_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x72] +v_max_f16 v5.l, v127.l, v2.l +// GFX11: v_max_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x72] -v_max_f16 v5, s1, v2 -// GFX11: v_max_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x72] +v_max_f16 v5.l, s1, v2.l +// GFX11: v_max_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x72] -v_max_f16 v5, s105, v2 -// GFX11: v_max_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x72] +v_max_f16 v5.l, s105, v2.l +// GFX11: v_max_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x72] -v_max_f16 v5, vcc_lo, v2 -// GFX11: v_max_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x72] +v_max_f16 v5.l, vcc_lo, v2.l +// GFX11: v_max_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x72] -v_max_f16 v5, vcc_hi, v2 -// GFX11: v_max_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x72] +v_max_f16 v5.l, vcc_hi, v2.l +// GFX11: v_max_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x72] -v_max_f16 v5, ttmp15, v2 -// GFX11: v_max_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x72] +v_max_f16 v5.l, ttmp15, v2.l +// GFX11: v_max_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x72] -v_max_f16 v5, m0, v2 -// GFX11: v_max_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x72] +v_max_f16 v5.l, m0, v2.l +// GFX11: v_max_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x72] -v_max_f16 v5, exec_lo, v2 -// GFX11: v_max_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x72] +v_max_f16 v5.l, exec_lo, v2.l +// GFX11: v_max_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x72] -v_max_f16 v5, exec_hi, v2 -// GFX11: v_max_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x72] +v_max_f16 v5.l, exec_hi, v2.l +// GFX11: v_max_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x72] -v_max_f16 v5, null, v2 -// GFX11: v_max_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x72] +v_max_f16 v5.l, null, v2.l +// GFX11: v_max_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x72] -v_max_f16 v5, -1, v2 -// GFX11: v_max_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x72] +v_max_f16 v5.l, -1, v2.l +// GFX11: v_max_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x72] -v_max_f16 v5, 0.5, v2 -// GFX11: v_max_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x72] +v_max_f16 v5.l, 0.5, v2.l +// GFX11: v_max_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x72] -v_max_f16 v5, src_scc, v2 -// GFX11: v_max_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x72] +v_max_f16 v5.l, src_scc, v2.l +// GFX11: v_max_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x72] -v_max_f16 v127, 0xfe0b, v127 -// GFX11: v_max_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x72,0x0b,0xfe,0x00,0x00] +v_max_f16 v127.l, 0xfe0b, v127.l +// GFX11: v_max_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x72,0x0b,0xfe,0x00,0x00] + +v_max_f16 v5.l, v1.h, v2.l +// GFX11: v_max_f16_e32 v5.l, v1.h, v2.l ; encoding: [0x81,0x05,0x0a,0x72] + +v_max_f16 v5.l, v127.h, v2.l +// GFX11: v_max_f16_e32 v5.l, v127.h, v2.l ; encoding: [0xff,0x05,0x0a,0x72] + +v_max_f16 v127.l, 0.5, v127.l +// GFX11: v_max_f16_e32 v127.l, 0.5, v127.l ; encoding: [0xf0,0xfe,0xfe,0x72] + +v_max_f16 v5.h, src_scc, v2.h +// GFX11: v_max_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x73] + +v_max_f16 v127.h, 0xfe0b, v127.h +// GFX11: v_max_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x73,0x0b,0xfe,0x00,0x00] v_max_f32 v5, v1, v2 // GFX11: v_max_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x20] @@ -1357,50 +1372,65 @@ v_max_u32 v5, src_scc, v2 v_max_u32 v255, 0xaf123456, v255 // GFX11: v_max_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x29,0x56,0x34,0x12,0xaf] -v_min_f16 v5, v1, v2 -// GFX11: v_min_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x74] +v_min_f16 v5.l, v1.l, v2.l +// GFX11: v_min_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x74] + +v_min_f16 v5.l, v127.l, v2.l +// GFX11: v_min_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x74] + +v_min_f16 v5.l, s1, v2.l +// GFX11: v_min_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x74] + +v_min_f16 v5.l, s105, v2.l +// GFX11: v_min_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x74] + +v_min_f16 v5.l, vcc_lo, v2.l +// GFX11: v_min_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x74] + +v_min_f16 v5.l, vcc_hi, v2.l +// GFX11: v_min_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x74] -v_min_f16 v5, v127, v2 -// GFX11: v_min_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x74] +v_min_f16 v5.l, ttmp15, v2.l +// GFX11: v_min_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x74] -v_min_f16 v5, s1, v2 -// GFX11: v_min_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x74] +v_min_f16 v5.l, m0, v2.l +// GFX11: v_min_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x74] -v_min_f16 v5, s105, v2 -// GFX11: v_min_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x74] +v_min_f16 v5.l, exec_lo, v2.l +// GFX11: v_min_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x74] -v_min_f16 v5, vcc_lo, v2 -// GFX11: v_min_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x74] +v_min_f16 v5.l, exec_hi, v2.l +// GFX11: v_min_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x74] -v_min_f16 v5, vcc_hi, v2 -// GFX11: v_min_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x74] +v_min_f16 v5.l, null, v2.l +// GFX11: v_min_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x74] -v_min_f16 v5, ttmp15, v2 -// GFX11: v_min_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x74] +v_min_f16 v5.l, -1, v2.l +// GFX11: v_min_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x74] -v_min_f16 v5, m0, v2 -// GFX11: v_min_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x74] +v_min_f16 v5.l, 0.5, v2.l +// GFX11: v_min_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x74] -v_min_f16 v5, exec_lo, v2 -// GFX11: v_min_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x74] +v_min_f16 v5.l, src_scc, v2.l +// GFX11: v_min_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x74] -v_min_f16 v5, exec_hi, v2 -// GFX11: v_min_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x74] +v_min_f16 v127.l, 0xfe0b, v127.l +// GFX11: v_min_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x74,0x0b,0xfe,0x00,0x00] -v_min_f16 v5, null, v2 -// GFX11: v_min_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x74] +v_min_f16 v5.l, v1.h, v2.l +// GFX11: v_min_f16_e32 v5.l, v1.h, v2.l ; encoding: [0x81,0x05,0x0a,0x74] -v_min_f16 v5, -1, v2 -// GFX11: v_min_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x74] +v_min_f16 v5.l, v127.h, v2.l +// GFX11: v_min_f16_e32 v5.l, v127.h, v2.l ; encoding: [0xff,0x05,0x0a,0x74] -v_min_f16 v5, 0.5, v2 -// GFX11: v_min_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x74] +v_min_f16 v127.l, 0.5, v127.l +// GFX11: v_min_f16_e32 v127.l, 0.5, v127.l ; encoding: [0xf0,0xfe,0xfe,0x74] -v_min_f16 v5, src_scc, v2 -// GFX11: v_min_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x74] +v_min_f16 v5.h, src_scc, v2.h +// GFX11: v_min_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x75] -v_min_f16 v127, 0xfe0b, v127 -// GFX11: v_min_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x74,0x0b,0xfe,0x00,0x00] +v_min_f16 v127.h, 0xfe0b, v127.h +// GFX11: v_min_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x75,0x0b,0xfe,0x00,0x00] v_min_f32 v5, v1, v2 // GFX11: v_min_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x1e] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s index 00353c4cdcb49..151784b1646ae 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s @@ -849,47 +849,56 @@ v_lshrrev_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi: v_lshrrev_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_lshrrev_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xff,0x33,0xff,0x6f,0x05,0x30] -v_max_f16 v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_max_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1b,0x00,0xff] +v_max_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX11: v_max_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1b,0x00,0xff] -v_max_f16 v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: v_max_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0xe4,0x00,0xff] +v_max_f16 v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX11: v_max_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0xe4,0x00,0xff] -v_max_f16 v5, v1, v2 row_mirror -// GFX11: v_max_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x40,0x01,0xff] +v_max_f16 v5.l, v1.l, v2.l row_mirror +// GFX11: v_max_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x40,0x01,0xff] -v_max_f16 v5, v1, v2 row_half_mirror -// GFX11: v_max_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x41,0x01,0xff] +v_max_f16 v5.l, v1.l, v2.l row_half_mirror +// GFX11: v_max_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x41,0x01,0xff] -v_max_f16 v5, v1, v2 row_shl:1 -// GFX11: v_max_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x01,0x01,0xff] +v_max_f16 v5.l, v1.l, v2.l row_shl:1 +// GFX11: v_max_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x01,0x01,0xff] -v_max_f16 v5, v1, v2 row_shl:15 -// GFX11: v_max_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x0f,0x01,0xff] +v_max_f16 v5.l, v1.l, v2.l row_shl:15 +// GFX11: v_max_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x0f,0x01,0xff] -v_max_f16 v5, v1, v2 row_shr:1 -// GFX11: v_max_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x11,0x01,0xff] +v_max_f16 v5.l, v1.l, v2.l row_shr:1 +// GFX11: v_max_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x11,0x01,0xff] -v_max_f16 v5, v1, v2 row_shr:15 -// GFX11: v_max_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1f,0x01,0xff] +v_max_f16 v5.l, v1.l, v2.l row_shr:15 +// GFX11: v_max_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1f,0x01,0xff] -v_max_f16 v5, v1, v2 row_ror:1 -// GFX11: v_max_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x21,0x01,0xff] +v_max_f16 v5.l, v1.l, v2.l row_ror:1 +// GFX11: v_max_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x21,0x01,0xff] -v_max_f16 v5, v1, v2 row_ror:15 -// GFX11: v_max_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x2f,0x01,0xff] +v_max_f16 v5.l, v1.l, v2.l row_ror:15 +// GFX11: v_max_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x2f,0x01,0xff] -v_max_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_max_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x50,0x01,0xff] +v_max_f16 v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_max_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x50,0x01,0xff] -v_max_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_max_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x5f,0x01,0x01] +v_max_f16 v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_max_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x5f,0x01,0x01] -v_max_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_max_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x60,0x09,0x13] +v_max_f16 v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_max_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x60,0x09,0x13] -v_max_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_max_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x72,0x7f,0x6f,0xf5,0x30] +v_max_f16 v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: v_max_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x72,0x7f,0x6f,0xf5,0x30] + +v_max_f16 v127.l, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_max_f16_dpp v127.l, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0xfe,0x72,0x7f,0x5f,0x01,0x01] + +v_max_f16 v5.h, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_max_f16_dpp v5.h, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0b,0x73,0x81,0x60,0x09,0x13] + +v_max_f16 v127.h, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_max_f16_dpp v127.h, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xff,0x73,0xff,0x6f,0xf5,0x30] v_max_f32 v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: v_max_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x1b,0x00,0xff] @@ -1017,47 +1026,56 @@ v_max_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 v_max_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_max_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xff,0x29,0xff,0x6f,0x05,0x30] -v_min_f16 v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_min_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1b,0x00,0xff] +v_min_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX11: v_min_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1b,0x00,0xff] + +v_min_f16 v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX11: v_min_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0xe4,0x00,0xff] + +v_min_f16 v5.l, v1.l, v2.l row_mirror +// GFX11: v_min_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x40,0x01,0xff] + +v_min_f16 v5.l, v1.l, v2.l row_half_mirror +// GFX11: v_min_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x41,0x01,0xff] -v_min_f16 v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: v_min_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0xe4,0x00,0xff] +v_min_f16 v5.l, v1.l, v2.l row_shl:1 +// GFX11: v_min_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x01,0x01,0xff] -v_min_f16 v5, v1, v2 row_mirror -// GFX11: v_min_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x40,0x01,0xff] +v_min_f16 v5.l, v1.l, v2.l row_shl:15 +// GFX11: v_min_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x0f,0x01,0xff] -v_min_f16 v5, v1, v2 row_half_mirror -// GFX11: v_min_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x41,0x01,0xff] +v_min_f16 v5.l, v1.l, v2.l row_shr:1 +// GFX11: v_min_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x11,0x01,0xff] -v_min_f16 v5, v1, v2 row_shl:1 -// GFX11: v_min_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x01,0x01,0xff] +v_min_f16 v5.l, v1.l, v2.l row_shr:15 +// GFX11: v_min_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1f,0x01,0xff] -v_min_f16 v5, v1, v2 row_shl:15 -// GFX11: v_min_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x0f,0x01,0xff] +v_min_f16 v5.l, v1.l, v2.l row_ror:1 +// GFX11: v_min_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x21,0x01,0xff] -v_min_f16 v5, v1, v2 row_shr:1 -// GFX11: v_min_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x11,0x01,0xff] +v_min_f16 v5.l, v1.l, v2.l row_ror:15 +// GFX11: v_min_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x2f,0x01,0xff] -v_min_f16 v5, v1, v2 row_shr:15 -// GFX11: v_min_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1f,0x01,0xff] +v_min_f16 v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_min_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x50,0x01,0xff] -v_min_f16 v5, v1, v2 row_ror:1 -// GFX11: v_min_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x21,0x01,0xff] +v_min_f16 v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_min_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x5f,0x01,0x01] -v_min_f16 v5, v1, v2 row_ror:15 -// GFX11: v_min_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x2f,0x01,0xff] +v_min_f16 v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_min_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x60,0x09,0x13] -v_min_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_min_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x50,0x01,0xff] +v_min_f16 v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: v_min_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x74,0x7f,0x6f,0xf5,0x30] -v_min_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_min_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x5f,0x01,0x01] +v_min_f16 v127.l, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_min_f16_dpp v127.l, v127.l, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0xfe,0x74,0x7f,0x5f,0x01,0x01] -v_min_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_min_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x60,0x09,0x13] +v_min_f16 v5.h, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_min_f16_dpp v5.h, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0b,0x75,0x81,0x60,0x09,0x13] -v_min_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_min_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x74,0x7f,0x6f,0xf5,0x30] +v_min_f16 v127.h, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_min_f16_dpp v127.h, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xff,0x75,0xff,0x6f,0xf5,0x30] v_min_f32 v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: v_min_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s index 489e6d8c9d63a..1bb62f7a0ccc2 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s @@ -190,14 +190,23 @@ v_lshrrev_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_lshrrev_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_lshrrev_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xff,0x33,0xff,0x00,0x00,0x00] -v_max_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x72,0x01,0x77,0x39,0x05] +v_max_f16 v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x72,0x01,0x77,0x39,0x05] -v_max_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_max_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x72,0x01,0x77,0x39,0x05] +v_max_f16 v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_max_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x72,0x01,0x77,0x39,0x05] -v_max_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_max_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfe,0x72,0x7f,0x00,0x00,0x00] +v_max_f16 v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_max_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfe,0x72,0x7f,0x00,0x00,0x00] + +v_max_f16 v127.l, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_dpp v127.l, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0xfe,0x72,0x7f,0x77,0x39,0x05] + +v_max_f16 v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_max_f16_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0b,0x73,0x81,0x77,0x39,0x05] + +v_max_f16 v127.h, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_max_f16_dpp v127.h, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xff,0x73,0xff,0x00,0x00,0x00] v_max_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_max_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x20,0x01,0x77,0x39,0x05] @@ -226,14 +235,23 @@ v_max_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_max_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_max_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xff,0x29,0xff,0x00,0x00,0x00] -v_min_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x74,0x01,0x77,0x39,0x05] +v_min_f16 v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x74,0x01,0x77,0x39,0x05] + +v_min_f16 v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_min_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x74,0x01,0x77,0x39,0x05] + +v_min_f16 v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_min_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfe,0x74,0x7f,0x00,0x00,0x00] + +v_min_f16 v127.l, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_dpp v127.l, v127.l, v127.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0xfe,0x74,0x7f,0x77,0x39,0x05] -v_min_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_min_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x74,0x01,0x77,0x39,0x05] +v_min_f16 v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_min_f16_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0b,0x75,0x81,0x77,0x39,0x05] -v_min_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_min_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfe,0x74,0x7f,0x00,0x00,0x00] +v_min_f16 v127.h, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_min_f16_dpp v127.h, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xff,0x75,0xff,0x00,0x00,0x00] v_min_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_min_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x1e,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s index e5504074079e5..ff2d9a86d8845 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s @@ -173,59 +173,113 @@ v_ldexp_f16_e32 v5.l, v1.l, v255.l v_ldexp_f16_e32 v5.l, v255.l, v2.l // GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction -v_max_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_max_f16_dpp v255.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:15: error: invalid operand for instruction -v_max_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_max_f16_dpp v255.h, v1.h, v2.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:15: error: invalid operand for instruction -v_max_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_max_f16_dpp v255.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:15: error: invalid operand for instruction -v_max_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_max_f16_dpp v255.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:15: error: invalid operand for instruction -v_max_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_max_f16_dpp v5.h, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_max_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_max_f16_dpp v5.h, v1.h, v255.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_max_f16_e32 v255, v1, v2 -// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_max_f16_dpp v5.h, v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction -v_max_f16_e32 v5, v1, v255 -// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_max_f16_dpp v5.h, v255.h, v2.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction -v_max_f16_e32 v5, v255, v2 -// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_max_f16_dpp v5.l, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_min_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_max_f16_dpp v5.l, v1.l, v255.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_min_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_max_f16_dpp v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction -v_min_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_max_f16_dpp v5.l, v255.l, v2.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction -v_min_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_max_f16_e32 v255.h, v1.h, v2.h +// GFX11: :[[@LINE-1]]:15: error: invalid operand for instruction -v_min_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_max_f16_e32 v255.l, v1.l, v2.l +// GFX11: :[[@LINE-1]]:15: error: invalid operand for instruction -v_min_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_max_f16_e32 v5.h, v1.h, v255.h +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_min_f16_e32 v255, v1, v2 -// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_max_f16_e32 v5.h, v255.h, v2.h +// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction -v_min_f16_e32 v5, v1, v255 -// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_max_f16_e32 v5.l, v1.l, v255.l +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_min_f16_e32 v5, v255, v2 -// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_max_f16_e32 v5.l, v255.l, v2.l +// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction + +v_min_f16_dpp v255.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:15: error: invalid operand for instruction + +v_min_f16_dpp v255.h, v1.h, v2.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:15: error: invalid operand for instruction + +v_min_f16_dpp v255.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:15: error: invalid operand for instruction + +v_min_f16_dpp v255.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:15: error: invalid operand for instruction + +v_min_f16_dpp v5.h, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction + +v_min_f16_dpp v5.h, v1.h, v255.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction + +v_min_f16_dpp v5.h, v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction + +v_min_f16_dpp v5.h, v255.h, v2.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction + +v_min_f16_dpp v5.l, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction + +v_min_f16_dpp v5.l, v1.l, v255.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction + +v_min_f16_dpp v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction + +v_min_f16_dpp v5.l, v255.l, v2.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction + +v_min_f16_e32 v255.h, v1.h, v2.h +// GFX11: :[[@LINE-1]]:15: error: invalid operand for instruction + +v_min_f16_e32 v255.l, v1.l, v2.l +// GFX11: :[[@LINE-1]]:15: error: invalid operand for instruction + +v_min_f16_e32 v5.h, v1.h, v255.h +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction + +v_min_f16_e32 v5.h, v255.h, v2.h +// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction + +v_min_f16_e32 v5.l, v1.l, v255.l +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction + +v_min_f16_e32 v5.l, v255.l, v2.l +// GFX11: :[[@LINE-1]]:21: error: invalid operand for instruction v_mul_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s index 63394502ec14e..13b2ffa2e2d77 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s @@ -92,59 +92,113 @@ v_ldexp_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] v_ldexp_f16 v5, v255, v2 quad_perm:[3,2,1,0] // GFX11: v_ldexp_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_max_f16 v255, v1, v2 -// GFX11: v_max_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x39,0xd5,0x01,0x05,0x02,0x00] +v_max_f16 v255.h, v1.h, v2.h +// GFX11: v_max_f16_e64 v255.h, v1.h, v2.h op_sel:[1,1,1] ; encoding: [0xff,0x58,0x39,0xd5,0x01,0x05,0x02,0x00] -v_max_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_max_f16 v255.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64_dpp v255.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x58,0x39,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_max_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_max_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_max_f16 v255.h, v1.h, v2.h quad_perm:[3,2,1,0] +// GFX11: v_max_f16_e64_dpp v255.h, v1.h, v2.h op_sel:[1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x58,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max_f16 v5, v1, v255 -// GFX11: v_max_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x39,0xd5,0x01,0xff,0x03,0x00] +v_max_f16 v255.l, v1.l, v2.l +// GFX11: v_max_f16_e64 v255.l, v1.l, v2.l ; encoding: [0xff,0x00,0x39,0xd5,0x01,0x05,0x02,0x00] -v_max_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +v_max_f16 v255.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64_dpp v255.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_max_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_max_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +v_max_f16 v255.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX11: v_max_f16_e64_dpp v255.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max_f16 v5, v255, v2 -// GFX11: v_max_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x39,0xd5,0xff,0x05,0x02,0x00] +v_max_f16 v5.h, v1.h, v255.h +// GFX11: v_max_f16_e64 v5.h, v1.h, v255.h op_sel:[1,1,1] ; encoding: [0x05,0x58,0x39,0xd5,0x01,0xff,0x03,0x00] -v_max_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] +v_max_f16 v5.h, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64_dpp v5.h, v1.h, v255.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x39,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_max_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_max_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] +v_max_f16 v5.h, v1.h, v255.h quad_perm:[3,2,1,0] +// GFX11: v_max_f16_e64_dpp v5.h, v1.h, v255.h op_sel:[1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x39,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_min_f16 v255, v1, v2 -// GFX11: v_min_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x3a,0xd5,0x01,0x05,0x02,0x00] +v_max_f16 v5.h, v255.h, v2.h +// GFX11: v_max_f16_e64 v5.h, v255.h, v2.h op_sel:[1,1,1] ; encoding: [0x05,0x58,0x39,0xd5,0xff,0x05,0x02,0x00] -v_min_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_max_f16 v5.h, v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64_dpp v5.h, v255.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x39,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_min_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_min_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_max_f16 v5.h, v255.h, v2.h quad_perm:[3,2,1,0] +// GFX11: v_max_f16_e64_dpp v5.h, v255.h, v2.h op_sel:[1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x39,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_min_f16 v5, v1, v255 -// GFX11: v_min_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x3a,0xd5,0x01,0xff,0x03,0x00] +v_max_f16 v5.l, v1.l, v255.l +// GFX11: v_max_f16_e64 v5.l, v1.l, v255.l ; encoding: [0x05,0x00,0x39,0xd5,0x01,0xff,0x03,0x00] -v_min_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3a,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +v_max_f16 v5.l, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64_dpp v5.l, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_min_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_min_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +v_max_f16 v5.l, v1.l, v255.l quad_perm:[3,2,1,0] +// GFX11: v_max_f16_e64_dpp v5.l, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_min_f16 v5, v255, v2 -// GFX11: v_min_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x3a,0xd5,0xff,0x05,0x02,0x00] +v_max_f16 v5.l, v255.l, v2.l +// GFX11: v_max_f16_e64 v5.l, v255.l, v2.l ; encoding: [0x05,0x00,0x39,0xd5,0xff,0x05,0x02,0x00] -v_min_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] +v_max_f16 v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64_dpp v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_min_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_min_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] +v_max_f16 v5.l, v255.l, v2.l quad_perm:[3,2,1,0] +// GFX11: v_max_f16_e64_dpp v5.l, v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] + +v_min_f16 v255.h, v1.h, v2.h +// GFX11: v_min_f16_e64 v255.h, v1.h, v2.h op_sel:[1,1,1] ; encoding: [0xff,0x58,0x3a,0xd5,0x01,0x05,0x02,0x00] + +v_min_f16 v255.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64_dpp v255.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x58,0x3a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_f16 v255.h, v1.h, v2.h quad_perm:[3,2,1,0] +// GFX11: v_min_f16_e64_dpp v255.h, v1.h, v2.h op_sel:[1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x58,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_min_f16 v255.l, v1.l, v2.l +// GFX11: v_min_f16_e64 v255.l, v1.l, v2.l ; encoding: [0xff,0x00,0x3a,0xd5,0x01,0x05,0x02,0x00] + +v_min_f16 v255.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64_dpp v255.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_f16 v255.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX11: v_min_f16_e64_dpp v255.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_min_f16 v5.h, v1.h, v255.h +// GFX11: v_min_f16_e64 v5.h, v1.h, v255.h op_sel:[1,1,1] ; encoding: [0x05,0x58,0x3a,0xd5,0x01,0xff,0x03,0x00] + +v_min_f16 v5.h, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64_dpp v5.h, v1.h, v255.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x3a,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] + +v_min_f16 v5.h, v1.h, v255.h quad_perm:[3,2,1,0] +// GFX11: v_min_f16_e64_dpp v5.h, v1.h, v255.h op_sel:[1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x3a,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] + +v_min_f16 v5.h, v255.h, v2.h +// GFX11: v_min_f16_e64 v5.h, v255.h, v2.h op_sel:[1,1,1] ; encoding: [0x05,0x58,0x3a,0xd5,0xff,0x05,0x02,0x00] + +v_min_f16 v5.h, v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64_dpp v5.h, v255.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x3a,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] + +v_min_f16 v5.h, v255.h, v2.h quad_perm:[3,2,1,0] +// GFX11: v_min_f16_e64_dpp v5.h, v255.h, v2.h op_sel:[1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x3a,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] + +v_min_f16 v5.l, v1.l, v255.l +// GFX11: v_min_f16_e64 v5.l, v1.l, v255.l ; encoding: [0x05,0x00,0x3a,0xd5,0x01,0xff,0x03,0x00] + +v_min_f16 v5.l, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64_dpp v5.l, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3a,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] + +v_min_f16 v5.l, v1.l, v255.l quad_perm:[3,2,1,0] +// GFX11: v_min_f16_e64_dpp v5.l, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] + +v_min_f16 v5.l, v255.l, v2.l +// GFX11: v_min_f16_e64 v5.l, v255.l, v2.l ; encoding: [0x05,0x00,0x3a,0xd5,0xff,0x05,0x02,0x00] + +v_min_f16 v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64_dpp v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] + +v_min_f16 v5.l, v255.l, v2.l quad_perm:[3,2,1,0] +// GFX11: v_min_f16_e64_dpp v5.l, v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] v_mul_f16 v255, v1, v2 // GFX11: v_mul_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x35,0xd5,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s index a63637dc22e3a..7d435bd79c882 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s @@ -734,47 +734,59 @@ v_lshrrev_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ct v_lshrrev_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_lshrrev_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x19,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_max_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_max_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_max_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX11: v_max_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: v_max_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_max_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX11: v_max_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_max_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: v_max_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_max_f16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX11: v_max_f16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_max_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: v_max_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_max_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX11: v_max_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_max_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: v_max_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_max_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX11: v_max_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_max_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: v_max_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_max_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX11: v_max_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_max_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: v_max_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_max_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX11: v_max_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_max_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: v_max_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_max_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX11: v_max_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_max_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: v_max_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_max_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX11: v_max_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_max_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: v_max_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_max_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX11: v_max_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_max_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_max_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_max_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_max_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_max_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_max_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x39,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +v_max_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_max_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x39,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -v_max_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_max_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x39,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] +v_max_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_max_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x39,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] -v_max_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_max_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x39,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30] +v_max_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_max_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x39,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30] + +v_max_f16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_max_f16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_max_f16_e64_dpp v5.l, |v1.h|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_max_f16_e64_dpp v5.l, |v1.h|, -v2.l op_sel:[1,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x39,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_max_f16_e64_dpp v5.l, -v1.l, |v2.h| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_max_f16_e64_dpp v5.l, -v1.l, |v2.h| op_sel:[0,1,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x12,0x39,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_max_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_max_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| op_sel:[0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc3,0x39,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30] v_max_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: v_max_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -902,47 +914,59 @@ v_max_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_max_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_max_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x14,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_min_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_min_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_min_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX11: v_min_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_min_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX11: v_min_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_min_f16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX11: v_min_f16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_min_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX11: v_min_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_min_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX11: v_min_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_min_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: v_min_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_min_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX11: v_min_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_min_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: v_min_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_min_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX11: v_min_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_min_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: v_min_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_min_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX11: v_min_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_min_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: v_min_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_min_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX11: v_min_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_min_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: v_min_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX11: v_min_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_min_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: v_min_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_min_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_min_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_min_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: v_min_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_min_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_min_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x3a,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -v_min_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: v_min_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_min_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_min_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x3a,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] -v_min_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: v_min_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_min_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_min_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x3a,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30] -v_min_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_min_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_min_f16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_min_f16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_min_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_min_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x3a,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +v_min_f16_e64_dpp v5.l, |v1.h|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_min_f16_e64_dpp v5.l, |v1.h|, -v2.l op_sel:[1,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x3a,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -v_min_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_min_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x3a,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] +v_min_f16_e64_dpp v5.l, -v1.l, |v2.h| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_min_f16_e64_dpp v5.l, -v1.l, |v2.h| op_sel:[0,1,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x12,0x3a,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] -v_min_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_min_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x3a,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30] +v_min_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_min_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| op_sel:[0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc3,0x3a,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30] v_min_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: v_min_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s index 49ee9a7b02bed..92a3deda095e5 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s @@ -223,17 +223,29 @@ v_lshrrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_lshrrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_lshrrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x19,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_max_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_max_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_max_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x39,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +v_max_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x39,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -v_max_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_max_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x39,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +v_max_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_max_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x39,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -v_max_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_max_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x39,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +v_max_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_max_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x39,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_max_f16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x39,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_f16_e64_dpp v5.l, |v1.h|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64_dpp v5.l, |v1.h|, -v2.l op_sel:[1,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x39,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_max_f16_e64_dpp v5.l, -v1.l, |v2.h| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_max_f16_e64_dpp v5.l, -v1.l, |v2.h| op_sel:[0,1,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x12,0x39,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_max_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_max_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| op_sel:[0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc3,0x39,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] v_max_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_max_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x10,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -265,17 +277,29 @@ v_max_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_max_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_max_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x14,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_min_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_min_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x3a,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_min_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_min_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x3a,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_min_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_min_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x3a,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_min_f16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x3a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_min_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x3a,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +v_min_f16_e64_dpp v5.l, |v1.h|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64_dpp v5.l, |v1.h|, -v2.l op_sel:[1,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x3a,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -v_min_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_min_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x3a,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +v_min_f16_e64_dpp v5.l, -v1.l, |v2.h| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_min_f16_e64_dpp v5.l, -v1.l, |v2.h| op_sel:[0,1,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x12,0x3a,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -v_min_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_min_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x3a,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +v_min_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_min_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| op_sel:[0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc3,0x3a,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] v_min_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_min_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s index ee6aa11252609..594dda53a4421 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s @@ -857,50 +857,59 @@ v_lshrrev_b32_e64 v5, src_scc, vcc_lo v_lshrrev_b32_e64 v255, 0xaf123456, vcc_hi // GFX11: v_lshrrev_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x19,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_max_f16_e64 v5, v1, v2 -// GFX11: v_max_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x39,0xd5,0x01,0x05,0x02,0x00] +v_max_f16_e64 v5.l, v1.l, v2.l +// GFX11: v_max_f16_e64 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x39,0xd5,0x01,0x05,0x02,0x00] -v_max_f16_e64 v5, v255, v255 -// GFX11: v_max_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x39,0xd5,0xff,0xff,0x03,0x00] +v_max_f16_e64 v5.l, v255.l, v255.l +// GFX11: v_max_f16_e64 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x39,0xd5,0xff,0xff,0x03,0x00] -v_max_f16_e64 v5, s1, s2 -// GFX11: v_max_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x39,0xd5,0x01,0x04,0x00,0x00] +v_max_f16_e64 v5.l, s1, s2 +// GFX11: v_max_f16_e64 v5.l, s1, s2 ; encoding: [0x05,0x00,0x39,0xd5,0x01,0x04,0x00,0x00] -v_max_f16_e64 v5, s105, s105 -// GFX11: v_max_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x39,0xd5,0x69,0xd2,0x00,0x00] +v_max_f16_e64 v5.l, s105, s105 +// GFX11: v_max_f16_e64 v5.l, s105, s105 ; encoding: [0x05,0x00,0x39,0xd5,0x69,0xd2,0x00,0x00] -v_max_f16_e64 v5, vcc_lo, ttmp15 -// GFX11: v_max_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x39,0xd5,0x6a,0xf6,0x00,0x00] +v_max_f16_e64 v5.l, vcc_lo, ttmp15 +// GFX11: v_max_f16_e64 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x39,0xd5,0x6a,0xf6,0x00,0x00] -v_max_f16_e64 v5, vcc_hi, 0xfe0b -// GFX11: v_max_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x39,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +v_max_f16_e64 v5.l, vcc_hi, 0xfe0b +// GFX11: v_max_f16_e64 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x39,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_max_f16_e64 v5, ttmp15, src_scc -// GFX11: v_max_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x39,0xd5,0x7b,0xfa,0x01,0x00] +v_max_f16_e64 v5.l, ttmp15, src_scc +// GFX11: v_max_f16_e64 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x39,0xd5,0x7b,0xfa,0x01,0x00] -v_max_f16_e64 v5, m0, 0.5 -// GFX11: v_max_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x39,0xd5,0x7d,0xe0,0x01,0x00] +v_max_f16_e64 v5.l, m0, 0.5 +// GFX11: v_max_f16_e64 v5.l, m0, 0.5 ; encoding: [0x05,0x00,0x39,0xd5,0x7d,0xe0,0x01,0x00] -v_max_f16_e64 v5, exec_lo, -1 -// GFX11: v_max_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x39,0xd5,0x7e,0x82,0x01,0x00] +v_max_f16_e64 v5.l, exec_lo, -1 +// GFX11: v_max_f16_e64 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x39,0xd5,0x7e,0x82,0x01,0x00] -v_max_f16_e64 v5, |exec_hi|, null -// GFX11: v_max_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x39,0xd5,0x7f,0xf8,0x00,0x00] +v_max_f16_e64 v5.l, |exec_hi|, null +// GFX11: v_max_f16_e64 v5.l, |exec_hi|, null ; encoding: [0x05,0x01,0x39,0xd5,0x7f,0xf8,0x00,0x00] -v_max_f16_e64 v5, null, exec_lo -// GFX11: v_max_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x39,0xd5,0x7c,0xfc,0x00,0x00] +v_max_f16_e64 v5.l, null, exec_lo +// GFX11: v_max_f16_e64 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x39,0xd5,0x7c,0xfc,0x00,0x00] -v_max_f16_e64 v5, -1, exec_hi -// GFX11: v_max_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x39,0xd5,0xc1,0xfe,0x00,0x00] +v_max_f16_e64 v5.l, -1, exec_hi +// GFX11: v_max_f16_e64 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x39,0xd5,0xc1,0xfe,0x00,0x00] -v_max_f16_e64 v5, 0.5, -m0 mul:2 -// GFX11: v_max_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x39,0xd5,0xf0,0xfa,0x00,0x48] +v_max_f16_e64 v5.l, 0.5, -m0 mul:2 +// GFX11: v_max_f16_e64 v5.l, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x39,0xd5,0xf0,0xfa,0x00,0x48] -v_max_f16_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: v_max_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x39,0xd5,0xfd,0xd4,0x00,0x30] +v_max_f16_e64 v5.l, -src_scc, |vcc_lo| mul:4 +// GFX11: v_max_f16_e64 v5.l, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x39,0xd5,0xfd,0xd4,0x00,0x30] -v_max_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 -// GFX11: v_max_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x39,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] +v_max_f16_e64 v255.l, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX11: v_max_f16_e64 v255.l, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x39,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] + +v_max_f16_e64 v5.l, v1.h, v2.l +// GFX11: v_max_f16_e64 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x39,0xd5,0x01,0x05,0x02,0x00] + +v_max_f16_e64 v5.l, v255.l, v255.h +// GFX11: v_max_f16_e64 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x39,0xd5,0xff,0xff,0x03,0x00] + +v_max_f16_e64 v255.h, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX11: v_max_f16_e64 v255.h, -|0xfe0b|, -|vcc_hi| op_sel:[0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x39,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] v_max_f32_e64 v5, v1, v2 // GFX11: v_max_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x10,0xd5,0x01,0x05,0x02,0x00] @@ -1037,50 +1046,59 @@ v_max_u32_e64 v5, src_scc, vcc_lo v_max_u32_e64 v255, 0xaf123456, vcc_hi // GFX11: v_max_u32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x14,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_min_f16_e64 v5, v1, v2 -// GFX11: v_min_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x3a,0xd5,0x01,0x05,0x02,0x00] +v_min_f16_e64 v5.l, v1.l, v2.l +// GFX11: v_min_f16_e64 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x3a,0xd5,0x01,0x05,0x02,0x00] + +v_min_f16_e64 v5.l, v255.l, v255.l +// GFX11: v_min_f16_e64 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x3a,0xd5,0xff,0xff,0x03,0x00] + +v_min_f16_e64 v5.l, s1, s2 +// GFX11: v_min_f16_e64 v5.l, s1, s2 ; encoding: [0x05,0x00,0x3a,0xd5,0x01,0x04,0x00,0x00] + +v_min_f16_e64 v5.l, s105, s105 +// GFX11: v_min_f16_e64 v5.l, s105, s105 ; encoding: [0x05,0x00,0x3a,0xd5,0x69,0xd2,0x00,0x00] -v_min_f16_e64 v5, v255, v255 -// GFX11: v_min_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x3a,0xd5,0xff,0xff,0x03,0x00] +v_min_f16_e64 v5.l, vcc_lo, ttmp15 +// GFX11: v_min_f16_e64 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x3a,0xd5,0x6a,0xf6,0x00,0x00] -v_min_f16_e64 v5, s1, s2 -// GFX11: v_min_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x3a,0xd5,0x01,0x04,0x00,0x00] +v_min_f16_e64 v5.l, vcc_hi, 0xfe0b +// GFX11: v_min_f16_e64 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x3a,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_min_f16_e64 v5, s105, s105 -// GFX11: v_min_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x3a,0xd5,0x69,0xd2,0x00,0x00] +v_min_f16_e64 v5.l, ttmp15, src_scc +// GFX11: v_min_f16_e64 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x3a,0xd5,0x7b,0xfa,0x01,0x00] -v_min_f16_e64 v5, vcc_lo, ttmp15 -// GFX11: v_min_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x3a,0xd5,0x6a,0xf6,0x00,0x00] +v_min_f16_e64 v5.l, m0, 0.5 +// GFX11: v_min_f16_e64 v5.l, m0, 0.5 ; encoding: [0x05,0x00,0x3a,0xd5,0x7d,0xe0,0x01,0x00] -v_min_f16_e64 v5, vcc_hi, 0xfe0b -// GFX11: v_min_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x3a,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +v_min_f16_e64 v5.l, exec_lo, -1 +// GFX11: v_min_f16_e64 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x3a,0xd5,0x7e,0x82,0x01,0x00] -v_min_f16_e64 v5, ttmp15, src_scc -// GFX11: v_min_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x3a,0xd5,0x7b,0xfa,0x01,0x00] +v_min_f16_e64 v5.l, |exec_hi|, null +// GFX11: v_min_f16_e64 v5.l, |exec_hi|, null ; encoding: [0x05,0x01,0x3a,0xd5,0x7f,0xf8,0x00,0x00] -v_min_f16_e64 v5, m0, 0.5 -// GFX11: v_min_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x3a,0xd5,0x7d,0xe0,0x01,0x00] +v_min_f16_e64 v5.l, null, exec_lo +// GFX11: v_min_f16_e64 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x3a,0xd5,0x7c,0xfc,0x00,0x00] -v_min_f16_e64 v5, exec_lo, -1 -// GFX11: v_min_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x3a,0xd5,0x7e,0x82,0x01,0x00] +v_min_f16_e64 v5.l, -1, exec_hi +// GFX11: v_min_f16_e64 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x3a,0xd5,0xc1,0xfe,0x00,0x00] -v_min_f16_e64 v5, |exec_hi|, null -// GFX11: v_min_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x3a,0xd5,0x7f,0xf8,0x00,0x00] +v_min_f16_e64 v5.l, 0.5, -m0 mul:2 +// GFX11: v_min_f16_e64 v5.l, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x3a,0xd5,0xf0,0xfa,0x00,0x48] -v_min_f16_e64 v5, null, exec_lo -// GFX11: v_min_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x3a,0xd5,0x7c,0xfc,0x00,0x00] +v_min_f16_e64 v5.l, -src_scc, |vcc_lo| mul:4 +// GFX11: v_min_f16_e64 v5.l, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x3a,0xd5,0xfd,0xd4,0x00,0x30] -v_min_f16_e64 v5, -1, exec_hi -// GFX11: v_min_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x3a,0xd5,0xc1,0xfe,0x00,0x00] +v_min_f16_e64 v255.l, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX11: v_min_f16_e64 v255.l, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x3a,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] -v_min_f16_e64 v5, 0.5, -m0 mul:2 -// GFX11: v_min_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x3a,0xd5,0xf0,0xfa,0x00,0x48] +v_min_f16_e64 v5.l, v1.h, v2.l +// GFX11: v_min_f16_e64 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x3a,0xd5,0x01,0x05,0x02,0x00] -v_min_f16_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: v_min_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x3a,0xd5,0xfd,0xd4,0x00,0x30] +v_min_f16_e64 v5.l, v255.l, v255.h +// GFX11: v_min_f16_e64 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x3a,0xd5,0xff,0xff,0x03,0x00] -v_min_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 -// GFX11: v_min_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x3a,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] +v_min_f16_e64 v255.h, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX11: v_min_f16_e64 v255.h, -|0xfe0b|, -|vcc_hi| op_sel:[0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x3a,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] v_min_f32_e64 v5, v1, v2 // GFX11: v_min_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x0f,0xd5,0x01,0x05,0x02,0x00]