From 302f98102873e77bf1b5959397739d593cc953da Mon Sep 17 00:00:00 2001 From: guochen2 Date: Fri, 11 Apr 2025 16:39:20 -0400 Subject: [PATCH 1/3] clean up of true16 mc changes --- llvm/test/MC/AMDGPU/gfx11_asm_vinterp_err.s | 26 +-- llvm/test/MC/AMDGPU/gfx11_asm_vop1.s | 5 +- llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s | 8 +- llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s | 2 +- llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s | 74 ++++++++- .../MC/AMDGPU/gfx11_asm_vop1_t16_promote.s | 5 +- llvm/test/MC/AMDGPU/gfx11_asm_vop2.s | 2 +- llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s | 8 +- llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s | 2 +- llvm/test/MC/AMDGPU/gfx11_asm_vop2_err.s | 4 +- llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 48 +++--- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s | 48 +++--- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 48 +++--- llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s | 122 ++++++++++---- llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s | 36 +++-- llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s | 4 +- llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s | 4 +- llvm/test/MC/AMDGPU/gfx12_asm_vop2.s | 146 ++++++++++------- llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16.s | 126 ++++++++------- llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8.s | 38 +++-- llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_err.s | 114 +++++++++---- .../MC/AMDGPU/gfx12_asm_vop2_t16_promote.s | 126 ++++++++++----- llvm/test/MC/AMDGPU/gfx12_asm_vop3_aliases.s | 12 +- .../test/MC/AMDGPU/gfx12_asm_vop3_from_vop2.s | 138 +++++++++------- .../AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s | 152 ++++++++++-------- .../MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s | 72 ++++++--- 26 files changed, 878 insertions(+), 492 deletions(-) diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vinterp_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vinterp_err.s index 188e87d20412f..f952d658405c4 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vinterp_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vinterp_err.s @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5 -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 %s 2>&1 | FileCheck %s -check-prefix=GCN-ERR --implicit-check-not=error: --strict-whitespace -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck %s -check-prefix=GCN-ERR --implicit-check-not=error: --strict-whitespace +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 %s 2>&1 | FileCheck %s -check-prefix=GCN-ERR --implicit-check-not=error: --strict-whitespace +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 %s 2>&1 | FileCheck %s -check-prefix=GCN-ERR --implicit-check-not=error: --strict-whitespace //===----------------------------------------------------------------------===// // VINTERP src operands must be VGPRs. @@ -25,20 +25,20 @@ v_interp_p2_f32 v0, v1, 2, v3 v_interp_p2_f32 v0, v1, v2, 3 // GCN-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -v_interp_p10_f16_f32 v0, s1, v2, v3 +v_interp_p10_f16_f32 v0, s1, v2, v3.l // GCN-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -v_interp_p10_f16_f32 v0, v1, s2, v3 -// GCN-ERR: :[[@LINE-1]]:30: error: invalid operand for instruction +v_interp_p10_f16_f32 v0, v1.l, s2, v3.l +// GCN-ERR: :[[@LINE-1]]:32: error: invalid operand for instruction -v_interp_p10_f16_f32 v0, v1, v2, s3 -// GCN-ERR: :[[@LINE-1]]:34: error: invalid operand for instruction +v_interp_p10_f16_f32 v0, v1.l, v2, s3 +// GCN-ERR: :[[@LINE-1]]:36: error: invalid operand for instruction -v_interp_p2_f16_f32 v0, 1, v2, v3 -// GCN-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction +v_interp_p2_f16_f32 v0.l, 1, v2, v3.l +// GCN-ERR: :[[@LINE-1]]:27: error: invalid operand for instruction -v_interp_p2_f16_f32 v0, v1, 2, v3 -// GCN-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction - -v_interp_p2_f16_f32 v0, v1, v2, 3 +v_interp_p2_f16_f32 v0.l, v1.l, 2, v3.l // GCN-ERR: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_interp_p2_f16_f32 v0.l, v1.l, v2, 3 +// GCN-ERR: :[[@LINE-1]]:37: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s index 58641332a08da..ff9a09941b173 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s @@ -2,7 +2,7 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=GFX11 %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 %s -v_bfrev_b32_e32 v5, v1 +v_bfrev_b32 v5, v1 // GFX11: v_bfrev_b32_e32 v5, v1 ; encoding: [0x01,0x71,0x0a,0x7e] v_bfrev_b32 v5, v255 @@ -464,6 +464,9 @@ v_cvt_f16_f32 v5.h, src_scc v_cvt_f16_f32 v127.h, 0xaf123456 // GFX11: v_cvt_f16_f32_e32 v127.h, 0xaf123456 ; encoding: [0xff,0x14,0xfe,0x7f,0x56,0x34,0x12,0xaf] +v_cvt_f16_f32 v127.l, 0.5 +// GFX11: v_cvt_f16_f32_e32 v127.l, 0.5 ; encoding: [0xf0,0x14,0xfe,0x7e] + v_cvt_f16_i16 v5.l, v1.l // GFX11: v_cvt_f16_i16_e32 v5.l, v1.l ; encoding: [0x01,0xa3,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s index 2bdb9ecfb7658..40786454584e7 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s @@ -2,7 +2,7 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s -v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] +v_bfrev_b32 v5, v1 quad_perm:[3,2,1,0] // GFX11: v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0xff] v_bfrev_b32 v5, v1 quad_perm:[0,1,2,3] @@ -389,6 +389,9 @@ v_cvt_f16_f32 v5.h, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 v_cvt_f16_f32 v127.h, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_cvt_f16_f32_dpp v127.h, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x14,0xfe,0x7f,0xff,0x6f,0x35,0x30] +v_cvt_f16_f32 v127.l, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_cvt_f16_f32_dpp v127.l, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x14,0xfe,0x7e,0x01,0x5f,0x01,0x01] + v_cvt_f16_i16 v5.l, v1.l quad_perm:[3,2,1,0] // GFX11: v_cvt_f16_i16_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xa2,0x0a,0x7e,0x01,0x1b,0x00,0xff] @@ -515,6 +518,9 @@ v_cvt_f32_f16 v5, v1.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 v_cvt_f32_f16 v255, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_cvt_f32_f16_dpp v255, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x16,0xfe,0x7f,0xff,0x6f,0x35,0x30] +v_cvt_f32_f16 v5, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_cvt_f32_f16_dpp v5, v127.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x16,0x0a,0x7e,0x7f,0x5f,0x01,0x01] + v_cvt_f32_i32 v5, v1 quad_perm:[3,2,1,0] // GFX11: v_cvt_f32_i32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x0a,0x0a,0x7e,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s index ba0c3495de2bb..160a0ec2c4eb6 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s @@ -2,7 +2,7 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s -v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +v_bfrev_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05] v_bfrev_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s index b1eb63699409f..55a25ad3ec81b 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_err.s @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5 +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --sort --version 5 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize32 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s @@ -116,6 +116,24 @@ v_cvt_f16_f32_e32 v128, 0xaf123456 dpp8:[7,6,5,4,3,2,1,0] v_cvt_f16_f32_e32 v128, 0xaf123456 quad_perm:[3,2,1,0] // GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction +v_cvt_f16_f32_e32 v128.h, 0xaf123456 +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v128.h, 0xaf123456 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v128.h, 0xaf123456 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v128.l, 0xaf123456 +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v128.l, 0xaf123456 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v128.l, 0xaf123456 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + v_cvt_f16_f32_e32 v255, v1 // GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode @@ -134,6 +152,42 @@ v_cvt_f16_f32_e32 v255, v255 dpp8:[7,6,5,4,3,2,1,0] v_cvt_f16_f32_e32 v255, v255 quad_perm:[3,2,1,0] // GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction +v_cvt_f16_f32_e32 v255.h, v1.h +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255.h, v1.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255.h, v1.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255.h, v255.h +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255.h, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255.h, v255.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255.l, v1.l +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255.l, v1.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255.l, v1.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255.l, v255.l +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_f16_f32_e32 v255.l, v255.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + v_cvt_f16_i16_e32 v128.h, 0xfe0b // GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction @@ -227,6 +281,24 @@ v_cvt_f32_f16_e32 v5, v199 dpp8:[7,6,5,4,3,2,1,0] v_cvt_f32_f16_e32 v5, v199 quad_perm:[3,2,1,0] // GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction +v_cvt_f32_f16_e32 v5.h, v199.h +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_f32_f16_e32 v5.h, v199.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_f32_f16_e32 v5.h, v199.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_f32_f16_e32 v5.l, v199.l +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_f32_f16_e32 v5.l, v199.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_cvt_f32_f16_e32 v5.l, v199.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction + v_cvt_i16_f16_e32 v128.h, 0xfe0b // GFX11: :[[@LINE-1]]:19: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s index 6facd4dca01bf..fdf3efdd864dc 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_t16_promote.s @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --sort --version 5 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s @@ -308,6 +308,9 @@ v_cvt_f16_f32 v255.h, ttmp15 v_cvt_f16_f32 v255.h, v1 // GFX11: v_cvt_f16_f32_e64 v255.h, v1 op_sel:[0,1] ; encoding: [0xff,0x40,0x8a,0xd5,0x01,0x01,0x00,0x00] +v_cvt_f16_f32 v255.h, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_f16_f32_e64_dpp v255.h, v1 op_sel:[0,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x40,0x8a,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + v_cvt_f16_f32 v255.h, v1 quad_perm:[3,2,1,0] // GFX11: v_cvt_f16_f32_e64_dpp v255.h, v1 op_sel:[0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x40,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s index 163010adf7bd4..b1233620fe613 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s @@ -4,7 +4,7 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s -v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo // W32: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x40] // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s index 2695059853b08..471f3f1baa677 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s @@ -4,7 +4,7 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s -v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] // W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode @@ -459,15 +459,15 @@ v_cndmask_b32 v255, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 boun // W64: v_cndmask_b32_dpp v255, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xff,0x03,0xff,0x6f,0x05,0x30] // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode -v_cndmask_b32_dpp v5, -v1, |v2|, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +v_cndmask_b32 v5, -v1, |v2|, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 // W64: v_cndmask_b32_dpp v5, -v1, |v2|, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x90,0x00] // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode -v_cndmask_b32_dpp v5, |v1|, -v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +v_cndmask_b32 v5, |v1|, -v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 // W64: v_cndmask_b32_dpp v5, |v1|, -v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x60,0x00] // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode -v_cndmask_b32_dpp v5, -|v1|, -|v2|, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +v_cndmask_b32 v5, -|v1|, -|v2|, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 // W64: v_cndmask_b32_dpp v5, -|v1|, -|v2|, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0x00] // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s index b379a5d06b99b..bce9f42ae52bc 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s @@ -4,7 +4,7 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s -v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] // W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_err.s index dedbcb55d7976..38a7319fd6734 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_err.s @@ -3,11 +3,11 @@ v_fmaak_f32 v0, 0xff32, v0, 0 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed -v_fmaak_f16 v0, 0xff32, v0, 0 +v_fmaak_f16 v0.l, 0xff32, v0.l, 0 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed v_fmamk_f32 v0, 0xff32, 1, v0 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed -v_fmamk_f16 v0, 0xff32, 1, v0 +v_fmamk_f16 v0.l, 0xff32, 1, v0.l // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index ce452957b0198..14949ed257000 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -644,13 +644,13 @@ v_ashrrev_i16 v5.l, src_scc, vcc_lo v_ashrrev_i16 v255.l, 0xfe0b, vcc_hi // GFX11: v_ashrrev_i16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x3a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_ashrrev_i16 v5.l, v1.h, v2.l +v_ashrrev_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] // GFX11: v_ashrrev_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x3a,0xd7,0x01,0x05,0x02,0x00] -v_ashrrev_i16 v5.l, v255.l, v255.h +v_ashrrev_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] // GFX11: v_ashrrev_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x3a,0xd7,0xff,0xff,0x03,0x00] -v_ashrrev_i16 v255.h, 0xfe0b, vcc_hi +v_ashrrev_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] // GFX11: v_ashrrev_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x3a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_ashrrev_i64 v[5:6], v1, vcc @@ -2732,13 +2732,13 @@ v_lshlrev_b16 v5.l, src_scc, vcc_lo v_lshlrev_b16 v255.l, 0xfe0b, vcc_hi // GFX11: v_lshlrev_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x38,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_lshlrev_b16 v5.l, v1.h, v2.l +v_lshlrev_b16 v5.l, v1.h, v2.l op_sel:[1,0,0] // GFX11: v_lshlrev_b16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x38,0xd7,0x01,0x05,0x02,0x00] -v_lshlrev_b16 v5.l, v255.l, v255.h +v_lshlrev_b16 v5.l, v255.l, v255.h op_sel:[0,1,0] // GFX11: v_lshlrev_b16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x38,0xd7,0xff,0xff,0x03,0x00] -v_lshlrev_b16 v255.h, 0xfe0b, vcc_hi +v_lshlrev_b16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] // GFX11: v_lshlrev_b16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x38,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_lshlrev_b64 v[5:6], v1, vcc @@ -2813,13 +2813,13 @@ v_lshrrev_b16 v5.l, src_scc, vcc_lo v_lshrrev_b16 v255.l, 0xfe0b, vcc_hi // GFX11: v_lshrrev_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x39,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_lshrrev_b16 v5.l, v1.h, v2.l +v_lshrrev_b16 v5.l, v1.h, v2.l op_sel:[1,0,0] // GFX11: v_lshrrev_b16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x39,0xd7,0x01,0x05,0x02,0x00] -v_lshrrev_b16 v5.l, v255.l, v255.h +v_lshrrev_b16 v5.l, v255.l, v255.h op_sel:[0,1,0] // GFX11: v_lshrrev_b16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x39,0xd7,0xff,0xff,0x03,0x00] -v_lshrrev_b16 v255.h, 0xfe0b, vcc_hi +v_lshrrev_b16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] // GFX11: v_lshrrev_b16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x39,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_lshrrev_b64 v[5:6], v1, vcc @@ -3740,13 +3740,13 @@ v_max_i16 v5.l, src_scc, vcc_lo v_max_i16 v255.l, 0xfe0b, vcc_hi // GFX11: v_max_i16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x0a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_max_i16 v5.l, v1.h, v2.l +v_max_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] // GFX11: v_max_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0a,0xd7,0x01,0x05,0x02,0x00] -v_max_i16 v5.l, v255.l, v255.h +v_max_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] // GFX11: v_max_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0a,0xd7,0xff,0xff,0x03,0x00] -v_max_i16 v255.h, 0xfe0b, vcc_hi +v_max_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] // GFX11: v_max_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x0a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_max_u16 v5.l, v1.l, v2.l @@ -3794,13 +3794,13 @@ v_max_u16 v5.l, src_scc, vcc_lo v_max_u16 v255.l, 0xfe0b, vcc_hi // GFX11: v_max_u16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_max_u16 v5.l, v1.h, v2.l +v_max_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] // GFX11: v_max_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x09,0xd7,0x01,0x05,0x02,0x00] -v_max_u16 v5.l, v255.l, v255.h +v_max_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] // GFX11: v_max_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x09,0xd7,0xff,0xff,0x03,0x00] -v_max_u16 v255.h, 0xfe0b, vcc_hi +v_max_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] // GFX11: v_max_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_maxmin_f16 v5.l, v1.l, v2.l, s3 @@ -4829,13 +4829,13 @@ v_min_i16 v5.l, src_scc, vcc_lo v_min_i16 v255.l, 0xfe0b, vcc_hi // GFX11: v_min_i16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x0c,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_min_i16 v5.l, v1.h, v2.l +v_min_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] // GFX11: v_min_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0c,0xd7,0x01,0x05,0x02,0x00] -v_min_i16 v5.l, v255.l, v255.h +v_min_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] // GFX11: v_min_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0c,0xd7,0xff,0xff,0x03,0x00] -v_min_i16 v255.h, 0xfe0b, vcc_hi +v_min_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] // GFX11: v_min_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x0c,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_min_u16 v5.l, v1.l, v2.l @@ -4883,13 +4883,13 @@ v_min_u16 v5.l, src_scc, vcc_lo v_min_u16 v255.l, 0xfe0b, vcc_hi // GFX11: v_min_u16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_min_u16 v5.l, v1.h, v2.l +v_min_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] // GFX11: v_min_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0b,0xd7,0x01,0x05,0x02,0x00] -v_min_u16 v5.l, v255.l, v255.h +v_min_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] // GFX11: v_min_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0b,0xd7,0xff,0xff,0x03,0x00] -v_min_u16 v255.h, 0xfe0b, vcc_hi +v_min_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] // GFX11: v_min_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_minmax_f16 v5.l, v1.l, v2.l, s3 @@ -5390,13 +5390,13 @@ v_mul_lo_u16 v5.l, src_scc, vcc_lo v_mul_lo_u16 v255.l, 0xfe0b, vcc_hi // GFX11: v_mul_lo_u16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x05,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_mul_lo_u16 v5.l, v1.h, v2.l +v_mul_lo_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] // GFX11: v_mul_lo_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x05,0xd7,0x01,0x05,0x02,0x00] -v_mul_lo_u16 v5.l, v255.l, v255.h +v_mul_lo_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] // GFX11: v_mul_lo_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x05,0xd7,0xff,0xff,0x03,0x00] -v_mul_lo_u16 v255.h, 0xfe0b, vcc_hi +v_mul_lo_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] // GFX11: v_mul_lo_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x05,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_mul_lo_u32 v5, v1, v2 diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s index f89833f9da651..1fae79d9469b3 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s @@ -1904,16 +1904,16 @@ v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bo v_lshlrev_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_lshlrev_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_lshlrev_b16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf +v_lshlrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf // GFX11: v_lshlrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +v_lshlrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 // GFX11: v_lshlrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 // GFX11: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -v_lshlrev_b16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +v_lshlrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 // GFX11: v_lshlrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] @@ -1958,16 +1958,16 @@ v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bo v_lshrrev_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_lshrrev_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_lshrrev_b16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf +v_lshrrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf // GFX11: v_lshrrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +v_lshrrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 // GFX11: v_lshrrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 // GFX11: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 // GFX11: v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] @@ -2702,16 +2702,16 @@ v_max_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_max_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_max_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_max_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf +v_max_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf // GFX11: v_max_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_max_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +v_max_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 // GFX11: v_max_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_max_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +v_max_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 // GFX11: v_max_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -v_max_i16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +v_max_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 // GFX11: v_max_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] @@ -2756,16 +2756,16 @@ v_max_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_max_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_max_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_max_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf +v_max_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf // GFX11: v_max_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_max_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +v_max_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 // GFX11: v_max_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_max_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +v_max_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 // GFX11: v_max_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -v_max_u16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +v_max_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 // GFX11: v_max_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] @@ -3791,16 +3791,16 @@ v_min_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_min_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_min_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_min_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf +v_min_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf // GFX11: v_min_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_min_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +v_min_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 // GFX11: v_min_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_min_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +v_min_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 // GFX11: v_min_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -v_min_i16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +v_min_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 // GFX11: v_min_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] @@ -3845,16 +3845,16 @@ v_min_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_min_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_min_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_min_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf +v_min_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf // GFX11: v_min_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_min_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +v_min_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 // GFX11: v_min_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_min_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +v_min_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 // GFX11: v_min_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -v_min_u16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +v_min_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 // GFX11: v_min_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s index 80ee725db319f..40d844d62f6f5 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s @@ -1093,16 +1093,16 @@ v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 v_lshlrev_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_lshlrev_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x38,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_lshlrev_b16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +v_lshlrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_lshlrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x38,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_lshlrev_b16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] +v_lshlrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_lshlrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x38,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX11: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x38,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_lshlrev_b16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 +v_lshlrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] // GFX11: v_lshlrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x38,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] @@ -1114,16 +1114,16 @@ v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 v_lshrrev_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_lshrrev_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x39,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_lshrrev_b16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +v_lshrrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_lshrrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x39,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_lshrrev_b16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] +v_lshrrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_lshrrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x39,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX11: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x39,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 +v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] // GFX11: v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x39,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] @@ -1696,16 +1696,16 @@ v_max_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 v_max_i16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_max_i16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x0a,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_max_i16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +v_max_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_max_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_max_i16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] +v_max_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_max_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_max_i16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +v_max_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX11: v_max_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x0a,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_max_i16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 +v_max_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] // GFX11: v_max_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x0a,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] v_max_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] @@ -1717,16 +1717,16 @@ v_max_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 v_max_u16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_max_u16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x09,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_max_u16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +v_max_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_max_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x09,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_max_u16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] +v_max_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_max_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x09,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_max_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +v_max_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX11: v_max_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x09,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_max_u16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 +v_max_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] // GFX11: v_max_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x09,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] @@ -2509,16 +2509,16 @@ v_min_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 v_min_i16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_min_i16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x0c,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_min_i16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +v_min_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_min_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_min_i16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] +v_min_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_min_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_min_i16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +v_min_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX11: v_min_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x0c,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_min_i16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 +v_min_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] // GFX11: v_min_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x0c,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] v_min_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] @@ -2530,16 +2530,16 @@ v_min_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 v_min_u16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_min_u16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x0b,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_min_u16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +v_min_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_min_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0b,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_min_u16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] +v_min_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_min_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0b,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_min_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +v_min_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX11: v_min_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x0b,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_min_u16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 +v_min_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] // GFX11: v_min_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x0b,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s index 57135ba148104..c898aaf6a45e4 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s @@ -3679,70 +3679,136 @@ v_cmp_t_f16_e32 vcc_lo, vcc_lo, v255.h v_cmp_t_f16_e32 vcc_lo, vcc_lo, v255.l // GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc, v1.h, v255.h +// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc, v1.h, v255.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc, v127, v255 +v_cmp_tru_f16_e32 vcc, v1.l, v255.l // GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +v_cmp_tru_f16_e32 vcc, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +v_cmp_tru_f16_e32 vcc, v1.l, v255.l quad_perm:[3,2,1,0] // GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc, v128, v2 +v_cmp_tru_f16_e32 vcc, v127.h, v255.h +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v127.h, v255.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v127.l, v255.l +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v127.l, v255.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v128.h, v2.h +// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v128.h, v2.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc, v128.l, v2.l // GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +v_cmp_tru_f16_e32 vcc, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +v_cmp_tru_f16_e32 vcc, v128.l, v2.l quad_perm:[3,2,1,0] // GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc, vcc_hi, v255 +v_cmp_tru_f16_e32 vcc, vcc_hi, v255.h // GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc, vcc_lo, v255 +v_cmp_tru_f16_e32 vcc, vcc_hi, v255.l // GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc, vcc_lo, v255.h +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc, vcc_lo, v255.l +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc_lo, v1.h, v255.h +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc_lo, v127, v255 +v_cmp_tru_f16_e32 vcc_lo, v1.h, v255.h quad_perm:[3,2,1,0] // GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +v_cmp_tru_f16_e32 vcc_lo, v1.l, v255.l // GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +v_cmp_tru_f16_e32 vcc_lo, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc_lo, v128, v2 +v_cmp_tru_f16_e32 vcc_lo, v1.l, v255.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v127.h, v255.h +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v127.h, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v127.h, v255.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v127.l, v255.l +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v127.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v127.l, v255.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v128.h, v2.h +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v128.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v128.h, v2.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, v128.l, v2.l // GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +v_cmp_tru_f16_e32 vcc_lo, v128.l, v2.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +v_cmp_tru_f16_e32 vcc_lo, v128.l, v2.l quad_perm:[3,2,1,0] // GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc_lo, vcc_hi, v255 +v_cmp_tru_f16_e32 vcc_lo, vcc_hi, v255.h +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, vcc_hi, v255.l +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_tru_f16_e32 vcc_lo, vcc_lo, v255.h // GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc_lo, vcc_lo, v255 +v_cmp_tru_f16_e32 vcc_lo, vcc_lo, v255.l // GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_u_f16_e32 vcc, v1.h, v255.h diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s index f48770eee0cb2..bc008c2729941 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s @@ -1009,22 +1009,40 @@ v_cmpx_t_f16_e32 v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] v_cmpx_t_f16_e32 v255.l, v2.l quad_perm:[3,2,1,0] // GFX11: :[[@LINE-1]]:18: error: invalid operand for instruction -v_cmpx_tru_f16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction +v_cmpx_tru_f16_e32 v1.h, v255.h +// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction -v_cmpx_tru_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction +v_cmpx_tru_f16_e32 v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction -v_cmpx_tru_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction +v_cmpx_tru_f16_e32 v1.h, v255.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction + +v_cmpx_tru_f16_e32 v1.l, v255.l +// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction + +v_cmpx_tru_f16_e32 v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction + +v_cmpx_tru_f16_e32 v1.l, v255.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction + +v_cmpx_tru_f16_e32 v255.h, v2.h +// GFX11: :[[@LINE-1]]:20: error: invalid operand for instruction + +v_cmpx_tru_f16_e32 v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:20: error: invalid operand for instruction + +v_cmpx_tru_f16_e32 v255.h, v2.h quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:20: error: invalid operand for instruction -v_cmpx_tru_f16_e32 v255, v2 +v_cmpx_tru_f16_e32 v255.l, v2.l // GFX11: :[[@LINE-1]]:20: error: invalid operand for instruction -v_cmpx_tru_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +v_cmpx_tru_f16_e32 v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: :[[@LINE-1]]:20: error: invalid operand for instruction -v_cmpx_tru_f16_e32 v255, v2 quad_perm:[3,2,1,0] +v_cmpx_tru_f16_e32 v255.l, v2.l quad_perm:[3,2,1,0] // GFX11: :[[@LINE-1]]:20: error: invalid operand for instruction v_cmpx_u_f16_e32 v1.h, v255.h diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s index 3cd040a253bca..be2ed508df964 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s @@ -2,9 +2,7 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s -// this file will be converted to true16 format when more true16 instructions are supported - -v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] +v_bfrev_b32 v5, v1 quad_perm:[3,2,1,0] // GFX12: v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0xff] v_bfrev_b32 v5, v1 quad_perm:[0,1,2,3] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s index fe56179e77775..656dc5c10dded 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s @@ -2,9 +2,7 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s -// this file will be converted to true16 format when more true16 instructions are supported - -v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +v_bfrev_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05] v_bfrev_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s index 6230fbac35898..5fc52ef6ba9c1 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s @@ -4,7 +4,7 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s -v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo // W32: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x40] // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode @@ -1102,50 +1102,62 @@ v_lshrrev_b32 v5, src_scc, v2 v_lshrrev_b32 v255, 0xaf123456, v255 // GFX12: v_lshrrev_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x33,0x56,0x34,0x12,0xaf] -v_max_num_f16 v5, v1, v2 -// GFX12: v_max_num_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x62] +v_max_num_f16 v5.l, v1.l, v2.l +// GFX12: v_max_num_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x62] -v_max_num_f16 v5, v127, v2 -// GFX12: v_max_num_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x62] +v_max_num_f16 v5.l, v127.l, v2.l +// GFX12: v_max_num_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x62] -v_max_num_f16 v5, s1, v2 -// GFX12: v_max_num_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x62] +v_max_num_f16 v5.l, s1, v2.l +// GFX12: v_max_num_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x62] -v_max_num_f16 v5, s105, v2 -// GFX12: v_max_num_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x62] +v_max_num_f16 v5.l, s105, v2.l +// GFX12: v_max_num_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x62] -v_max_num_f16 v5, vcc_lo, v2 -// GFX12: v_max_num_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x62] +v_max_num_f16 v5.l, vcc_lo, v2.l +// GFX12: v_max_num_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x62] -v_max_num_f16 v5, vcc_hi, v2 -// GFX12: v_max_num_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x62] +v_max_num_f16 v5.l, vcc_hi, v2.l +// GFX12: v_max_num_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x62] -v_max_num_f16 v5, ttmp15, v2 -// GFX12: v_max_num_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x62] +v_max_num_f16 v5.l, ttmp15, v2.l +// GFX12: v_max_num_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x62] -v_max_num_f16 v5, m0, v2 -// GFX12: v_max_num_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x62] +v_max_num_f16 v5.l, m0, v2.l +// GFX12: v_max_num_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x62] -v_max_num_f16 v5, exec_lo, v2 -// GFX12: v_max_num_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x62] +v_max_num_f16 v5.l, exec_lo, v2.l +// GFX12: v_max_num_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x62] -v_max_num_f16 v5, exec_hi, v2 -// GFX12: v_max_num_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x62] +v_max_num_f16 v5.l, exec_hi, v2.l +// GFX12: v_max_num_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x62] -v_max_num_f16 v5, null, v2 -// GFX12: v_max_num_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x62] +v_max_num_f16 v5.l, null, v2.l +// GFX12: v_max_num_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x62] -v_max_num_f16 v5, -1, v2 -// GFX12: v_max_num_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x62] +v_max_num_f16 v5.l, -1, v2.l +// GFX12: v_max_num_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x62] -v_max_num_f16 v5, 0.5, v2 -// GFX12: v_max_num_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x62] +v_max_num_f16 v5.l, 0.5, v2.l +// GFX12: v_max_num_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x62] -v_max_num_f16 v5, src_scc, v2 -// GFX12: v_max_num_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x62] +v_max_num_f16 v5.l, src_scc, v2.l +// GFX12: v_max_num_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x62] -v_max_num_f16 v127, 0xfe0b, v127 -// GFX12: v_max_num_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x62,0x0b,0xfe,0x00,0x00] +v_max_num_f16 v127.l, 0xfe0b, v127.l +// GFX12: v_max_num_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x62,0x0b,0xfe,0x00,0x00] + +v_max_num_f16 v5.l, v1.h, v2.l +// GFX12: v_max_num_f16_e32 v5.l, v1.h, v2.l ; encoding: [0x81,0x05,0x0a,0x62] + +v_max_num_f16 v5.l, v127.h, v2.l +// GFX12: v_max_num_f16_e32 v5.l, v127.h, v2.l ; encoding: [0xff,0x05,0x0a,0x62] + +v_max_num_f16 v5.h, src_scc, v2.h +// GFX12: v_max_num_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x63] + +v_max_num_f16 v127.h, 0xfe0b, v127.h +// GFX12: v_max_num_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x63,0x0b,0xfe,0x00,0x00] v_max_num_f32 v5, v1, v2 // GFX12: v_max_num_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x2c] @@ -1318,50 +1330,62 @@ v_max_u32 v5, src_scc, v2 v_max_u32 v255, 0xaf123456, v255 // GFX12: v_max_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x29,0x56,0x34,0x12,0xaf] -v_min_num_f16 v5, v1, v2 -// GFX12: v_min_num_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x60] +v_min_num_f16 v5.l, v1.l, v2.l +// GFX12: v_min_num_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x60] + +v_min_num_f16 v5.l, v127.l, v2.l +// GFX12: v_min_num_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x60] + +v_min_num_f16 v5.l, s1, v2.l +// GFX12: v_min_num_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x60] + +v_min_num_f16 v5.l, s105, v2.l +// GFX12: v_min_num_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x60] + +v_min_num_f16 v5.l, vcc_lo, v2.l +// GFX12: v_min_num_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x60] -v_min_num_f16 v5, v127, v2 -// GFX12: v_min_num_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x60] +v_min_num_f16 v5.l, vcc_hi, v2.l +// GFX12: v_min_num_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x60] -v_min_num_f16 v5, s1, v2 -// GFX12: v_min_num_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x60] +v_min_num_f16 v5.l, ttmp15, v2.l +// GFX12: v_min_num_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x60] -v_min_num_f16 v5, s105, v2 -// GFX12: v_min_num_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x60] +v_min_num_f16 v5.l, m0, v2.l +// GFX12: v_min_num_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x60] -v_min_num_f16 v5, vcc_lo, v2 -// GFX12: v_min_num_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x60] +v_min_num_f16 v5.l, exec_lo, v2.l +// GFX12: v_min_num_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x60] -v_min_num_f16 v5, vcc_hi, v2 -// GFX12: v_min_num_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x60] +v_min_num_f16 v5.l, exec_hi, v2.l +// GFX12: v_min_num_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x60] -v_min_num_f16 v5, ttmp15, v2 -// GFX12: v_min_num_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x60] +v_min_num_f16 v5.l, null, v2.l +// GFX12: v_min_num_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x60] -v_min_num_f16 v5, m0, v2 -// GFX12: v_min_num_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x60] +v_min_num_f16 v5.l, -1, v2.l +// GFX12: v_min_num_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x60] -v_min_num_f16 v5, exec_lo, v2 -// GFX12: v_min_num_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x60] +v_min_num_f16 v5.l, 0.5, v2.l +// GFX12: v_min_num_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x60] -v_min_num_f16 v5, exec_hi, v2 -// GFX12: v_min_num_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x60] +v_min_num_f16 v5.l, src_scc, v2.l +// GFX12: v_min_num_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x60] -v_min_num_f16 v5, null, v2 -// GFX12: v_min_num_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x60] +v_min_num_f16 v127.l, 0xfe0b, v127.l +// GFX12: v_min_num_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x60,0x0b,0xfe,0x00,0x00] -v_min_num_f16 v5, -1, v2 -// GFX12: v_min_num_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x60] +v_min_num_f16 v5.l, v1.h, v2.l +// GFX12: v_min_num_f16_e32 v5.l, v1.h, v2.l ; encoding: [0x81,0x05,0x0a,0x60] -v_min_num_f16 v5, 0.5, v2 -// GFX12: v_min_num_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x60] +v_min_num_f16 v5.l, v127.h, v2.l +// GFX12: v_min_num_f16_e32 v5.l, v127.h, v2.l ; encoding: [0xff,0x05,0x0a,0x60] -v_min_num_f16 v5, src_scc, v2 -// GFX12: v_min_num_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x60] +v_min_num_f16 v5.h, src_scc, v2.h +// GFX12: v_min_num_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x61] -v_min_num_f16 v127, 0xfe0b, v127 -// GFX12: v_min_num_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x60,0x0b,0xfe,0x00,0x00] +v_min_num_f16 v127.h, 0xfe0b, v127.h +// GFX12: v_min_num_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x61,0x0b,0xfe,0x00,0x00] v_min_num_f32 v5, v1, v2 // GFX12: v_min_num_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x2a] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16.s index c9f05df0e7c02..4611626a0a1db 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16.s @@ -4,7 +4,7 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s -v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] // W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode @@ -750,47 +750,53 @@ v_lshrrev_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi: v_lshrrev_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_lshrrev_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xff,0x33,0xff,0x6f,0x05,0x30] -v_max_num_f16 v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_max_num_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x1b,0x00,0xff] +v_max_num_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_max_num_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x1b,0x00,0xff] -v_max_num_f16 v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: v_max_num_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0xe4,0x00,0xff] +v_max_num_f16 v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_max_num_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0xe4,0x00,0xff] -v_max_num_f16 v5, v1, v2 row_mirror -// GFX12: v_max_num_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x40,0x01,0xff] +v_max_num_f16 v5.l, v1.l, v2.l row_mirror +// GFX12: v_max_num_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x40,0x01,0xff] -v_max_num_f16 v5, v1, v2 row_half_mirror -// GFX12: v_max_num_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x41,0x01,0xff] +v_max_num_f16 v5.l, v1.l, v2.l row_half_mirror +// GFX12: v_max_num_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x41,0x01,0xff] -v_max_num_f16 v5, v1, v2 row_shl:1 -// GFX12: v_max_num_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x01,0x01,0xff] +v_max_num_f16 v5.l, v1.l, v2.l row_shl:1 +// GFX12: v_max_num_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x01,0x01,0xff] -v_max_num_f16 v5, v1, v2 row_shl:15 -// GFX12: v_max_num_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x0f,0x01,0xff] +v_max_num_f16 v5.l, v1.l, v2.l row_shl:15 +// GFX12: v_max_num_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x0f,0x01,0xff] -v_max_num_f16 v5, v1, v2 row_shr:1 -// GFX12: v_max_num_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x11,0x01,0xff] +v_max_num_f16 v5.l, v1.l, v2.l row_shr:1 +// GFX12: v_max_num_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x11,0x01,0xff] -v_max_num_f16 v5, v1, v2 row_shr:15 -// GFX12: v_max_num_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x1f,0x01,0xff] +v_max_num_f16 v5.l, v1.l, v2.l row_shr:15 +// GFX12: v_max_num_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x1f,0x01,0xff] -v_max_num_f16 v5, v1, v2 row_ror:1 -// GFX12: v_max_num_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x21,0x01,0xff] +v_max_num_f16 v5.l, v1.l, v2.l row_ror:1 +// GFX12: v_max_num_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x21,0x01,0xff] -v_max_num_f16 v5, v1, v2 row_ror:15 -// GFX12: v_max_num_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x2f,0x01,0xff] +v_max_num_f16 v5.l, v1.l, v2.l row_ror:15 +// GFX12: v_max_num_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x2f,0x01,0xff] -v_max_num_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_max_num_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x50,0x01,0xff] +v_max_num_f16 v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_max_num_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x50,0x01,0xff] -v_max_num_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_max_num_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x5f,0x01,0x01] +v_max_num_f16 v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_max_num_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x5f,0x01,0x01] -v_max_num_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_max_num_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x60,0x09,0x13] +v_max_num_f16 v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_max_num_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x60,0x09,0x13] -v_max_num_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_max_num_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x62,0x7f,0x6f,0xf5,0x30] +v_max_num_f16 v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_max_num_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x62,0x7f,0x6f,0xf5,0x30] + +v_max_num_f16 v5.h, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_max_num_f16_dpp v5.h, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0b,0x63,0x81,0x60,0x09,0x13] + +v_max_num_f16 v127.h, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_max_num_f16_dpp v127.h, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xff,0x63,0xff,0x6f,0xf5,0x30] v_max_num_f32 v5, v1, v2 quad_perm:[3,2,1,0] // GFX12: v_max_num_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x1b,0x00,0xff] @@ -918,47 +924,53 @@ v_max_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 v_max_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_max_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xff,0x29,0xff,0x6f,0x05,0x30] -v_min_num_f16 v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_min_num_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x1b,0x00,0xff] +v_min_num_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_min_num_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x1b,0x00,0xff] + +v_min_num_f16 v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_min_num_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0xe4,0x00,0xff] + +v_min_num_f16 v5.l, v1.l, v2.l row_mirror +// GFX12: v_min_num_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x40,0x01,0xff] -v_min_num_f16 v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: v_min_num_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0xe4,0x00,0xff] +v_min_num_f16 v5.l, v1.l, v2.l row_half_mirror +// GFX12: v_min_num_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x41,0x01,0xff] -v_min_num_f16 v5, v1, v2 row_mirror -// GFX12: v_min_num_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x40,0x01,0xff] +v_min_num_f16 v5.l, v1.l, v2.l row_shl:1 +// GFX12: v_min_num_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x01,0x01,0xff] -v_min_num_f16 v5, v1, v2 row_half_mirror -// GFX12: v_min_num_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x41,0x01,0xff] +v_min_num_f16 v5.l, v1.l, v2.l row_shl:15 +// GFX12: v_min_num_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x0f,0x01,0xff] -v_min_num_f16 v5, v1, v2 row_shl:1 -// GFX12: v_min_num_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x01,0x01,0xff] +v_min_num_f16 v5.l, v1.l, v2.l row_shr:1 +// GFX12: v_min_num_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x11,0x01,0xff] -v_min_num_f16 v5, v1, v2 row_shl:15 -// GFX12: v_min_num_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x0f,0x01,0xff] +v_min_num_f16 v5.l, v1.l, v2.l row_shr:15 +// GFX12: v_min_num_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x1f,0x01,0xff] -v_min_num_f16 v5, v1, v2 row_shr:1 -// GFX12: v_min_num_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x11,0x01,0xff] +v_min_num_f16 v5.l, v1.l, v2.l row_ror:1 +// GFX12: v_min_num_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x21,0x01,0xff] -v_min_num_f16 v5, v1, v2 row_shr:15 -// GFX12: v_min_num_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x1f,0x01,0xff] +v_min_num_f16 v5.l, v1.l, v2.l row_ror:15 +// GFX12: v_min_num_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x2f,0x01,0xff] -v_min_num_f16 v5, v1, v2 row_ror:1 -// GFX12: v_min_num_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x21,0x01,0xff] +v_min_num_f16 v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_min_num_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x50,0x01,0xff] -v_min_num_f16 v5, v1, v2 row_ror:15 -// GFX12: v_min_num_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x2f,0x01,0xff] +v_min_num_f16 v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_min_num_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x5f,0x01,0x01] -v_min_num_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_min_num_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x50,0x01,0xff] +v_min_num_f16 v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_min_num_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x60,0x09,0x13] -v_min_num_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_min_num_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x5f,0x01,0x01] +v_min_num_f16 v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_min_num_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x60,0x7f,0x6f,0xf5,0x30] -v_min_num_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_min_num_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x60,0x09,0x13] +v_min_num_f16 v5.h, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_min_num_f16_dpp v5.h, v1.h, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x04,0x0b,0x61,0x81,0x60,0x09,0x13] -v_min_num_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_min_num_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x60,0x7f,0x6f,0xf5,0x30] +v_min_num_f16 v127.h, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_min_num_f16_dpp v127.h, -|v127.h|, -|v127.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xff,0x61,0xff,0x6f,0xf5,0x30] v_min_num_f32 v5, v1, v2 quad_perm:[3,2,1,0] // GFX12: v_min_num_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8.s index 0063b53fe743c..2af55189f3e5c 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8.s @@ -4,7 +4,7 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s -v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] // W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode @@ -178,14 +178,20 @@ v_lshrrev_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_lshrrev_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_lshrrev_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xff,0x33,0xff,0x00,0x00,0x00] -v_max_num_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max_num_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x62,0x01,0x77,0x39,0x05] +v_max_num_f16 v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max_num_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x62,0x01,0x77,0x39,0x05] -v_max_num_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_max_num_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x62,0x01,0x77,0x39,0x05] +v_max_num_f16 v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_max_num_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x62,0x01,0x77,0x39,0x05] -v_max_num_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: v_max_num_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfe,0x62,0x7f,0x00,0x00,0x00] +v_max_num_f16 v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_max_num_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfe,0x62,0x7f,0x00,0x00,0x00] + +v_max_num_f16 v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_max_num_f16_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0b,0x63,0x81,0x77,0x39,0x05] + +v_max_num_f16 v127.h, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_max_num_f16_dpp v127.h, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xff,0x63,0xff,0x00,0x00,0x00] v_max_num_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_max_num_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x2c,0x01,0x77,0x39,0x05] @@ -214,14 +220,20 @@ v_max_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_max_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_max_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xff,0x29,0xff,0x00,0x00,0x00] -v_min_num_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min_num_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x60,0x01,0x77,0x39,0x05] +v_min_num_f16 v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min_num_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x60,0x01,0x77,0x39,0x05] + +v_min_num_f16 v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_min_num_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x60,0x01,0x77,0x39,0x05] + +v_min_num_f16 v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_min_num_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfe,0x60,0x7f,0x00,0x00,0x00] -v_min_num_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_min_num_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0a,0x60,0x01,0x77,0x39,0x05] +v_min_num_f16 v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_min_num_f16_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x04,0x0b,0x61,0x81,0x77,0x39,0x05] -v_min_num_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: v_min_num_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfe,0x60,0x7f,0x00,0x00,0x00] +v_min_num_f16 v127.h, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_min_num_f16_dpp v127.h, v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xff,0x61,0xff,0x00,0x00,0x00] v_min_num_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_min_num_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x2a,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_err.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_err.s index 7768e26c972b0..8e68389dd09cd 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_err.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_err.s @@ -190,59 +190,113 @@ v_ldexp_f16_e32 v5.h, v255.h, v2.h v_ldexp_f16_e32 v5.l, v255.l, v2.l // GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction -v_max_num_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +v_max_num_f16_dpp v255.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] // GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_max_num_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +v_max_num_f16_dpp v255.h, v1.h, v2.h quad_perm:[3,2,1,0] // GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_max_num_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction +v_max_num_f16_dpp v255.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_max_num_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction +v_max_num_f16_dpp v255.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_max_num_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction +v_max_num_f16_dpp v5.h, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction -v_max_num_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction +v_max_num_f16_dpp v5.h, v1.h, v255.h quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction + +v_max_num_f16_dpp v5.h, v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction + +v_max_num_f16_dpp v5.h, v255.h, v2.h quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction + +v_max_num_f16_dpp v5.l, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction + +v_max_num_f16_dpp v5.l, v1.l, v255.l quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction -v_max_num_f16_e32 v255, v1, v2 +v_max_num_f16_dpp v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction + +v_max_num_f16_dpp v5.l, v255.l, v2.l quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction + +v_max_num_f16_e32 v255.h, v1.h, v2.h // GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_max_num_f16_e32 v5, v1, v255 -// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction +v_max_num_f16_e32 v255.l, v1.l, v2.l +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_max_num_f16_e32 v5, v255, v2 -// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction +v_max_num_f16_e32 v5.h, v1.h, v255.h +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction + +v_max_num_f16_e32 v5.h, v255.h, v2.h +// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction + +v_max_num_f16_e32 v5.l, v1.l, v255.l +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction + +v_max_num_f16_e32 v5.l, v255.l, v2.l +// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction -v_min_num_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +v_min_num_f16_dpp v255.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] // GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_min_num_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +v_min_num_f16_dpp v255.h, v1.h, v2.h quad_perm:[3,2,1,0] // GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_min_num_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction +v_min_num_f16_dpp v255.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_min_num_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction +v_min_num_f16_dpp v255.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_min_num_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction +v_min_num_f16_dpp v5.h, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction -v_min_num_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction +v_min_num_f16_dpp v5.h, v1.h, v255.h quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction + +v_min_num_f16_dpp v5.h, v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction + +v_min_num_f16_dpp v5.h, v255.h, v2.h quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction + +v_min_num_f16_dpp v5.l, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction + +v_min_num_f16_dpp v5.l, v1.l, v255.l quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction -v_min_num_f16_e32 v255, v1, v2 +v_min_num_f16_dpp v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction + +v_min_num_f16_dpp v5.l, v255.l, v2.l quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction + +v_min_num_f16_e32 v255.h, v1.h, v2.h // GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_min_num_f16_e32 v5, v1, v255 -// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction +v_min_num_f16_e32 v255.l, v1.l, v2.l +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_min_num_f16_e32 v5, v255, v2 -// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction +v_min_num_f16_e32 v5.h, v1.h, v255.h +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction + +v_min_num_f16_e32 v5.h, v255.h, v2.h +// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction + +v_min_num_f16_e32 v5.l, v1.l, v255.l +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction + +v_min_num_f16_e32 v5.l, v255.l, v2.l +// GFX12: :[[@LINE-1]]:25: error: invalid operand for instruction v_mul_f16_dpp v255.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] // GFX12: :[[@LINE-1]]:15: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_promote.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_promote.s index c9c70b7d3ca5c..fe1271030f6f8 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_promote.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_promote.s @@ -118,59 +118,113 @@ v_ldexp_f16 v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] v_ldexp_f16 v5.l, v255.l, v2.l quad_perm:[3,2,1,0] // GFX12: v_ldexp_f16_e64_dpp v5.l, v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_max_num_f16 v255, v1, v2 -// GFX12: v_max_num_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x31,0xd5,0x01,0x05,0x02,0x00] +v_max_num_f16 v255.h, v1.h, v2.h +// GFX12: v_max_num_f16_e64 v255.h, v1.h, v2.h op_sel:[1,1,1] ; encoding: [0xff,0x58,0x31,0xd5,0x01,0x05,0x02,0x00] -v_max_num_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max_num_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x31,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_max_num_f16 v255.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v255.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x58,0x31,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_max_num_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_max_num_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_max_num_f16 v255.h, v1.h, v2.h quad_perm:[3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v255.h, v1.h, v2.h op_sel:[1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x58,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max_num_f16 v5, v1, v255 -// GFX12: v_max_num_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x31,0xd5,0x01,0xff,0x03,0x00] +v_max_num_f16 v255.l, v1.l, v2.l +// GFX12: v_max_num_f16_e64 v255.l, v1.l, v2.l ; encoding: [0xff,0x00,0x31,0xd5,0x01,0x05,0x02,0x00] -v_max_num_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max_num_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +v_max_num_f16 v255.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v255.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x31,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_max_num_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_max_num_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +v_max_num_f16 v255.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v255.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max_num_f16 v5, v255, v2 -// GFX12: v_max_num_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x31,0xd5,0xff,0x05,0x02,0x00] +v_max_num_f16 v5.h, v1.h, v255.h +// GFX12: v_max_num_f16_e64 v5.h, v1.h, v255.h op_sel:[1,1,1] ; encoding: [0x05,0x58,0x31,0xd5,0x01,0xff,0x03,0x00] -v_max_num_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max_num_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] +v_max_num_f16 v5.h, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5.h, v1.h, v255.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x31,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_max_num_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_max_num_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] +v_max_num_f16 v5.h, v1.h, v255.h quad_perm:[3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5.h, v1.h, v255.h op_sel:[1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x31,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_min_num_f16 v255, v1, v2 -// GFX12: v_min_num_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x30,0xd5,0x01,0x05,0x02,0x00] +v_max_num_f16 v5.h, v255.h, v2.h +// GFX12: v_max_num_f16_e64 v5.h, v255.h, v2.h op_sel:[1,1,1] ; encoding: [0x05,0x58,0x31,0xd5,0xff,0x05,0x02,0x00] -v_min_num_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min_num_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x30,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_max_num_f16 v5.h, v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5.h, v255.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x31,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_min_num_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_min_num_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_max_num_f16 v5.h, v255.h, v2.h quad_perm:[3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5.h, v255.h, v2.h op_sel:[1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x31,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_min_num_f16 v5, v1, v255 -// GFX12: v_min_num_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x30,0xd5,0x01,0xff,0x03,0x00] +v_max_num_f16 v5.l, v1.l, v255.l +// GFX12: v_max_num_f16_e64 v5.l, v1.l, v255.l ; encoding: [0x05,0x00,0x31,0xd5,0x01,0xff,0x03,0x00] -v_min_num_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min_num_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +v_max_num_f16 v5.l, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5.l, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_min_num_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_min_num_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +v_max_num_f16 v5.l, v1.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5.l, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_min_num_f16 v5, v255, v2 -// GFX12: v_min_num_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x30,0xd5,0xff,0x05,0x02,0x00] +v_max_num_f16 v5.l, v255.l, v2.l +// GFX12: v_max_num_f16_e64 v5.l, v255.l, v2.l ; encoding: [0x05,0x00,0x31,0xd5,0xff,0x05,0x02,0x00] -v_min_num_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min_num_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] +v_max_num_f16 v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_min_num_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_min_num_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] +v_max_num_f16 v5.l, v255.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5.l, v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] + +v_min_num_f16 v255.h, v1.h, v2.h +// GFX12: v_min_num_f16_e64 v255.h, v1.h, v2.h op_sel:[1,1,1] ; encoding: [0xff,0x58,0x30,0xd5,0x01,0x05,0x02,0x00] + +v_min_num_f16 v255.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v255.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x58,0x30,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_num_f16 v255.h, v1.h, v2.h quad_perm:[3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v255.h, v1.h, v2.h op_sel:[1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x58,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_min_num_f16 v255.l, v1.l, v2.l +// GFX12: v_min_num_f16_e64 v255.l, v1.l, v2.l ; encoding: [0xff,0x00,0x30,0xd5,0x01,0x05,0x02,0x00] + +v_min_num_f16 v255.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v255.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x30,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_num_f16 v255.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v255.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_min_num_f16 v5.h, v1.h, v255.h +// GFX12: v_min_num_f16_e64 v5.h, v1.h, v255.h op_sel:[1,1,1] ; encoding: [0x05,0x58,0x30,0xd5,0x01,0xff,0x03,0x00] + +v_min_num_f16 v5.h, v1.h, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v5.h, v1.h, v255.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x30,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] + +v_min_num_f16 v5.h, v1.h, v255.h quad_perm:[3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v5.h, v1.h, v255.h op_sel:[1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x30,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] + +v_min_num_f16 v5.h, v255.h, v2.h +// GFX12: v_min_num_f16_e64 v5.h, v255.h, v2.h op_sel:[1,1,1] ; encoding: [0x05,0x58,0x30,0xd5,0xff,0x05,0x02,0x00] + +v_min_num_f16 v5.h, v255.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v5.h, v255.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x30,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] + +v_min_num_f16 v5.h, v255.h, v2.h quad_perm:[3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v5.h, v255.h, v2.h op_sel:[1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x30,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] + +v_min_num_f16 v5.l, v1.l, v255.l +// GFX12: v_min_num_f16_e64 v5.l, v1.l, v255.l ; encoding: [0x05,0x00,0x30,0xd5,0x01,0xff,0x03,0x00] + +v_min_num_f16 v5.l, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v5.l, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] + +v_min_num_f16 v5.l, v1.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v5.l, v1.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] + +v_min_num_f16 v5.l, v255.l, v2.l +// GFX12: v_min_num_f16_e64 v5.l, v255.l, v2.l ; encoding: [0x05,0x00,0x30,0xd5,0xff,0x05,0x02,0x00] + +v_min_num_f16 v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] + +v_min_num_f16 v5.l, v255.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v5.l, v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] v_mul_f16 v255.h, v1.h, v2.h // GFX12: v_mul_f16_e64 v255.h, v1.h, v2.h op_sel:[1,1,1] ; encoding: [0xff,0x58,0x35,0xd5,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_aliases.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_aliases.s index 63943fa3f8208..8fa842749989f 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_aliases.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_aliases.s @@ -16,8 +16,8 @@ v_max3_f16 v5.l, v1.l, v2.l, v3.l v_med3_f32 v5, v1, v2, v3 // GFX12: v_med3_num_f32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x31,0xd6,0x01,0x05,0x0e,0x04] -v_med3_f16 v5, v1, v2, v3 -// GFX12: v_med3_num_f16 v5, v1, v2, v3 ; encoding: [0x05,0x00,0x32,0xd6,0x01,0x05,0x0e,0x04] +v_med3_f16 v5.l, v1.l, v2.l, v3.l +// GFX12: v_med3_num_f16 v5.l, v1.l, v2.l, v3.l ; encoding: [0x05,0x00,0x32,0xd6,0x01,0x05,0x0e,0x04] v_minmax_f32_e64_dpp v0, -v1, -v2, -v3 dpp8:[0,1,2,3,4,5,6,7] // GFX12: v_minmax_num_f32_e64_dpp v0, -v1, -v2, -v3 dpp8:[0,1,2,3,4,5,6,7] ; encoding: [0x00,0x00,0x68,0xd6,0xe9,0x04,0x0e,0xe4,0x01,0x88,0xc6,0xfa] @@ -43,11 +43,11 @@ v_max_f64 v[5:6], s[2:3], s[4:5] v_min_f64 v[5:6], s[2:3], s[4:5] // GFX12: v_min_num_f64_e64 v[5:6], s[2:3], s[4:5] ; encoding: [0x05,0x00,0x0d,0xd5,0x02,0x08,0x00,0x00] -v_cvt_pknorm_i16_f16 v5, v1, v2 -// GFX12: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +v_cvt_pknorm_i16_f16 v5, v1.l, v2.l +// GFX12: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] -v_cvt_pknorm_u16_f16 v5, v1, v2 -// GFX12: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +v_cvt_pknorm_u16_f16 v5, v1.l, v2.l +// GFX12: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] v_add3_nc_u32 v5, v1, v2, s3 // GFX12: v_add3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2.s index f16838919a4aa..fc11b965077f9 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2.s @@ -848,50 +848,59 @@ v_lshrrev_b32_e64 v5, src_scc, vcc_lo v_lshrrev_b32_e64 v255, 0xaf123456, vcc_hi // GFX12: v_lshrrev_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x19,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_max_num_f16_e64 v5, v1, v2 -// GFX12: v_max_num_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x31,0xd5,0x01,0x05,0x02,0x00] +v_max_num_f16_e64 v5.l, v1.l, v2.l +// GFX12: v_max_num_f16_e64 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x31,0xd5,0x01,0x05,0x02,0x00] -v_max_num_f16_e64 v5, v255, v255 -// GFX12: v_max_num_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x31,0xd5,0xff,0xff,0x03,0x00] +v_max_num_f16_e64 v5.l, v255.l, v255.l +// GFX12: v_max_num_f16_e64 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x31,0xd5,0xff,0xff,0x03,0x00] -v_max_num_f16_e64 v5, s1, s2 -// GFX12: v_max_num_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x31,0xd5,0x01,0x04,0x00,0x00] +v_max_num_f16_e64 v5.l, s1, s2 +// GFX12: v_max_num_f16_e64 v5.l, s1, s2 ; encoding: [0x05,0x00,0x31,0xd5,0x01,0x04,0x00,0x00] -v_max_num_f16_e64 v5, s105, s105 -// GFX12: v_max_num_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x31,0xd5,0x69,0xd2,0x00,0x00] +v_max_num_f16_e64 v5.l, s105, s105 +// GFX12: v_max_num_f16_e64 v5.l, s105, s105 ; encoding: [0x05,0x00,0x31,0xd5,0x69,0xd2,0x00,0x00] -v_max_num_f16_e64 v5, vcc_lo, ttmp15 -// GFX12: v_max_num_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x31,0xd5,0x6a,0xf6,0x00,0x00] +v_max_num_f16_e64 v5.l, vcc_lo, ttmp15 +// GFX12: v_max_num_f16_e64 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x31,0xd5,0x6a,0xf6,0x00,0x00] -v_max_num_f16_e64 v5, vcc_hi, 0xfe0b -// GFX12: v_max_num_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x31,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +v_max_num_f16_e64 v5.l, vcc_hi, 0xfe0b +// GFX12: v_max_num_f16_e64 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x31,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_max_num_f16_e64 v5, ttmp15, src_scc -// GFX12: v_max_num_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x31,0xd5,0x7b,0xfa,0x01,0x00] +v_max_num_f16_e64 v5.l, ttmp15, src_scc +// GFX12: v_max_num_f16_e64 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x31,0xd5,0x7b,0xfa,0x01,0x00] -v_max_num_f16_e64 v5, m0, 0.5 -// GFX12: v_max_num_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x31,0xd5,0x7d,0xe0,0x01,0x00] +v_max_num_f16_e64 v5.l, m0, 0.5 +// GFX12: v_max_num_f16_e64 v5.l, m0, 0.5 ; encoding: [0x05,0x00,0x31,0xd5,0x7d,0xe0,0x01,0x00] -v_max_num_f16_e64 v5, exec_lo, -1 -// GFX12: v_max_num_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x31,0xd5,0x7e,0x82,0x01,0x00] +v_max_num_f16_e64 v5.l, exec_lo, -1 +// GFX12: v_max_num_f16_e64 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x31,0xd5,0x7e,0x82,0x01,0x00] -v_max_num_f16_e64 v5, |exec_hi|, null -// GFX12: v_max_num_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x31,0xd5,0x7f,0xf8,0x00,0x00] +v_max_num_f16_e64 v5.l, |exec_hi|, null +// GFX12: v_max_num_f16_e64 v5.l, |exec_hi|, null ; encoding: [0x05,0x01,0x31,0xd5,0x7f,0xf8,0x00,0x00] -v_max_num_f16_e64 v5, null, exec_lo -// GFX12: v_max_num_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x31,0xd5,0x7c,0xfc,0x00,0x00] +v_max_num_f16_e64 v5.l, null, exec_lo +// GFX12: v_max_num_f16_e64 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x31,0xd5,0x7c,0xfc,0x00,0x00] -v_max_num_f16_e64 v5, -1, exec_hi -// GFX12: v_max_num_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x31,0xd5,0xc1,0xfe,0x00,0x00] +v_max_num_f16_e64 v5.l, -1, exec_hi +// GFX12: v_max_num_f16_e64 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x31,0xd5,0xc1,0xfe,0x00,0x00] -v_max_num_f16_e64 v5, 0.5, -m0 mul:2 -// GFX12: v_max_num_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x31,0xd5,0xf0,0xfa,0x00,0x48] +v_max_num_f16_e64 v5.l, 0.5, -m0 mul:2 +// GFX12: v_max_num_f16_e64 v5.l, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x31,0xd5,0xf0,0xfa,0x00,0x48] -v_max_num_f16_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX12: v_max_num_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x31,0xd5,0xfd,0xd4,0x00,0x30] +v_max_num_f16_e64 v5.l, -src_scc, |vcc_lo| mul:4 +// GFX12: v_max_num_f16_e64 v5.l, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x31,0xd5,0xfd,0xd4,0x00,0x30] -v_max_num_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 -// GFX12: v_max_num_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x31,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] +v_max_num_f16_e64 v255.l, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX12: v_max_num_f16_e64 v255.l, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x31,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] + +v_max_num_f16_e64 v5.l, v1.h, v2.l +// GFX12: v_max_num_f16_e64 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x31,0xd5,0x01,0x05,0x02,0x00] + +v_max_num_f16_e64 v5.l, v255.l, v255.h +// GFX12: v_max_num_f16_e64 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x31,0xd5,0xff,0xff,0x03,0x00] + +v_max_num_f16_e64 v255.h, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX12: v_max_num_f16_e64 v255.h, -|0xfe0b|, -|vcc_hi| op_sel:[0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x31,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] v_max_num_f32_e64 v5, v1, v2 // GFX12: v_max_num_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x16,0xd5,0x01,0x05,0x02,0x00] @@ -1064,50 +1073,59 @@ v_max_u32_e64 v5, src_scc, vcc_lo v_max_u32_e64 v255, 0xaf123456, vcc_hi // GFX12: v_max_u32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x14,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_min_num_f16_e64 v5, v1, v2 -// GFX12: v_min_num_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x30,0xd5,0x01,0x05,0x02,0x00] +v_min_num_f16_e64 v5.l, v1.l, v2.l +// GFX12: v_min_num_f16_e64 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x30,0xd5,0x01,0x05,0x02,0x00] + +v_min_num_f16_e64 v5.l, v255.l, v255.l +// GFX12: v_min_num_f16_e64 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x30,0xd5,0xff,0xff,0x03,0x00] + +v_min_num_f16_e64 v5.l, s1, s2 +// GFX12: v_min_num_f16_e64 v5.l, s1, s2 ; encoding: [0x05,0x00,0x30,0xd5,0x01,0x04,0x00,0x00] + +v_min_num_f16_e64 v5.l, s105, s105 +// GFX12: v_min_num_f16_e64 v5.l, s105, s105 ; encoding: [0x05,0x00,0x30,0xd5,0x69,0xd2,0x00,0x00] -v_min_num_f16_e64 v5, v255, v255 -// GFX12: v_min_num_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x30,0xd5,0xff,0xff,0x03,0x00] +v_min_num_f16_e64 v5.l, vcc_lo, ttmp15 +// GFX12: v_min_num_f16_e64 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x30,0xd5,0x6a,0xf6,0x00,0x00] -v_min_num_f16_e64 v5, s1, s2 -// GFX12: v_min_num_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x30,0xd5,0x01,0x04,0x00,0x00] +v_min_num_f16_e64 v5.l, vcc_hi, 0xfe0b +// GFX12: v_min_num_f16_e64 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x30,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_min_num_f16_e64 v5, s105, s105 -// GFX12: v_min_num_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x30,0xd5,0x69,0xd2,0x00,0x00] +v_min_num_f16_e64 v5.l, ttmp15, src_scc +// GFX12: v_min_num_f16_e64 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x30,0xd5,0x7b,0xfa,0x01,0x00] -v_min_num_f16_e64 v5, vcc_lo, ttmp15 -// GFX12: v_min_num_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x30,0xd5,0x6a,0xf6,0x00,0x00] +v_min_num_f16_e64 v5.l, m0, 0.5 +// GFX12: v_min_num_f16_e64 v5.l, m0, 0.5 ; encoding: [0x05,0x00,0x30,0xd5,0x7d,0xe0,0x01,0x00] -v_min_num_f16_e64 v5, vcc_hi, 0xfe0b -// GFX12: v_min_num_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x30,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +v_min_num_f16_e64 v5.l, exec_lo, -1 +// GFX12: v_min_num_f16_e64 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x30,0xd5,0x7e,0x82,0x01,0x00] -v_min_num_f16_e64 v5, ttmp15, src_scc -// GFX12: v_min_num_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x30,0xd5,0x7b,0xfa,0x01,0x00] +v_min_num_f16_e64 v5.l, |exec_hi|, null +// GFX12: v_min_num_f16_e64 v5.l, |exec_hi|, null ; encoding: [0x05,0x01,0x30,0xd5,0x7f,0xf8,0x00,0x00] -v_min_num_f16_e64 v5, m0, 0.5 -// GFX12: v_min_num_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x30,0xd5,0x7d,0xe0,0x01,0x00] +v_min_num_f16_e64 v5.l, null, exec_lo +// GFX12: v_min_num_f16_e64 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x30,0xd5,0x7c,0xfc,0x00,0x00] -v_min_num_f16_e64 v5, exec_lo, -1 -// GFX12: v_min_num_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x30,0xd5,0x7e,0x82,0x01,0x00] +v_min_num_f16_e64 v5.l, -1, exec_hi +// GFX12: v_min_num_f16_e64 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x30,0xd5,0xc1,0xfe,0x00,0x00] -v_min_num_f16_e64 v5, |exec_hi|, null -// GFX12: v_min_num_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x30,0xd5,0x7f,0xf8,0x00,0x00] +v_min_num_f16_e64 v5.l, 0.5, -m0 mul:2 +// GFX12: v_min_num_f16_e64 v5.l, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x30,0xd5,0xf0,0xfa,0x00,0x48] -v_min_num_f16_e64 v5, null, exec_lo -// GFX12: v_min_num_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x30,0xd5,0x7c,0xfc,0x00,0x00] +v_min_num_f16_e64 v5.l, -src_scc, |vcc_lo| mul:4 +// GFX12: v_min_num_f16_e64 v5.l, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x30,0xd5,0xfd,0xd4,0x00,0x30] -v_min_num_f16_e64 v5, -1, exec_hi -// GFX12: v_min_num_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x30,0xd5,0xc1,0xfe,0x00,0x00] +v_min_num_f16_e64 v255.l, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX12: v_min_num_f16_e64 v255.l, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x30,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] -v_min_num_f16_e64 v5, 0.5, -m0 mul:2 -// GFX12: v_min_num_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x30,0xd5,0xf0,0xfa,0x00,0x48] +v_min_num_f16_e64 v5.l, v1.h, v2.l +// GFX12: v_min_num_f16_e64 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x30,0xd5,0x01,0x05,0x02,0x00] -v_min_num_f16_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX12: v_min_num_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x30,0xd5,0xfd,0xd4,0x00,0x30] +v_min_num_f16_e64 v5.l, v255.l, v255.h +// GFX12: v_min_num_f16_e64 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x30,0xd5,0xff,0xff,0x03,0x00] -v_min_num_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 -// GFX12: v_min_num_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x30,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] +v_min_num_f16_e64 v255.h, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX12: v_min_num_f16_e64 v255.h, -|0xfe0b|, -|vcc_hi| op_sel:[0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x30,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] v_min_num_f32_e64 v5, v1, v2 // GFX12: v_min_num_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x15,0xd5,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s index 301f756e906a1..ee28cc85bc5c7 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s @@ -764,53 +764,65 @@ v_lshrrev_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ct v_lshrrev_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_lshrrev_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x19,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_max_num_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_max_num_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_max_num_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max_num_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: v_max_num_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_max_num_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_max_num_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_max_num_f16_e64_dpp v5, v1, v2 row_mirror -// GFX12: v_max_num_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_max_num_f16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX12: v_max_num_f16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_max_num_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX12: v_max_num_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_max_num_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX12: v_max_num_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_max_num_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX12: v_max_num_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_max_num_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX12: v_max_num_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_max_num_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX12: v_max_num_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_max_num_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX12: v_max_num_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_max_num_f16_e64_dpp v5, v1, s2 row_shl:15 -// GFX12: v_max_num_f16_e64_dpp v5, v1, s2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] +v_max_num_f16_e64_dpp v5.l, v1.l, s2 row_shl:15 +// GFX12: v_max_num_f16_e64_dpp v5.l, v1.l, s2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] -v_max_num_f16_e64_dpp v5, v1, 2.0 row_shl:15 -// GFX12: v_max_num_f16_e64_dpp v5, v1, 2.0 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] +v_max_num_f16_e64_dpp v5.l, v1.l, 2.0 row_shl:15 +// GFX12: v_max_num_f16_e64_dpp v5.l, v1.l, 2.0 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] -v_max_num_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX12: v_max_num_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_max_num_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX12: v_max_num_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_max_num_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX12: v_max_num_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_max_num_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX12: v_max_num_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_max_num_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX12: v_max_num_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_max_num_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX12: v_max_num_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_max_num_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX12: v_max_num_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_max_num_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX12: v_max_num_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_max_num_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_max_num_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_max_num_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_max_num_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_max_num_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_max_num_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x31,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +v_max_num_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_max_num_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x31,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -v_max_num_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_max_num_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x31,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] +v_max_num_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_max_num_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x31,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] -v_max_num_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_max_num_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x31,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30] +v_max_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_max_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x31,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30] + +v_max_num_f16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_max_num_f16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_max_num_f16_e64_dpp v5.l, |v1.h|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_max_num_f16_e64_dpp v5.l, |v1.h|, -v2.l op_sel:[1,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x31,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_max_num_f16_e64_dpp v5.l, -v1.l, |v2.h| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_max_num_f16_e64_dpp v5.l, -v1.l, |v2.h| op_sel:[0,1,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x12,0x31,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_max_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_max_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| op_sel:[0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc3,0x31,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30] v_max_num_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX12: v_max_num_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -956,53 +968,65 @@ v_max_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_max_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_max_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x14,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_min_num_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_min_num_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_min_num_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_min_num_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_min_num_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_min_num_f16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX12: v_min_num_f16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_min_num_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX12: v_min_num_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_min_num_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX12: v_min_num_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_min_num_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: v_min_num_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_min_num_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX12: v_min_num_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_min_num_f16_e64_dpp v5, v1, v2 row_mirror -// GFX12: v_min_num_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_min_num_f16_e64_dpp v5.l, v1.l, s2 row_shl:15 +// GFX12: v_min_num_f16_e64_dpp v5.l, v1.l, s2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] -v_min_num_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX12: v_min_num_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_min_num_f16_e64_dpp v5.l, v1.l, 2.0 row_shl:15 +// GFX12: v_min_num_f16_e64_dpp v5.l, v1.l, 2.0 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] -v_min_num_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX12: v_min_num_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_min_num_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX12: v_min_num_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_min_num_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX12: v_min_num_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min_num_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX12: v_min_num_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_min_num_f16_e64_dpp v5, v1, s2 row_shl:15 -// GFX12: v_min_num_f16_e64_dpp v5, v1, s2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] +v_min_num_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX12: v_min_num_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_min_num_f16_e64_dpp v5, v1, 2.0 row_shl:15 -// GFX12: v_min_num_f16_e64_dpp v5, v1, 2.0 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] +v_min_num_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX12: v_min_num_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_min_num_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX12: v_min_num_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_min_num_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_min_num_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_min_num_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX12: v_min_num_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_min_num_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_min_num_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x30,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -v_min_num_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX12: v_min_num_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_min_num_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_min_num_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x30,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] -v_min_num_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX12: v_min_num_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_min_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_min_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x30,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30] -v_min_num_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_min_num_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_min_num_f16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_min_num_f16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_min_num_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_min_num_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x30,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +v_min_num_f16_e64_dpp v5.l, |v1.h|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_min_num_f16_e64_dpp v5.l, |v1.h|, -v2.l op_sel:[1,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x30,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -v_min_num_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_min_num_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x30,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] +v_min_num_f16_e64_dpp v5.l, -v1.l, |v2.h| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_min_num_f16_e64_dpp v5.l, -v1.l, |v2.h| op_sel:[0,1,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x12,0x30,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] -v_min_num_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_min_num_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x30,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30] +v_min_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_min_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| op_sel:[0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc3,0x30,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30] v_min_num_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX12: v_min_num_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s index 7390720e4dd5a..2bdad0875e000 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s @@ -317,23 +317,35 @@ v_lshrrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_lshrrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_lshrrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x19,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_max_num_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max_num_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_max_num_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_max_num_f16_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max_num_f16_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +v_max_num_f16_e64_dpp v5.l, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5.l, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] -v_max_num_f16_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max_num_f16_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +v_max_num_f16_e64_dpp v5.l, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5.l, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] -v_max_num_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max_num_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x31,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +v_max_num_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x31,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -v_max_num_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_max_num_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x31,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +v_max_num_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_max_num_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x31,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -v_max_num_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: v_max_num_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x31,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +v_max_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_max_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x31,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_max_num_f16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x31,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_num_f16_e64_dpp v5.l, |v1.h|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5.l, |v1.h|, -v2.l op_sel:[1,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x31,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_max_num_f16_e64_dpp v5.l, -v1.l, |v2.h| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_max_num_f16_e64_dpp v5.l, -v1.l, |v2.h| op_sel:[0,1,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x12,0x31,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_max_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_max_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| op_sel:[0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc3,0x31,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] v_max_num_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_max_num_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x16,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -383,23 +395,35 @@ v_max_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_max_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_max_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x14,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_min_num_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min_num_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_min_num_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_num_f16_e64_dpp v5.l, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v5.l, v1.l, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_min_num_f16_e64_dpp v5.l, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v5.l, v1.l, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + +v_min_num_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x30,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_min_num_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_min_num_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x30,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -v_min_num_f16_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min_num_f16_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +v_min_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_min_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x30,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -v_min_num_f16_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min_num_f16_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +v_min_num_f16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x30,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_min_num_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min_num_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x30,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +v_min_num_f16_e64_dpp v5.l, |v1.h|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v5.l, |v1.h|, -v2.l op_sel:[1,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x30,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -v_min_num_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_min_num_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x30,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +v_min_num_f16_e64_dpp v5.l, -v1.l, |v2.h| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_min_num_f16_e64_dpp v5.l, -v1.l, |v2.h| op_sel:[0,1,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x12,0x30,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -v_min_num_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: v_min_num_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x30,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +v_min_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_min_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| op_sel:[0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc3,0x30,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] v_min_num_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_min_num_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x15,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] From 10e85d7f96e565804afdca87dc27b5a9b911cdd5 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Wed, 7 May 2025 13:22:19 -0400 Subject: [PATCH 2/3] revert some unnecessary change --- llvm/test/MC/AMDGPU/gfx11_asm_vop1.s | 2 +- llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s | 2 +- llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s | 2 +- llvm/test/MC/AMDGPU/gfx11_asm_vop2.s | 2 +- llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s | 8 ++++---- llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s | 2 +- llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s | 2 +- llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s | 2 +- llvm/test/MC/AMDGPU/gfx12_asm_vop2.s | 2 +- llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16.s | 2 +- llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8.s | 2 +- 11 files changed, 14 insertions(+), 14 deletions(-) diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s index ff9a09941b173..93e01954bea55 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1.s @@ -2,7 +2,7 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=GFX11 %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 %s -v_bfrev_b32 v5, v1 +v_bfrev_b32_e32 v5, v1 // GFX11: v_bfrev_b32_e32 v5, v1 ; encoding: [0x01,0x71,0x0a,0x7e] v_bfrev_b32 v5, v255 diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s index 40786454584e7..9fadc7554f2e4 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp16.s @@ -2,7 +2,7 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s -v_bfrev_b32 v5, v1 quad_perm:[3,2,1,0] +v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] // GFX11: v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0xff] v_bfrev_b32 v5, v1 quad_perm:[0,1,2,3] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s index 160a0ec2c4eb6..ba0c3495de2bb 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop1_dpp8.s @@ -2,7 +2,7 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s -v_bfrev_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05] v_bfrev_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s index b1233620fe613..163010adf7bd4 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s @@ -4,7 +4,7 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s -v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo +v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo // W32: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x40] // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s index 471f3f1baa677..2695059853b08 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s @@ -4,7 +4,7 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s -v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] +v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] // W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode @@ -459,15 +459,15 @@ v_cndmask_b32 v255, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 boun // W64: v_cndmask_b32_dpp v255, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xff,0x03,0xff,0x6f,0x05,0x30] // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode -v_cndmask_b32 v5, -v1, |v2|, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +v_cndmask_b32_dpp v5, -v1, |v2|, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 // W64: v_cndmask_b32_dpp v5, -v1, |v2|, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x90,0x00] // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode -v_cndmask_b32 v5, |v1|, -v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +v_cndmask_b32_dpp v5, |v1|, -v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 // W64: v_cndmask_b32_dpp v5, |v1|, -v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x60,0x00] // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode -v_cndmask_b32 v5, -|v1|, -|v2|, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +v_cndmask_b32_dpp v5, -|v1|, -|v2|, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 // W64: v_cndmask_b32_dpp v5, -|v1|, -|v2|, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0x00] // W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s index bce9f42ae52bc..b379a5d06b99b 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s @@ -4,7 +4,7 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s -v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] // W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s index be2ed508df964..9ccdd2f604cb2 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp16.s @@ -2,7 +2,7 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s -v_bfrev_b32 v5, v1 quad_perm:[3,2,1,0] +v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] // GFX12: v_bfrev_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x70,0x0a,0x7e,0x01,0x1b,0x00,0xff] v_bfrev_b32 v5, v1 quad_perm:[0,1,2,3] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s index 656dc5c10dded..61ee43dc9d164 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1_dpp8.s @@ -2,7 +2,7 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s -v_bfrev_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05] v_bfrev_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s index 5fc52ef6ba9c1..2bbd990bc46c1 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s @@ -4,7 +4,7 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s -v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo +v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo // W32: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x40] // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16.s index 4611626a0a1db..d4378ae0a210a 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16.s @@ -4,7 +4,7 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s -v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] +v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] // W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8.s index 2af55189f3e5c..a81cd58acf036 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8.s @@ -4,7 +4,7 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s -v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] // W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode From 7df22d7bb8b63738bb2c8589cd5d5ff0a854b5fd Mon Sep 17 00:00:00 2001 From: guochen2 Date: Wed, 7 May 2025 13:27:08 -0400 Subject: [PATCH 3/3] revert opsel change --- llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 48 +++++++++++----------- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s | 48 +++++++++++----------- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 48 +++++++++++----------- 3 files changed, 72 insertions(+), 72 deletions(-) diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index 14949ed257000..ce452957b0198 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -644,13 +644,13 @@ v_ashrrev_i16 v5.l, src_scc, vcc_lo v_ashrrev_i16 v255.l, 0xfe0b, vcc_hi // GFX11: v_ashrrev_i16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x3a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_ashrrev_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] +v_ashrrev_i16 v5.l, v1.h, v2.l // GFX11: v_ashrrev_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x3a,0xd7,0x01,0x05,0x02,0x00] -v_ashrrev_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] +v_ashrrev_i16 v5.l, v255.l, v255.h // GFX11: v_ashrrev_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x3a,0xd7,0xff,0xff,0x03,0x00] -v_ashrrev_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] +v_ashrrev_i16 v255.h, 0xfe0b, vcc_hi // GFX11: v_ashrrev_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x3a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_ashrrev_i64 v[5:6], v1, vcc @@ -2732,13 +2732,13 @@ v_lshlrev_b16 v5.l, src_scc, vcc_lo v_lshlrev_b16 v255.l, 0xfe0b, vcc_hi // GFX11: v_lshlrev_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x38,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_lshlrev_b16 v5.l, v1.h, v2.l op_sel:[1,0,0] +v_lshlrev_b16 v5.l, v1.h, v2.l // GFX11: v_lshlrev_b16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x38,0xd7,0x01,0x05,0x02,0x00] -v_lshlrev_b16 v5.l, v255.l, v255.h op_sel:[0,1,0] +v_lshlrev_b16 v5.l, v255.l, v255.h // GFX11: v_lshlrev_b16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x38,0xd7,0xff,0xff,0x03,0x00] -v_lshlrev_b16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] +v_lshlrev_b16 v255.h, 0xfe0b, vcc_hi // GFX11: v_lshlrev_b16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x38,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_lshlrev_b64 v[5:6], v1, vcc @@ -2813,13 +2813,13 @@ v_lshrrev_b16 v5.l, src_scc, vcc_lo v_lshrrev_b16 v255.l, 0xfe0b, vcc_hi // GFX11: v_lshrrev_b16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x39,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_lshrrev_b16 v5.l, v1.h, v2.l op_sel:[1,0,0] +v_lshrrev_b16 v5.l, v1.h, v2.l // GFX11: v_lshrrev_b16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x39,0xd7,0x01,0x05,0x02,0x00] -v_lshrrev_b16 v5.l, v255.l, v255.h op_sel:[0,1,0] +v_lshrrev_b16 v5.l, v255.l, v255.h // GFX11: v_lshrrev_b16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x39,0xd7,0xff,0xff,0x03,0x00] -v_lshrrev_b16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] +v_lshrrev_b16 v255.h, 0xfe0b, vcc_hi // GFX11: v_lshrrev_b16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x39,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_lshrrev_b64 v[5:6], v1, vcc @@ -3740,13 +3740,13 @@ v_max_i16 v5.l, src_scc, vcc_lo v_max_i16 v255.l, 0xfe0b, vcc_hi // GFX11: v_max_i16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x0a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_max_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] +v_max_i16 v5.l, v1.h, v2.l // GFX11: v_max_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0a,0xd7,0x01,0x05,0x02,0x00] -v_max_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] +v_max_i16 v5.l, v255.l, v255.h // GFX11: v_max_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0a,0xd7,0xff,0xff,0x03,0x00] -v_max_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] +v_max_i16 v255.h, 0xfe0b, vcc_hi // GFX11: v_max_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x0a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_max_u16 v5.l, v1.l, v2.l @@ -3794,13 +3794,13 @@ v_max_u16 v5.l, src_scc, vcc_lo v_max_u16 v255.l, 0xfe0b, vcc_hi // GFX11: v_max_u16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_max_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] +v_max_u16 v5.l, v1.h, v2.l // GFX11: v_max_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x09,0xd7,0x01,0x05,0x02,0x00] -v_max_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] +v_max_u16 v5.l, v255.l, v255.h // GFX11: v_max_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x09,0xd7,0xff,0xff,0x03,0x00] -v_max_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] +v_max_u16 v255.h, 0xfe0b, vcc_hi // GFX11: v_max_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_maxmin_f16 v5.l, v1.l, v2.l, s3 @@ -4829,13 +4829,13 @@ v_min_i16 v5.l, src_scc, vcc_lo v_min_i16 v255.l, 0xfe0b, vcc_hi // GFX11: v_min_i16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x0c,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_min_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] +v_min_i16 v5.l, v1.h, v2.l // GFX11: v_min_i16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0c,0xd7,0x01,0x05,0x02,0x00] -v_min_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] +v_min_i16 v5.l, v255.l, v255.h // GFX11: v_min_i16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0c,0xd7,0xff,0xff,0x03,0x00] -v_min_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] +v_min_i16 v255.h, 0xfe0b, vcc_hi // GFX11: v_min_i16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x0c,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_min_u16 v5.l, v1.l, v2.l @@ -4883,13 +4883,13 @@ v_min_u16 v5.l, src_scc, vcc_lo v_min_u16 v255.l, 0xfe0b, vcc_hi // GFX11: v_min_u16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_min_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] +v_min_u16 v5.l, v1.h, v2.l // GFX11: v_min_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x0b,0xd7,0x01,0x05,0x02,0x00] -v_min_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] +v_min_u16 v5.l, v255.l, v255.h // GFX11: v_min_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x0b,0xd7,0xff,0xff,0x03,0x00] -v_min_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] +v_min_u16 v255.h, 0xfe0b, vcc_hi // GFX11: v_min_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_minmax_f16 v5.l, v1.l, v2.l, s3 @@ -5390,13 +5390,13 @@ v_mul_lo_u16 v5.l, src_scc, vcc_lo v_mul_lo_u16 v255.l, 0xfe0b, vcc_hi // GFX11: v_mul_lo_u16 v255.l, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x05,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_mul_lo_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] +v_mul_lo_u16 v5.l, v1.h, v2.l // GFX11: v_mul_lo_u16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x05,0xd7,0x01,0x05,0x02,0x00] -v_mul_lo_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] +v_mul_lo_u16 v5.l, v255.l, v255.h // GFX11: v_mul_lo_u16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x05,0xd7,0xff,0xff,0x03,0x00] -v_mul_lo_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] +v_mul_lo_u16 v255.h, 0xfe0b, vcc_hi // GFX11: v_mul_lo_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x05,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_mul_lo_u32 v5, v1, v2 diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s index 1fae79d9469b3..f89833f9da651 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s @@ -1904,16 +1904,16 @@ v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bo v_lshlrev_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_lshlrev_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_lshlrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf +v_lshlrev_b16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf // GFX11: v_lshlrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +v_lshlrev_b16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 // GFX11: v_lshlrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 // GFX11: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -v_lshlrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +v_lshlrev_b16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_lshlrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] @@ -1958,16 +1958,16 @@ v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bo v_lshrrev_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_lshrrev_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_lshrrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf +v_lshrrev_b16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf // GFX11: v_lshrrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +v_lshrrev_b16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 // GFX11: v_lshrrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 // GFX11: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] @@ -2702,16 +2702,16 @@ v_max_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_max_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_max_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_max_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf +v_max_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf // GFX11: v_max_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_max_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +v_max_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 // GFX11: v_max_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_max_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +v_max_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 // GFX11: v_max_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -v_max_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +v_max_i16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_max_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] @@ -2756,16 +2756,16 @@ v_max_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_max_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_max_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_max_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf +v_max_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf // GFX11: v_max_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_max_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +v_max_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 // GFX11: v_max_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_max_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +v_max_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 // GFX11: v_max_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -v_max_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +v_max_u16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_max_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] @@ -3791,16 +3791,16 @@ v_min_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_min_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_min_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_min_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf +v_min_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf // GFX11: v_min_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_min_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +v_min_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 // GFX11: v_min_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_min_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +v_min_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 // GFX11: v_min_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -v_min_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +v_min_i16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_min_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] @@ -3845,16 +3845,16 @@ v_min_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_min_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_min_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_min_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf +v_min_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf // GFX11: v_min_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_min_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +v_min_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 // GFX11: v_min_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -v_min_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +v_min_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 // GFX11: v_min_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -v_min_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +v_min_u16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_min_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s index 40d844d62f6f5..80ee725db319f 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s @@ -1093,16 +1093,16 @@ v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 v_lshlrev_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_lshlrev_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x38,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_lshlrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +v_lshlrev_b16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_lshlrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x38,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_lshlrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +v_lshlrev_b16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_lshlrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x38,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX11: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x38,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_lshlrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] +v_lshlrev_b16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_lshlrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x38,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] @@ -1114,16 +1114,16 @@ v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 v_lshrrev_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_lshrrev_b16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x39,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_lshrrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +v_lshrrev_b16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_lshrrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x39,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_lshrrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +v_lshrrev_b16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_lshrrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x39,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX11: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x39,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] +v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x39,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] @@ -1696,16 +1696,16 @@ v_max_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 v_max_i16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_max_i16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x0a,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_max_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +v_max_i16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_max_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_max_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +v_max_i16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_max_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_max_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 +v_max_i16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX11: v_max_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x0a,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_max_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] +v_max_i16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_max_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x0a,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] v_max_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] @@ -1717,16 +1717,16 @@ v_max_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 v_max_u16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_max_u16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x09,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_max_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +v_max_u16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_max_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x09,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_max_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +v_max_u16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_max_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x09,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_max_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 +v_max_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX11: v_max_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x09,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_max_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] +v_max_u16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_max_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x09,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] @@ -2509,16 +2509,16 @@ v_min_i16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 v_min_i16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_min_i16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x0c,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_min_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +v_min_i16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_min_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_min_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +v_min_i16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_min_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_min_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 +v_min_i16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX11: v_min_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x0c,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_min_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] +v_min_i16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_min_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x0c,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] v_min_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] @@ -2530,16 +2530,16 @@ v_min_u16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 v_min_u16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_min_u16_e64_dpp v255.l, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x0b,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_min_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] +v_min_u16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_min_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x0b,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_min_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +v_min_u16_e64_dpp v5.l, v1.h, v2.l dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_min_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x08,0x0b,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_min_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 +v_min_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX11: v_min_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x10,0x0b,0xd7,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_min_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] +v_min_u16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_min_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x0b,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0]