diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index aee2f2cb3d771..b6f95686704be 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -1918,6 +1918,7 @@ let AssemblerPredicate = isGFX11Plus in { // These instructions differ from GFX12 variant by supporting DPP: defm V_LSHL_ADD_U64 : VOP3Only_Realtriple_gfx1250<0x252>; +defm V_CVT_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36d>; //===----------------------------------------------------------------------===// // GFX10. diff --git a/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll b/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll index 5b4866c386793..6823a472c3ac6 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GCN,GFX-942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck --check-prefixes=GCN,GFX-950 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GFX1250 %s ; TODO: Add global-isel when it can support bf16 @@ -9,6 +10,11 @@ define amdgpu_ps float @v_test_cvt_bf16_f32_v(bfloat %v) { ; GCN: ; %bb.0: ; GCN-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GCN-NEXT: ; return to shader part epilog +; +; GFX1250-LABEL: v_test_cvt_bf16_f32_v: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1250-NEXT: ; return to shader part epilog %cvt = fpext bfloat %v to float ret float %cvt } @@ -19,6 +25,13 @@ define amdgpu_ps float @v_test_cvt_bf16_f32_s(bfloat inreg %v) { ; GCN-NEXT: s_lshl_b32 s0, s0, 16 ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: ; return to shader part epilog +; +; GFX1250-LABEL: v_test_cvt_bf16_f32_s: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_lshl_b32 s0, s0, 16 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: v_mov_b32_e32 v0, s0 +; GFX1250-NEXT: ; return to shader part epilog %cvt = fpext bfloat %v to float ret float %cvt } @@ -47,6 +60,11 @@ define amdgpu_ps float @v_test_cvt_v2f32_v2bf16_v(<2 x float> %src) { ; GFX-950: ; %bb.0: ; GFX-950-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1 ; GFX-950-NEXT: ; return to shader part epilog +; +; GFX1250-LABEL: v_test_cvt_v2f32_v2bf16_v: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1 +; GFX1250-NEXT: ; return to shader part epilog %res = fptrunc <2 x float> %src to <2 x bfloat> %cast = bitcast <2 x bfloat> %res to float ret float %cast @@ -80,6 +98,11 @@ define amdgpu_ps float @v_test_cvt_v2f32_v2bf16_s(<2 x float> inreg %src) { ; GFX-950-NEXT: v_mov_b32_e32 v0, s1 ; GFX-950-NEXT: v_cvt_pk_bf16_f32 v0, s0, v0 ; GFX-950-NEXT: ; return to shader part epilog +; +; GFX1250-LABEL: v_test_cvt_v2f32_v2bf16_s: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, s0, s1 +; GFX1250-NEXT: ; return to shader part epilog %res = fptrunc <2 x float> %src to <2 x bfloat> %cast = bitcast <2 x bfloat> %res to float ret float %cast @@ -103,6 +126,13 @@ define amdgpu_ps float @v_test_cvt_f32_bf16_v(float %src) { ; GFX-950-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0 ; GFX-950-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX-950-NEXT: ; return to shader part epilog +; +; GFX1250-LABEL: v_test_cvt_f32_bf16_v: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1250-NEXT: ; return to shader part epilog %trunc = fptrunc float %src to bfloat %ext = fpext bfloat %trunc to float ret float %ext @@ -172,6 +202,38 @@ define amdgpu_ps float @v_test_cvt_v2f64_v2bf16_v(<2 x double> %src) { ; GFX-950-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; GFX-950-NEXT: v_cvt_pk_bf16_f32 v0, v0, v4 ; GFX-950-NEXT: ; return to shader part epilog +; +; GFX1250-LABEL: v_test_cvt_v2f64_v2bf16_v: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_cvt_f32_f64_e32 v8, v[2:3] +; GFX1250-NEXT: v_cvt_f32_f64_e32 v9, v[0:1] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_cvt_f64_f32_e32 v[4:5], v8 +; GFX1250-NEXT: v_cvt_f64_f32_e32 v[6:7], v9 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX1250-NEXT: v_cmp_gt_f64_e64 s1, |v[2:3]|, |v[4:5]| +; GFX1250-NEXT: v_cmp_nlg_f64_e32 vcc_lo, v[2:3], v[4:5] +; GFX1250-NEXT: v_cmp_nlg_f64_e64 s0, v[0:1], v[6:7] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_cndmask_b32_e64 v2, -1, 1, s1 +; GFX1250-NEXT: v_cmp_gt_f64_e64 s1, |v[0:1]|, |v[6:7]| +; GFX1250-NEXT: v_dual_add_nc_u32 v1, v8, v2 :: v_dual_bitop2_b32 v10, 1, v8 bitop3:0x40 +; GFX1250-NEXT: s_wait_alu 0xf1ff +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX1250-NEXT: v_cndmask_b32_e64 v0, -1, 1, s1 +; GFX1250-NEXT: v_and_b32_e32 v11, 1, v9 +; GFX1250-NEXT: v_cmp_eq_u32_e64 s1, 1, v10 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-NEXT: v_add_nc_u32_e32 v0, v9, v0 +; GFX1250-NEXT: v_cmp_eq_u32_e64 s2, 1, v11 +; GFX1250-NEXT: s_or_b32 vcc_lo, s1, vcc_lo +; GFX1250-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc_lo +; GFX1250-NEXT: s_or_b32 vcc_lo, s2, s0 +; GFX1250-NEXT: s_wait_alu 0xfffe +; GFX1250-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc_lo +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1 +; GFX1250-NEXT: ; return to shader part epilog %res = fptrunc <2 x double> %src to <2 x bfloat> %cast = bitcast <2 x bfloat> %res to float ret float %cast @@ -201,6 +263,11 @@ define amdgpu_ps float @fptrunc_f32_f32_to_v2bf16(float %a, float %b) { ; GFX-950: ; %bb.0: ; %entry ; GFX-950-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1 ; GFX-950-NEXT: ; return to shader part epilog +; +; GFX1250-LABEL: fptrunc_f32_f32_to_v2bf16: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1 +; GFX1250-NEXT: ; return to shader part epilog entry: %a.cvt = fptrunc float %a to bfloat %b.cvt = fptrunc float %b to bfloat @@ -236,6 +303,11 @@ define amdgpu_ps float @fptrunc_f32_f32_to_v2bf16_mods(float %a, float %b) { ; GFX-950: ; %bb.0: ; %entry ; GFX-950-NEXT: v_cvt_pk_bf16_f32 v0, -v0, |v1| ; GFX-950-NEXT: ; return to shader part epilog +; +; GFX1250-LABEL: fptrunc_f32_f32_to_v2bf16_mods: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, -v0, |v1| +; GFX1250-NEXT: ; return to shader part epilog entry: %a.neg = fneg float %a %a.cvt = fptrunc float %a.neg to bfloat @@ -269,6 +341,13 @@ define amdgpu_ps void @fptrunc_f32_to_bf16(float %a, ptr %out) { ; GFX-950-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0 ; GFX-950-NEXT: flat_store_short v[2:3], v0 ; GFX-950-NEXT: s_endpgm +; +; GFX1250-LABEL: fptrunc_f32_to_bf16: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1 +; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0 +; GFX1250-NEXT: flat_store_b16 v[2:3], v0 +; GFX1250-NEXT: s_endpgm entry: %a.cvt = fptrunc float %a to bfloat store bfloat %a.cvt, ptr %out @@ -298,6 +377,13 @@ define amdgpu_ps void @fptrunc_f32_to_bf16_abs(float %a, ptr %out) { ; GFX-950-NEXT: v_cvt_pk_bf16_f32 v0, |v0|, s0 ; GFX-950-NEXT: flat_store_short v[2:3], v0 ; GFX-950-NEXT: s_endpgm +; +; GFX1250-LABEL: fptrunc_f32_to_bf16_abs: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1 +; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, |v0|, s0 +; GFX1250-NEXT: flat_store_b16 v[2:3], v0 +; GFX1250-NEXT: s_endpgm entry: %a.abs = call float @llvm.fabs.f32(float %a) %a.cvt = fptrunc float %a.abs to bfloat @@ -328,6 +414,13 @@ define amdgpu_ps void @fptrunc_f32_to_bf16_neg(float %a, ptr %out) { ; GFX-950-NEXT: v_cvt_pk_bf16_f32 v0, -v0, s0 ; GFX-950-NEXT: flat_store_short v[2:3], v0 ; GFX-950-NEXT: s_endpgm +; +; GFX1250-LABEL: fptrunc_f32_to_bf16_neg: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1 +; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, -v0, s0 +; GFX1250-NEXT: flat_store_b16 v[2:3], v0 +; GFX1250-NEXT: s_endpgm entry: %a.neg = fneg float %a %a.cvt = fptrunc float %a.neg to bfloat @@ -373,6 +466,24 @@ define amdgpu_ps void @fptrunc_f64_to_bf16(double %a, ptr %out) { ; GFX-950-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0 ; GFX-950-NEXT: flat_store_short v[2:3], v0 ; GFX-950-NEXT: s_endpgm +; +; GFX1250-LABEL: fptrunc_f64_to_bf16: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_cvt_f32_f64_e32 v6, v[0:1] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_cvt_f64_f32_e32 v[4:5], v6 +; GFX1250-NEXT: v_cmp_gt_f64_e64 s0, |v[0:1]|, |v[4:5]| +; GFX1250-NEXT: v_cmp_nlg_f64_e32 vcc_lo, v[0:1], v[4:5] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_cndmask_b32_e64 v0, -1, 1, s0 +; GFX1250-NEXT: v_dual_add_nc_u32 v0, v6, v0 :: v_dual_bitop2_b32 v7, 1, v6 bitop3:0x40 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_cmp_eq_u32_e64 s0, 1, v7 +; GFX1250-NEXT: s_or_b32 vcc_lo, vcc_lo, s0 +; GFX1250-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo +; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0 +; GFX1250-NEXT: flat_store_b16 v[2:3], v0 +; GFX1250-NEXT: s_endpgm entry: %a.cvt = fptrunc double %a to bfloat store bfloat %a.cvt, ptr %out @@ -417,6 +528,25 @@ define amdgpu_ps void @fptrunc_f64_to_bf16_neg(double %a, ptr %out) { ; GFX-950-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0 ; GFX-950-NEXT: flat_store_short v[2:3], v0 ; GFX-950-NEXT: s_endpgm +; +; GFX1250-LABEL: fptrunc_f64_to_bf16_neg: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_cvt_f32_f64_e64 v6, -v[0:1] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_cvt_f64_f32_e32 v[4:5], v6 +; GFX1250-NEXT: v_cmp_gt_f64_e64 s1, |v[0:1]|, |v[4:5]| +; GFX1250-NEXT: v_cmp_nlg_f64_e64 s0, -v[0:1], v[4:5] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_cndmask_b32_e64 v0, -1, 1, s1 +; GFX1250-NEXT: v_dual_add_nc_u32 v0, v6, v0 :: v_dual_bitop2_b32 v7, 1, v6 bitop3:0x40 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7 +; GFX1250-NEXT: s_or_b32 vcc_lo, s0, vcc_lo +; GFX1250-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0 +; GFX1250-NEXT: flat_store_b16 v[2:3], v0 +; GFX1250-NEXT: s_endpgm entry: %a.neg = fneg double %a %a.cvt = fptrunc double %a.neg to bfloat @@ -462,6 +592,25 @@ define amdgpu_ps void @fptrunc_f64_to_bf16_abs(double %a, ptr %out) { ; GFX-950-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0 ; GFX-950-NEXT: flat_store_short v[2:3], v0 ; GFX-950-NEXT: s_endpgm +; +; GFX1250-LABEL: fptrunc_f64_to_bf16_abs: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_cvt_f32_f64_e64 v6, |v[0:1]| +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_cvt_f64_f32_e32 v[4:5], v6 +; GFX1250-NEXT: v_cmp_gt_f64_e64 s1, |v[0:1]|, |v[4:5]| +; GFX1250-NEXT: v_cmp_nlg_f64_e64 s0, |v[0:1]|, v[4:5] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_cndmask_b32_e64 v0, -1, 1, s1 +; GFX1250-NEXT: v_dual_add_nc_u32 v0, v6, v0 :: v_dual_bitop2_b32 v7, 1, v6 bitop3:0x40 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7 +; GFX1250-NEXT: s_or_b32 vcc_lo, s0, vcc_lo +; GFX1250-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0 +; GFX1250-NEXT: flat_store_b16 v[2:3], v0 +; GFX1250-NEXT: s_endpgm entry: %a.abs = call double @llvm.fabs.f64(double %a) %a.cvt = fptrunc double %a.abs to bfloat diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s index 0070c8ab9ee78..789d6f892762b 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s @@ -15,3 +15,48 @@ v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] v_lshl_add_u64 v[2:3], v[4:5], v7, 12345 // GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_cvt_pk_bf16_f32 v5, v1, v2 +// GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_bf16_f32 v5, v255, v255 +// GFX1250: v_cvt_pk_bf16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x6d,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_bf16_f32 v5, s1, s2 +// GFX1250: v_cvt_pk_bf16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_bf16_f32 v5, s105, s105 +// GFX1250: v_cvt_pk_bf16_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x6d,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_bf16_f32 v5, vcc_lo, ttmp15 +// GFX1250: v_cvt_pk_bf16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x6d,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_bf16_f32 v5, vcc_hi, 0xaf123456 +// GFX1250: v_cvt_pk_bf16_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x6d,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pk_bf16_f32 v5, ttmp15, src_scc +// GFX1250: v_cvt_pk_bf16_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x6d,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_bf16_f32 v5, m0, 0.5 +// GFX1250: v_cvt_pk_bf16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x6d,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_bf16_f32 v5, exec_lo, -1 +// GFX1250: v_cvt_pk_bf16_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x6d,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_bf16_f32 v5, exec_hi, null +// GFX1250: v_cvt_pk_bf16_f32 v5, exec_hi, null ; encoding: [0x05,0x00,0x6d,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_bf16_f32 v5, null, exec_lo +// GFX1250: v_cvt_pk_bf16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x6d,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_bf16_f32 v5, -1, exec_hi +// GFX1250: v_cvt_pk_bf16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x6d,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_bf16_f32 v5, 0.5, m0 mul:2 +// GFX1250: v_cvt_pk_bf16_f32 v5, 0.5, m0 mul:2 ; encoding: [0x05,0x00,0x6d,0xd7,0xf0,0xfa,0x00,0x08] + +v_cvt_pk_bf16_f32 v5, src_scc, vcc_lo mul:4 +// GFX1250: v_cvt_pk_bf16_f32 v5, src_scc, vcc_lo mul:4 ; encoding: [0x05,0x00,0x6d,0xd7,0xfd,0xd4,0x00,0x10] + +v_cvt_pk_bf16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 +// GFX1250: v_cvt_pk_bf16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x6d,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s index 553eacc8e7b61..e1165faf59d9c 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s @@ -15,3 +15,48 @@ v_lshl_add_u64 v[2:3], s[4:5], 4, v[2:3] v_lshl_add_u64 v[2:3], v[4:5], v7, 12345 // GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +v_cvt_pk_bf16_f32 v5, v1, v2 +// GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_bf16_f32 v5, v255, v255 +// GFX1250: v_cvt_pk_bf16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x6d,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_bf16_f32 v5, s1, s2 +// GFX1250: v_cvt_pk_bf16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x04,0x00,0x00] + +v_cvt_pk_bf16_f32 v5, s105, s105 +// GFX1250: v_cvt_pk_bf16_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x6d,0xd7,0x69,0xd2,0x00,0x00] + +v_cvt_pk_bf16_f32 v5, vcc_lo, ttmp15 +// GFX1250: v_cvt_pk_bf16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x6d,0xd7,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_bf16_f32 v5, vcc_hi, 0xaf123456 +// GFX1250: v_cvt_pk_bf16_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x6d,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pk_bf16_f32 v5, ttmp15, src_scc +// GFX1250: v_cvt_pk_bf16_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x6d,0xd7,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_bf16_f32 v5, m0, 0.5 +// GFX1250: v_cvt_pk_bf16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x6d,0xd7,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_bf16_f32 v5, exec_lo, -1 +// GFX1250: v_cvt_pk_bf16_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x6d,0xd7,0x7e,0x82,0x01,0x00] + +v_cvt_pk_bf16_f32 v5, exec_hi, null +// GFX1250: v_cvt_pk_bf16_f32 v5, exec_hi, null ; encoding: [0x05,0x00,0x6d,0xd7,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_bf16_f32 v5, null, exec_lo +// GFX1250: v_cvt_pk_bf16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x6d,0xd7,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_bf16_f32 v5, -1, exec_hi +// GFX1250: v_cvt_pk_bf16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x6d,0xd7,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_bf16_f32 v5, 0.5, m0 mul:2 +// GFX1250: v_cvt_pk_bf16_f32 v5, 0.5, m0 mul:2 ; encoding: [0x05,0x00,0x6d,0xd7,0xf0,0xfa,0x00,0x08] + +v_cvt_pk_bf16_f32 v5, src_scc, vcc_lo mul:4 +// GFX1250: v_cvt_pk_bf16_f32 v5, src_scc, vcc_lo mul:4 ; encoding: [0x05,0x00,0x6d,0xd7,0xfd,0xd4,0x00,0x10] + +v_cvt_pk_bf16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 +// GFX1250: v_cvt_pk_bf16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x6d,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s new file mode 100644 index 0000000000000..bc910b9dd18e9 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s @@ -0,0 +1,59 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x6d,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x05,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s new file mode 100644 index 0000000000000..3bb84e264cf76 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s @@ -0,0 +1,59 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x6d,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x05,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s new file mode 100644 index 0000000000000..f48445f84aa31 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s @@ -0,0 +1,19 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6d,0xd7,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x6d,0xd7,0xea,0x04,0x02,0x10,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0x6d,0xd7,0xe9,0xfe,0x03,0x38,0xff,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s new file mode 100644 index 0000000000000..d7a95f42aaecc --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s @@ -0,0 +1,19 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding < %s | FileCheck --check-prefix=GFX1250 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6d,0xd7,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x6d,0xd7,0xea,0x04,0x02,0x10,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cvt_pk_bf16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_cvt_pk_bf16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0x6d,0xd7,0xe9,0xfe,0x03,0x38,0xff,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt index d9d8f60fe3d17..a1a1d0c5d7ed2 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt @@ -16,6 +16,52 @@ 0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00 # GFX1250: v_lshl_add_u64 v[2:3], v[4:5], v7, 0x3039 ; encoding: [0x02,0x00,0x52,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00] + +0xff,0x81,0x6d,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf +# GFX1250: v_cvt_pk_bf16_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x6d,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf] + +0x05,0x00,0x6d,0xd7,0xc1,0xfe,0x00,0x00 +# GFX1250: v_cvt_pk_bf16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x6d,0xd7,0xc1,0xfe,0x00,0x00] + +0x05,0x00,0x6d,0xd7,0xf0,0xfa,0x00,0x08 +# GFX1250: v_cvt_pk_bf16_f32 v5, 0.5, m0 mul:2 ; encoding: [0x05,0x00,0x6d,0xd7,0xf0,0xfa,0x00,0x08] + +0x05,0x00,0x6d,0xd7,0x7f,0xf8,0x00,0x00 +# GFX1250: v_cvt_pk_bf16_f32 v5, exec_hi, null ; encoding: [0x05,0x00,0x6d,0xd7,0x7f,0xf8,0x00,0x00] + +0x05,0x00,0x6d,0xd7,0x7e,0x82,0x01,0x00 +# GFX1250: v_cvt_pk_bf16_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x6d,0xd7,0x7e,0x82,0x01,0x00] + +0x05,0x00,0x6d,0xd7,0x7d,0xe0,0x01,0x00 +# GFX1250: v_cvt_pk_bf16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x6d,0xd7,0x7d,0xe0,0x01,0x00] + +0x05,0x00,0x6d,0xd7,0x7c,0xfc,0x00,0x00 +# GFX1250: v_cvt_pk_bf16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x6d,0xd7,0x7c,0xfc,0x00,0x00] + +0x05,0x00,0x6d,0xd7,0x01,0x04,0x00,0x00 +# GFX1250: v_cvt_pk_bf16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x04,0x00,0x00] + +0x05,0x00,0x6d,0xd7,0x69,0xd2,0x00,0x00 +# GFX1250: v_cvt_pk_bf16_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x6d,0xd7,0x69,0xd2,0x00,0x00] + +0x05,0x00,0x6d,0xd7,0xfd,0xd4,0x00,0x10 +# GFX1250: v_cvt_pk_bf16_f32 v5, src_scc, vcc_lo mul:4 ; encoding: [0x05,0x00,0x6d,0xd7,0xfd,0xd4,0x00,0x10] + +0x05,0x00,0x6d,0xd7,0x7b,0xfa,0x01,0x00 +# GFX1250: v_cvt_pk_bf16_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x6d,0xd7,0x7b,0xfa,0x01,0x00] + +0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00 +# GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00] + +0x05,0x00,0x6d,0xd7,0xff,0xff,0x03,0x00 +# GFX1250: v_cvt_pk_bf16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x6d,0xd7,0xff,0xff,0x03,0x00] + +0x05,0x00,0x6d,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf +# GFX1250: v_cvt_pk_bf16_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x6d,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +0x05,0x00,0x6d,0xd7,0x6a,0xf6,0x00,0x00 +# GFX1250: v_cvt_pk_bf16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x6d,0xd7,0x6a,0xf6,0x00,0x00] + ## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: # GFX1250-FAKE16: {{.*}} # GFX1250-REAL16: {{.*}} diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt new file mode 100644 index 0000000000000..dec73b74afc8d --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt @@ -0,0 +1,45 @@ +# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s + +0xff,0x81,0x6d,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x05,0x30 +# GFX1250: v_cvt_pk_bf16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x6d,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x05,0x30] + +0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01 +# GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x09,0x13 +# GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x09,0x13] + +0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt new file mode 100644 index 0000000000000..db211f9061dca --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt @@ -0,0 +1,15 @@ +# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s + +0xff,0x81,0x6d,0xd7,0xe9,0xfe,0x03,0x38,0xff,0x00,0x00,0x00 +# GFX1250: v_cvt_pk_bf16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x81,0x6d,0xd7,0xe9,0xfe,0x03,0x38,0xff,0x00,0x00,0x00] + +0x05,0x00,0x6d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +0x05,0x00,0x6d,0xd7,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05 +# GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6d,0xd7,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05] + +0x05,0x00,0x6d,0xd7,0xea,0x04,0x02,0x10,0x01,0x77,0x39,0x05 +# GFX1250: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x00,0x6d,0xd7,0xea,0x04,0x02,0x10,0x01,0x77,0x39,0x05]