diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 5207201e14c09..6baef137df5e1 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -3007,8 +3007,8 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) { switch (I.getOpcode()) { case AMDGPU::V_ADDC_U32_e32: case AMDGPU::V_ADDC_U32_dpp: - case AMDGPU::V_CNDMASK_B16_e32: - case AMDGPU::V_CNDMASK_B16_dpp: + case AMDGPU::V_CNDMASK_B16_fake16_e32: + case AMDGPU::V_CNDMASK_B16_fake16_dpp: case AMDGPU::V_CNDMASK_B32_e32: case AMDGPU::V_CNDMASK_B32_dpp: case AMDGPU::V_DIV_FMAS_F32_e64: @@ -3023,8 +3023,8 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) { HazardReg == AMDGPU::VCC_HI; case AMDGPU::V_ADDC_U32_e64: case AMDGPU::V_ADDC_U32_e64_dpp: - case AMDGPU::V_CNDMASK_B16_e64: - case AMDGPU::V_CNDMASK_B16_e64_dpp: + case AMDGPU::V_CNDMASK_B16_fake16_e64: + case AMDGPU::V_CNDMASK_B16_fake16_e64_dpp: case AMDGPU::V_CNDMASK_B32_e64: case AMDGPU::V_CNDMASK_B32_e64_dpp: case AMDGPU::V_SUBB_U32_e64: diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index cdc1132579d8d..1abbf4c217a69 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1245,11 +1245,22 @@ class VOPSelectPat : GCNPat < (vt (select i1:$src0, vt:$src1, vt:$src2)), (V_CNDMASK_B32_e64 0, VSrc_b32:$src2, 0, VSrc_b32:$src1, SSrc_i1:$src0) >; +class VOPSelectPat_t16 : GCNPat < + (vt (select i1:$src0, vt:$src1, vt:$src2)), + (V_CNDMASK_B16_t16_e64 0, VSrcT_b16:$src2, 0, VSrcT_b16:$src1, SSrc_i1:$src0) +>; def : VOPSelectModsPat ; def : VOPSelectModsPat ; -def : VOPSelectPat ; -def : VOPSelectPat ; +foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in +let True16Predicate = p in { + def : VOPSelectPat ; + def : VOPSelectPat ; +} // End True16Predicate = p +let True16Predicate = UseRealTrue16Insts in { + def : VOPSelectPat_t16 ; + def : VOPSelectPat_t16 ; +} // End True16Predicate = UseRealTrue16Insts let AddedComplexity = 1 in { def : GCNPat < diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index ca4a0fa706c30..691e1cea917bb 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -714,6 +714,26 @@ class VOP2e_SGPR ArgVT> : VOPProfile { def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>; def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>; // V_CNDMASK_B16 is VOP3 only +def VOP2e_I16_I16_I16_I1_true16 : VOP2e_SGPR<[i16, i16, i16, i1]> { + let IsTrue16 = 1; + let IsRealTrue16 = 1; + let HasOpSel = 1; + let DstRC64 = getVALUDstForVT.ret; + let Src0RC64 = getVOP3SrcForVT.ret; + let Src1RC64 = getVOP3SrcForVT.ret; + let Src2RC64 = getVOP3SrcForVT.ret; + let Src0Mod = getSrc0Mod.ret; + let Src1Mod = getSrcMod.ret; + let HasSrc2Mods = 0; + let InsVOP3OpSel = getInsVOP3Base.ret; + let Src0VOP3DPP = VGPRSrc_16; + let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; + let Src0ModVOP3DPP = getSrc0ModVOP3DPP.ret; + let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; +} def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> { let IsTrue16 = 1; let DstRC64 = getVALUDstForVT.ret; @@ -765,8 +785,10 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> { // VOP2 Instructions //===----------------------------------------------------------------------===// -let SubtargetPredicate = isGFX11Plus in -defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1_fake16>; +let SubtargetPredicate = isGFX11Plus, True16Predicate = UseRealTrue16Insts in +defm V_CNDMASK_B16_t16 : VOP2eInst <"v_cndmask_b16_t16", VOP2e_I16_I16_I16_I1_true16>; +let SubtargetPredicate = isGFX11Plus, True16Predicate = UseFakeTrue16Insts in +defm V_CNDMASK_B16_fake16 : VOP2eInst <"v_cndmask_b16_fake16", VOP2e_I16_I16_I16_I1_fake16>; defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">; let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>; @@ -1830,7 +1852,7 @@ defm V_FMAMK_F16 : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x037 defm V_FMAAK_F16 : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x038, "v_fmaak_f16">; // VOP3 only. -defm V_CNDMASK_B16 : VOP3Only_Realtriple_gfx11_gfx12<0x25d>; +defm V_CNDMASK_B16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x25d, "v_cndmask_b16">; defm V_LDEXP_F32 : VOP3Only_Realtriple_gfx11_gfx12<0x31c>; defm V_BFM_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x31d>; defm V_BCNT_U32_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x31e>; diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll index bc359d6ff3aaa..8e3c905b0eae5 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16.ll @@ -34508,14 +34508,25 @@ define bfloat @v_select_bf16(i1 %cond, bfloat %a, bfloat %b) { ; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_select_bf16: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11TRUE16-LABEL: v_select_bf16: +; GFX11TRUE16: ; %bb.0: +; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v0 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3 +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v0.h, vcc_lo +; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11FAKE16-LABEL: v_select_bf16: +; GFX11FAKE16: ; %bb.0: +; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11FAKE16-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX11FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 +; GFX11FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo +; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31] %op = select i1 %cond, bfloat %a, bfloat %b ret bfloat %op } @@ -34573,11 +34584,14 @@ define bfloat @v_select_fneg_lhs_bf16(i1 %cond, bfloat %a, bfloat %b) { ; GFX11TRUE16-LABEL: v_select_fneg_lhs_bf16: ; GFX11TRUE16: ; %bb.0: ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX11TRUE16-NEXT: v_xor_b16 v1.l, 0x8000, v1.l -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo +; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v0 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3 +; GFX11TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v0.h, v0.l, vcc_lo ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11FAKE16-LABEL: v_select_fneg_lhs_bf16: @@ -34647,11 +34661,14 @@ define bfloat @v_select_fneg_rhs_bf16(i1 %cond, bfloat %a, bfloat %b) { ; GFX11TRUE16-LABEL: v_select_fneg_rhs_bf16: ; GFX11TRUE16: ; %bb.0: ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX11TRUE16-NEXT: v_xor_b16 v2.l, 0x8000, v2.l -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo +; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v0 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3 +; GFX11TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v0.h, vcc_lo ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11FAKE16-LABEL: v_select_fneg_rhs_bf16: @@ -34749,11 +34766,15 @@ define <2 x bfloat> @v_select_v2bf16(i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b) ; GFX11TRUE16: ; %bb.0: ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v1 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX11TRUE16-NEXT: v_dual_cndmask_b32 v0, v2, v1 :: v_dual_cndmask_b32 v1, v3, v4 +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v4.l, v3.l, vcc_lo +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v2.l, v1.l, vcc_lo +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31] @@ -34856,14 +34877,19 @@ define <2 x bfloat> @v_vselect_v2bf16(<2 x i1> %cond, <2 x bfloat> %a, <2 x bflo ; GFX11TRUE16-LABEL: v_vselect_v2bf16: ; GFX11TRUE16: ; %bb.0: ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX11TRUE16-NEXT: v_and_b32_e32 v1, 1, v1 ; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX11TRUE16-NEXT: v_dual_cndmask_b32 v0, v3, v2 :: v_dual_and_b32 v1, 1, v1 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3 +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v4, v5, vcc_lo +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 1, v0 +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v5.l, v4.l, vcc_lo +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, v2.l, s0 +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31] @@ -34936,16 +34962,27 @@ define amdgpu_ps i32 @s_select_bf16(bfloat inreg %a, bfloat inreg %b, i32 %c) { ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog ; -; GFX11-LABEL: s_select_bf16: -; GFX11: ; %bb.0: -; GFX11-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e32 v0, s1, v1, vcc_lo -; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: ; return to shader part epilog +; GFX11TRUE16-LABEL: s_select_bf16: +; GFX11TRUE16: ; %bb.0: +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s0 +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, s1, v0.l, vcc_lo +; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11TRUE16-NEXT: ; return to shader part epilog +; +; GFX11FAKE16-LABEL: s_select_bf16: +; GFX11FAKE16: ; %bb.0: +; GFX11FAKE16-NEXT: v_mov_b32_e32 v1, s0 +; GFX11FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11FAKE16-NEXT: v_cndmask_b32_e32 v0, s1, v1, vcc_lo +; GFX11FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11FAKE16-NEXT: ; return to shader part epilog %cond = icmp eq i32 %c, 0 %op = select i1 %cond, bfloat %a, bfloat %b %cast = bitcast bfloat %op to i16 @@ -35038,17 +35075,21 @@ define amdgpu_ps i32 @s_select_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg ; ; GFX11TRUE16-LABEL: s_select_v2bf16: ; GFX11TRUE16: ; %bb.0: -; GFX11TRUE16-NEXT: s_lshr_b32 s2, s1, 16 -; GFX11TRUE16-NEXT: s_lshr_b32 s3, s0, 16 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s2 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, s3 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, s1 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.l, s0 +; GFX11TRUE16-NEXT: s_lshr_b32 s2, s0, 16 +; GFX11TRUE16-NEXT: s_lshr_b32 s3, s1, 16 ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11TRUE16-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_cndmask_b32 v1, v3, v4 -; GFX11TRUE16-NEXT: v_perm_b32 v0, v0, v1, 0x5040100 -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s3 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s2 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s1 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, s0 +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v0.h, vcc_lo +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v1.l, v1.h, vcc_lo +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11TRUE16-NEXT: ; return to shader part epilog ; @@ -35156,17 +35197,20 @@ define amdgpu_ps i32 @s_vselect_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg ; ; GFX11TRUE16-LABEL: s_vselect_v2bf16: ; GFX11TRUE16: ; %bb.0: -; GFX11TRUE16-NEXT: s_lshr_b32 s2, s1, 16 -; GFX11TRUE16-NEXT: s_lshr_b32 s3, s0, 16 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, s2 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, s3 -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.l, s1 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v5.l, s0 -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo +; GFX11TRUE16-NEXT: s_lshr_b32 s3, s1, 16 +; GFX11TRUE16-NEXT: s_lshr_b32 s4, s0, 16 ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc_lo +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s2, 0, v1 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s3 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s4 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s1 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, s0 +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v0.h, s2 +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v1.l, v1.h, vcc_lo +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0 @@ -36876,33 +36920,38 @@ define amdgpu_ps <2 x i32> @s_vselect_v4bf16(<4 x bfloat> inreg %a, <4 x bfloat> ; ; GFX11TRUE16-LABEL: s_vselect_v4bf16: ; GFX11TRUE16: ; %bb.0: -; GFX11TRUE16-NEXT: s_lshr_b32 s4, s3, 16 -; GFX11TRUE16-NEXT: s_lshr_b32 s5, s1, 16 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.l, s4 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v5.l, s5 -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 -; GFX11TRUE16-NEXT: s_lshr_b32 s4, s0, 16 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v7.l, s2 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v8.l, s0 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v6.l, s4 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc_lo -; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.l, s3 -; GFX11TRUE16-NEXT: s_lshr_b32 s3, s2, 16 -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v5.l, s3 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v9.l, s1 -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3) -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo +; GFX11TRUE16-NEXT: s_lshr_b32 s7, s3, 16 ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v7, v8, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v2, v4, v9, vcc_lo -; GFX11TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s4, 0, v1 +; GFX11TRUE16-NEXT: s_lshr_b32 s8, s1, 16 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, s7 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, s3 +; GFX11TRUE16-NEXT: s_lshr_b32 s3, s2, 16 +; GFX11TRUE16-NEXT: s_lshr_b32 s7, s0, 16 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s5, 0, v2 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s6, 0, v3 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.h, s8 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.h, s3 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, s7 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.h, s2 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, s0 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.h, s1 +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v0.h, s6 +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v2.l, s4 +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11TRUE16-NEXT: v_cndmask_b16 v1.h, v2.h, v3.l, vcc_lo +; GFX11TRUE16-NEXT: v_cndmask_b16 v1.l, v1.l, v3.h, s5 +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.h +; GFX11TRUE16-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11TRUE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100 -; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11TRUE16-NEXT: v_perm_b32 v0, v0, v3, 0x5040100 ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11TRUE16-NEXT: ; return to shader part epilog ; ; GFX11FAKE16-LABEL: s_vselect_v4bf16: @@ -37078,29 +37127,33 @@ define <4 x bfloat> @v_vselect_v4bf16(<4 x i1> %cond, <4 x bfloat> %a, <4 x bflo ; GFX11TRUE16-LABEL: v_vselect_v4bf16: ; GFX11TRUE16: ; %bb.0: ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11TRUE16-NEXT: v_and_b32_e32 v2, 1, v2 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v8.l, v7.l -; GFX11TRUE16-NEXT: v_mov_b16_e32 v9.l, v5.l -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v7 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v5 ; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2 -; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v3 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v2, v8, v9, vcc_lo -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4) -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX11TRUE16-NEXT: v_and_b32_e32 v1, 1, v1 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v8, 16, v6 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v9, 16, v4 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc_lo +; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v3 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v8, 16, v7 +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 1, v0 +; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v2 ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v8, v9, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3 -; GFX11TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc_lo -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11TRUE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s1, 1, v3 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v4 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v6 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v5 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s2, 1, v0 +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v6.l, v4.l, s0 +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo +; GFX11TRUE16-NEXT: v_cndmask_b16 v1.l, v8.l, v3.l, s1 +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11TRUE16-NEXT: v_cndmask_b16 v1.h, v7.l, v5.l, s2 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11TRUE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v1, v3, v1, 0x5040100 ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11FAKE16-LABEL: v_vselect_v4bf16: @@ -37368,51 +37421,51 @@ define <8 x bfloat> @v_vselect_v8bf16(<8 x i1> %cond, <8 x bfloat> %a, <8 x bflo ; GFX11TRUE16-LABEL: v_vselect_v8bf16: ; GFX11TRUE16: ; %bb.0: ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v16.l, v15.l -; GFX11TRUE16-NEXT: v_mov_b16_e32 v17.l, v11.l -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v15, 16, v15 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v11, 16, v11 -; GFX11TRUE16-NEXT: v_and_b32_e32 v6, 1, v6 -; GFX11TRUE16-NEXT: v_and_b32_e32 v4, 1, v4 -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6 -; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v3 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v6, v16, v17, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4 ; GFX11TRUE16-NEXT: v_and_b32_e32 v1, 1, v1 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v16, 16, v14 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v17, 16, v10 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v4, v14, v10, vcc_lo -; GFX11TRUE16-NEXT: v_mov_b16_e32 v10.l, v13.l -; GFX11TRUE16-NEXT: v_mov_b16_e32 v14.l, v9.l -; GFX11TRUE16-NEXT: v_and_b32_e32 v5, 1, v5 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v13, 16, v13 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v9, 16, v9 -; GFX11TRUE16-NEXT: v_and_b32_e32 v2, 1, v2 -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5 ; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v5, v16, v17, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v2, v10, v14, vcc_lo -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX11TRUE16-NEXT: v_and_b32_e32 v7, 1, v7 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v10, 16, v12 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v14, 16, v8 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v12, v8, vcc_lo +; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v3 +; GFX11TRUE16-NEXT: v_and_b32_e32 v2, 1, v2 +; GFX11TRUE16-NEXT: v_and_b32_e32 v4, 1, v4 ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v10, v14, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3 -; GFX11TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v13, v9, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7 -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11TRUE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v7, v15, v11, vcc_lo -; GFX11TRUE16-NEXT: v_perm_b32 v2, v5, v4, 0x5040100 -; GFX11TRUE16-NEXT: v_perm_b32 v3, v7, v6, 0x5040100 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 1, v0 +; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v7 +; GFX11TRUE16-NEXT: v_and_b32_e32 v1, 1, v6 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s1, 1, v3 +; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v5 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v15 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s2, 1, v0 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s3, 1, v1 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v11 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s4, 1, v4 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s5, 1, v2 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s6, 1, v3 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v8 +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v5.l, v1.l, s2 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v12 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v9 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v13 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v10 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v14 +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v15.l, v11.l, s3 +; GFX11TRUE16-NEXT: v_cndmask_b16 v1.l, v14.l, v10.l, s4 +; GFX11TRUE16-NEXT: v_cndmask_b16 v1.h, v3.l, v2.l, vcc_lo +; GFX11TRUE16-NEXT: v_cndmask_b16 v2.l, v12.l, v8.l, s0 +; GFX11TRUE16-NEXT: v_cndmask_b16 v2.h, v5.l, v4.l, s1 +; GFX11TRUE16-NEXT: v_cndmask_b16 v3.l, v13.l, v9.l, s5 +; GFX11TRUE16-NEXT: v_cndmask_b16 v3.h, v7.l, v6.l, s6 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, v2.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v6.l, v3.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, v3.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v7.l, v1.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v8.l, v0.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v9.l, v0.l +; GFX11TRUE16-NEXT: v_perm_b32 v0, v4, v5, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v1, v2, v6, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v2, v3, v7, 0x5040100 +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11TRUE16-NEXT: v_perm_b32 v3, v8, v9, 0x5040100 ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11FAKE16-LABEL: v_vselect_v8bf16: @@ -38024,101 +38077,96 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x ; GFX11TRUE16: ; %bb.0: ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11TRUE16-NEXT: scratch_load_b32 v31, off, s32 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v49.l, v26.l -; GFX11TRUE16-NEXT: v_mov_b16_e32 v50.l, v18.l -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v18, 16, v18 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v26 -; GFX11TRUE16-NEXT: v_and_b32_e32 v12, 1, v12 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v33.l, v30.l -; GFX11TRUE16-NEXT: v_mov_b16_e32 v34.l, v22.l -; GFX11TRUE16-NEXT: v_mov_b16_e32 v53.l, v24.l -; GFX11TRUE16-NEXT: v_mov_b16_e32 v54.l, v16.l -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v16, 16, v16 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v24 -; GFX11TRUE16-NEXT: v_and_b32_e32 v10, 1, v10 -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12 -; GFX11TRUE16-NEXT: v_and_b32_e32 v1, 1, v1 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v35.l, v29.l -; GFX11TRUE16-NEXT: v_mov_b16_e32 v36.l, v21.l -; GFX11TRUE16-NEXT: v_mov_b16_e32 v51.l, v25.l -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v12, v33, v34, vcc_lo -; GFX11TRUE16-NEXT: v_mov_b16_e32 v52.l, v17.l -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v17, 16, v17 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v25 +; GFX11TRUE16-NEXT: v_and_b32_e32 v9, 1, v9 ; GFX11TRUE16-NEXT: v_and_b32_e32 v8, 1, v8 -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v10 -; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v3 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v37.l, v28.l -; GFX11TRUE16-NEXT: v_mov_b16_e32 v38.l, v20.l -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v20, 16, v20 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v10, v35, v36, vcc_lo -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v28 +; GFX11TRUE16-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX11TRUE16-NEXT: v_and_b32_e32 v2, 1, v2 +; GFX11TRUE16-NEXT: v_and_b32_e32 v7, 1, v7 ; GFX11TRUE16-NEXT: v_and_b32_e32 v6, 1, v6 -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v20 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v28 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s7, 1, v9 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s8, 1, v8 +; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v3 ; GFX11TRUE16-NEXT: v_and_b32_e32 v5, 1, v5 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v39.l, v27.l -; GFX11TRUE16-NEXT: v_mov_b16_e32 v48.l, v19.l -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v19, 16, v19 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v8, v37, v38, vcc_lo -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v27 ; GFX11TRUE16-NEXT: v_and_b32_e32 v4, 1, v4 -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6 -; GFX11TRUE16-NEXT: v_and_b32_e32 v7, 1, v7 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v21, 16, v21 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v29 -; GFX11TRUE16-NEXT: v_and_b32_e32 v2, 1, v2 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v6, v39, v48, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4 -; GFX11TRUE16-NEXT: v_and_b32_e32 v9, 1, v9 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v22, 16, v22 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v30 -; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v4, v49, v50, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2 ; GFX11TRUE16-NEXT: v_and_b32_e32 v11, 1, v11 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v32.l, v23.l -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v23, 16, v23 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v2, v51, v52, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 +; GFX11TRUE16-NEXT: v_and_b32_e32 v10, 1, v10 ; GFX11TRUE16-NEXT: v_and_b32_e32 v13, 1, v13 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v53, v54, vcc_lo -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v13 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v13, v30, v22, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v11 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v11, v29, v21, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v9 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v9, v28, v20, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v7, v27, v19, vcc_lo +; GFX11TRUE16-NEXT: v_and_b32_e32 v12, 1, v12 +; GFX11TRUE16-NEXT: v_and_b32_e32 v15, 1, v15 +; GFX11TRUE16-NEXT: v_and_b32_e32 v14, 1, v14 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v19 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v27 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v53, 16, v16 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v54, 16, v24 ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v24, v16, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3 -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX11TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v25, v17, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5 -; GFX11TRUE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v5, v26, v18, vcc_lo -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11TRUE16-NEXT: v_perm_b32 v2, v5, v4, 0x5040100 -; GFX11TRUE16-NEXT: v_perm_b32 v4, v9, v8, 0x5040100 -; GFX11TRUE16-NEXT: v_perm_b32 v5, v11, v10, 0x5040100 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 1, v0 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s2, 1, v2 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s5, 1, v7 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s6, 1, v6 +; GFX11TRUE16-NEXT: v_cndmask_b16 v2.l, v28.l, v20.l, s8 +; GFX11TRUE16-NEXT: v_cndmask_b16 v2.h, v38.l, v37.l, s7 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v23 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v22 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v30 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v21 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v29 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v18 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v26 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v17 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v52, 16, v25 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s1, 1, v3 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s3, 1, v5 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s4, 1, v4 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s9, 1, v11 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s10, 1, v12 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s11, 1, v13 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s12, 1, v10 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s13, 1, v15 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s14, 1, v14 +; GFX11TRUE16-NEXT: v_cndmask_b16 v3.l, v27.l, v19.l, s6 +; GFX11TRUE16-NEXT: v_cndmask_b16 v3.h, v48.l, v39.l, s5 +; GFX11TRUE16-NEXT: v_cndmask_b16 v4.h, v54.l, v53.l, vcc_lo +; GFX11TRUE16-NEXT: v_cndmask_b16 v5.l, v24.l, v16.l, s0 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v12.l, v2.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v13.l, v2.l +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v30.l, v22.l, s10 +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v34.l, v33.l, s11 +; GFX11TRUE16-NEXT: v_cndmask_b16 v1.l, v29.l, v21.l, s12 +; GFX11TRUE16-NEXT: v_cndmask_b16 v1.h, v36.l, v35.l, s9 +; GFX11TRUE16-NEXT: v_cndmask_b16 v5.h, v52.l, v51.l, s1 +; GFX11TRUE16-NEXT: v_cndmask_b16 v6.l, v25.l, v17.l, s2 +; GFX11TRUE16-NEXT: v_cndmask_b16 v6.h, v50.l, v49.l, s3 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v7.l, v4.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v8.l, v5.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v10.l, v3.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v11.l, v3.l +; GFX11TRUE16-NEXT: v_cndmask_b16 v4.l, v26.l, v18.l, s4 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v5.l, v5.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v9.l, v6.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v6.l, v6.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v14.l, v1.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v15.l, v1.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v16.l, v0.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v17.l, v0.l +; GFX11TRUE16-NEXT: v_perm_b32 v0, v7, v8, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v1, v5, v9, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v5, v14, v15, 0x5040100 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v16, 16, v31 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v17.l, v31.l -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, v16.l -; GFX11TRUE16-NEXT: v_and_b32_e32 v14, 1, v14 -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14 -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11TRUE16-NEXT: v_dual_cndmask_b32 v14, v17, v32 :: v_dual_and_b32 v15, 1, v15 -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v15, v3, v23, vcc_lo -; GFX11TRUE16-NEXT: v_perm_b32 v3, v7, v6, 0x5040100 -; GFX11TRUE16-NEXT: v_perm_b32 v6, v13, v12, 0x5040100 -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX11TRUE16-NEXT: v_perm_b32 v7, v15, v14, 0x5040100 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v31 +; GFX11TRUE16-NEXT: v_cndmask_b16 v3.l, v31.l, v23.l, s14 +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11TRUE16-NEXT: v_cndmask_b16 v3.h, v2.l, v32.l, s13 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v7.l, v3.l +; GFX11TRUE16-NEXT: v_perm_b32 v2, v6, v4, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v4, v12, v13, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v6, v16, v17, 0x5040100 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v8.l, v3.h +; GFX11TRUE16-NEXT: v_perm_b32 v3, v10, v11, 0x5040100 +; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11TRUE16-NEXT: v_perm_b32 v7, v8, v7, 0x5040100 ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11FAKE16-LABEL: v_vselect_v16bf16: @@ -39660,217 +39708,197 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX11TRUE16-NEXT: scratch_load_b32 v85, off, s32 offset:8 ; GFX11TRUE16-NEXT: scratch_load_b32 v86, off, s32 offset:68 ; GFX11TRUE16-NEXT: scratch_load_b32 v87, off, s32 offset:4 +; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX11TRUE16-NEXT: v_and_b32_e32 v8, 1, v8 +; GFX11TRUE16-NEXT: v_and_b32_e32 v22, 1, v22 +; GFX11TRUE16-NEXT: v_and_b32_e32 v24, 1, v24 +; GFX11TRUE16-NEXT: v_and_b32_e32 v26, 1, v26 +; GFX11TRUE16-NEXT: v_and_b32_e32 v28, 1, v28 +; GFX11TRUE16-NEXT: v_and_b32_e32 v30, 1, v30 +; GFX11TRUE16-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v3 +; GFX11TRUE16-NEXT: v_and_b32_e32 v2, 1, v2 +; GFX11TRUE16-NEXT: v_and_b32_e32 v5, 1, v5 +; GFX11TRUE16-NEXT: v_and_b32_e32 v4, 1, v4 +; GFX11TRUE16-NEXT: v_and_b32_e32 v7, 1, v7 +; GFX11TRUE16-NEXT: v_and_b32_e32 v9, 1, v9 +; GFX11TRUE16-NEXT: v_and_b32_e32 v11, 1, v11 +; GFX11TRUE16-NEXT: v_and_b32_e32 v10, 1, v10 +; GFX11TRUE16-NEXT: v_and_b32_e32 v13, 1, v13 +; GFX11TRUE16-NEXT: v_and_b32_e32 v12, 1, v12 +; GFX11TRUE16-NEXT: v_and_b32_e32 v15, 1, v15 +; GFX11TRUE16-NEXT: v_and_b32_e32 v14, 1, v14 +; GFX11TRUE16-NEXT: v_and_b32_e32 v17, 1, v17 +; GFX11TRUE16-NEXT: v_and_b32_e32 v16, 1, v16 +; GFX11TRUE16-NEXT: v_and_b32_e32 v19, 1, v19 +; GFX11TRUE16-NEXT: v_and_b32_e32 v18, 1, v18 +; GFX11TRUE16-NEXT: v_and_b32_e32 v21, 1, v21 +; GFX11TRUE16-NEXT: v_and_b32_e32 v20, 1, v20 +; GFX11TRUE16-NEXT: v_and_b32_e32 v23, 1, v23 +; GFX11TRUE16-NEXT: v_and_b32_e32 v25, 1, v25 +; GFX11TRUE16-NEXT: v_and_b32_e32 v27, 1, v27 +; GFX11TRUE16-NEXT: v_and_b32_e32 v29, 1, v29 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 1, v0 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s8, 1, v8 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s22, 1, v22 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s24, 1, v24 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s26, 1, v30 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s27, 1, v26 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s29, 1, v28 +; GFX11TRUE16-NEXT: v_and_b32_e32 v6, 1, v6 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s1, 1, v3 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s2, 1, v2 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s3, 1, v5 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s4, 1, v4 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s5, 1, v7 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s7, 1, v9 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s9, 1, v11 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s10, 1, v10 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s11, 1, v13 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s12, 1, v12 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s13, 1, v15 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s14, 1, v14 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s15, 1, v17 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s16, 1, v16 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s17, 1, v19 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s18, 1, v18 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s19, 1, v21 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s20, 1, v20 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s21, 1, v23 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s23, 1, v25 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s25, 1, v27 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s28, 1, v29 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s6, 1, v6 +; GFX11TRUE16-NEXT: s_waitcnt vmcnt(32) +; GFX11TRUE16-NEXT: v_and_b32_e32 v8, 1, v31 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(31) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v96.l, v32.l +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v17, 16, v32 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(30) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v97.l, v33.l -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(29) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v98.l, v34.l +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.l, v32.l, v33.l, s26 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v16, 16, v33 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(28) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v99.l, v35.l -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(27) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v100.l, v36.l +; GFX11TRUE16-NEXT: v_cndmask_b16 v0.h, v34.l, v35.l, s29 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v9, 16, v35 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v10, 16, v34 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(26) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v101.l, v37.l -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(25) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v102.l, v38.l +; GFX11TRUE16-NEXT: v_cndmask_b16 v1.l, v36.l, v37.l, s27 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v11, 16, v37 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v12, 16, v36 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(24) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v103.l, v39.l -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(23) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v112.l, v48.l +; GFX11TRUE16-NEXT: v_cndmask_b16 v1.h, v38.l, v39.l, s24 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v13, 16, v39 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v14, 16, v38 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(22) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v113.l, v49.l -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(21) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v114.l, v50.l -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(20) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v115.l, v51.l -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(19) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v116.l, v52.l +; GFX11TRUE16-NEXT: v_cndmask_b16 v2.l, v48.l, v49.l, s22 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v15, 16, v49 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v18, 16, v48 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(18) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v117.l, v53.l -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(17) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v118.l, v54.l -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(16) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v119.l, v55.l -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(15) -; GFX11TRUE16-NEXT: v_mov_b16_e64 v128.l, v64.l +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v21, 16, v53 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v22, 16, v52 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(14) -; GFX11TRUE16-NEXT: v_mov_b16_e64 v129.l, v65.l -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(13) -; GFX11TRUE16-NEXT: v_mov_b16_e64 v130.l, v66.l -; GFX11TRUE16-NEXT: s_waitcnt vmcnt(12) -; GFX11TRUE16-NEXT: v_mov_b16_e64 v131.l, v67.l +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v25, 16, v65 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v26, 16, v64 +; GFX11TRUE16-NEXT: v_cndmask_b16 v2.h, v50.l, v51.l, s20 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(11) -; GFX11TRUE16-NEXT: v_mov_b16_e64 v132.l, v68.l +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v30, 16, v68 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(10) -; GFX11TRUE16-NEXT: v_mov_b16_e64 v133.l, v69.l +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v29, 16, v69 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(9) -; GFX11TRUE16-NEXT: v_mov_b16_e64 v134.l, v70.l +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v70 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(8) -; GFX11TRUE16-NEXT: v_mov_b16_e64 v135.l, v71.l -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v71, 16, v71 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v70, 16, v70 -; GFX11TRUE16-NEXT: v_and_b32_e32 v30, 1, v30 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v31, 16, v71 +; GFX11TRUE16-NEXT: s_waitcnt vmcnt(7) +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v80 +; GFX11TRUE16-NEXT: s_waitcnt vmcnt(6) +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v81 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(5) -; GFX11TRUE16-NEXT: v_mov_b16_e64 v146.l, v82.l +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v82 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(4) -; GFX11TRUE16-NEXT: v_mov_b16_e64 v147.l, v83.l -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v83, 16, v83 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v82, 16, v82 -; GFX11TRUE16-NEXT: v_and_b32_e32 v28, 1, v28 -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v30 -; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v3 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v83 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(3) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v30.l, v84.l -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v84, 16, v84 -; GFX11TRUE16-NEXT: v_and_b32_e32 v26, 1, v26 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v96, v96, v97, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v28 -; GFX11TRUE16-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v84 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(2) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v97.l, v85.l -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v85, 16, v85 -; GFX11TRUE16-NEXT: v_and_b32_e32 v24, 1, v24 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v98, v98, v99, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v26 -; GFX11TRUE16-NEXT: v_and_b32_e32 v7, 1, v7 -; GFX11TRUE16-NEXT: v_mov_b16_e64 v144.l, v80.l -; GFX11TRUE16-NEXT: v_mov_b16_e64 v145.l, v81.l -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v81, 16, v81 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v26, v100, v101, vcc_lo -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v80, 16, v80 -; GFX11TRUE16-NEXT: v_and_b32_e32 v22, 1, v22 -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v24 -; GFX11TRUE16-NEXT: v_and_b32_e32 v5, 1, v5 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v69, 16, v69 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v68, 16, v68 -; GFX11TRUE16-NEXT: v_and_b32_e32 v20, 1, v20 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v24, v102, v103, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v22 -; GFX11TRUE16-NEXT: v_and_b32_e32 v11, 1, v11 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v67, 16, v67 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v66, 16, v66 -; GFX11TRUE16-NEXT: v_and_b32_e32 v18, 1, v18 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v22, v112, v113, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v20 -; GFX11TRUE16-NEXT: v_and_b32_e32 v9, 1, v9 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v65, 16, v65 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v64, 16, v64 -; GFX11TRUE16-NEXT: v_and_b32_e32 v16, 1, v16 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v20, v114, v115, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v18 -; GFX11TRUE16-NEXT: v_and_b32_e32 v15, 1, v15 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v55, 16, v55 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v54, 16, v54 -; GFX11TRUE16-NEXT: v_and_b32_e32 v14, 1, v14 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v18, v116, v117, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16 -; GFX11TRUE16-NEXT: v_and_b32_e32 v13, 1, v13 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v53, 16, v53 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v52, 16, v52 -; GFX11TRUE16-NEXT: v_and_b32_e32 v12, 1, v12 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v16, v118, v119, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14 -; GFX11TRUE16-NEXT: v_and_b32_e32 v19, 1, v19 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v51, 16, v51 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v50, 16, v50 -; GFX11TRUE16-NEXT: v_and_b32_e32 v10, 1, v10 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v14, v128, v129, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12 -; GFX11TRUE16-NEXT: v_and_b32_e32 v17, 1, v17 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v49, 16, v49 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v48 -; GFX11TRUE16-NEXT: v_and_b32_e32 v8, 1, v8 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v12, v130, v131, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v10 -; GFX11TRUE16-NEXT: v_and_b32_e32 v23, 1, v23 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v39 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v38, 16, v38 -; GFX11TRUE16-NEXT: v_and_b32_e32 v6, 1, v6 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v10, v132, v133, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8 -; GFX11TRUE16-NEXT: v_and_b32_e32 v21, 1, v21 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v37 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v36, 16, v36 -; GFX11TRUE16-NEXT: v_and_b32_e32 v4, 1, v4 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v8, v134, v135, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6 -; GFX11TRUE16-NEXT: v_and_b32_e32 v27, 1, v27 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v35, 16, v35 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v34, 16, v34 -; GFX11TRUE16-NEXT: v_and_b32_e32 v2, 1, v2 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v6, v144, v145, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4 -; GFX11TRUE16-NEXT: v_and_b32_e32 v25, 1, v25 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v33, 16, v33 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v32, 16, v32 -; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v4, v146, v147, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2 -; GFX11TRUE16-NEXT: v_and_b32_e32 v31, 1, v31 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v37, 16, v85 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v28.l, v86.l +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v48, 16, v86 ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11TRUE16-NEXT: v_mov_b16_e32 v99.l, v87.l -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v87, 16, v87 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v2, v30, v97, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX11TRUE16-NEXT: v_and_b32_e32 v29, 1, v29 -; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v86, 16, v86 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v30.l, v84.l -; GFX11TRUE16-NEXT: v_mov_b16_e32 v84.l, v85.l -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v28, v99, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v31 -; GFX11TRUE16-NEXT: v_mov_b16_e32 v28.l, v86.l -; GFX11TRUE16-NEXT: v_mov_b16_e32 v85.l, v87.l -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v31, v32, v33, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v29 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v29, v34, v35, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v27 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v27, v36, v37, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v25 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v25, v38, v39, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v23 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v23, v48, v49, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v21 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v21, v50, v51, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v19 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v19, v52, v53, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v17 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v17, v54, v55, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v15, v64, v65, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v13 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v13, v66, v67, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v11 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v11, v68, v69, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v7, v80, v81, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v3, v30, v84, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v1, v28, v85, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5 -; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11TRUE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v5, v82, v83, vcc_lo -; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v9 -; GFX11TRUE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100 -; GFX11TRUE16-NEXT: v_perm_b32 v3, v7, v6, 0x5040100 -; GFX11TRUE16-NEXT: v_perm_b32 v6, v13, v12, 0x5040100 -; GFX11TRUE16-NEXT: v_perm_b32 v2, v5, v4, 0x5040100 -; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v9, v70, v71, vcc_lo -; GFX11TRUE16-NEXT: v_perm_b32 v5, v11, v10, 0x5040100 -; GFX11TRUE16-NEXT: v_perm_b32 v7, v15, v14, 0x5040100 -; GFX11TRUE16-NEXT: v_perm_b32 v10, v21, v20, 0x5040100 -; GFX11TRUE16-NEXT: v_perm_b32 v11, v23, v22, 0x5040100 -; GFX11TRUE16-NEXT: v_perm_b32 v4, v9, v8, 0x5040100 -; GFX11TRUE16-NEXT: v_perm_b32 v8, v17, v16, 0x5040100 -; GFX11TRUE16-NEXT: v_perm_b32 v9, v19, v18, 0x5040100 -; GFX11TRUE16-NEXT: v_perm_b32 v12, v25, v24, 0x5040100 -; GFX11TRUE16-NEXT: v_perm_b32 v13, v27, v26, 0x5040100 -; GFX11TRUE16-NEXT: v_perm_b32 v14, v29, v98, 0x5040100 -; GFX11TRUE16-NEXT: v_perm_b32 v15, v31, v96, 0x5040100 +; GFX11TRUE16-NEXT: v_cndmask_b16 v7.h, v86.l, v87.l, s0 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v39, 16, v87 +; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 1, v8 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v19, 16, v51 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v20, 16, v50 +; GFX11TRUE16-NEXT: v_cndmask_b16 v3.l, v52.l, v53.l, s18 +; GFX11TRUE16-NEXT: v_cndmask_b16 v3.h, v54.l, v55.l, s16 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v23, 16, v55 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v24, 16, v54 +; GFX11TRUE16-NEXT: v_cndmask_b16 v4.l, v64.l, v65.l, s14 +; GFX11TRUE16-NEXT: v_cndmask_b16 v4.h, v66.l, v67.l, s12 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v27, 16, v67 +; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v28, 16, v66 +; GFX11TRUE16-NEXT: v_cndmask_b16 v5.h, v70.l, v71.l, s8 +; GFX11TRUE16-NEXT: v_cndmask_b16 v6.h, v82.l, v83.l, s4 +; GFX11TRUE16-NEXT: v_cndmask_b16 v8.l, v10.l, v9.l, s28 +; GFX11TRUE16-NEXT: v_cndmask_b16 v8.h, v12.l, v11.l, s25 +; GFX11TRUE16-NEXT: v_cndmask_b16 v9.l, v14.l, v13.l, s23 +; GFX11TRUE16-NEXT: v_cndmask_b16 v9.h, v18.l, v15.l, s21 +; GFX11TRUE16-NEXT: v_cndmask_b16 v10.h, v22.l, v21.l, s17 +; GFX11TRUE16-NEXT: v_cndmask_b16 v11.h, v26.l, v25.l, s13 +; GFX11TRUE16-NEXT: v_cndmask_b16 v12.h, v30.l, v29.l, s9 +; GFX11TRUE16-NEXT: v_cndmask_b16 v13.l, v32.l, v31.l, s7 +; GFX11TRUE16-NEXT: v_cndmask_b16 v13.h, v34.l, v33.l, s5 +; GFX11TRUE16-NEXT: v_cndmask_b16 v14.l, v36.l, v35.l, s3 +; GFX11TRUE16-NEXT: v_cndmask_b16 v14.h, v38.l, v37.l, s1 +; GFX11TRUE16-NEXT: v_cndmask_b16 v15.l, v48.l, v39.l, vcc_lo +; GFX11TRUE16-NEXT: v_cndmask_b16 v15.h, v17.l, v16.l, s0 +; GFX11TRUE16-NEXT: v_cndmask_b16 v5.l, v68.l, v69.l, s10 +; GFX11TRUE16-NEXT: v_cndmask_b16 v6.l, v80.l, v81.l, s6 +; GFX11TRUE16-NEXT: v_cndmask_b16 v7.l, v84.l, v85.l, s2 +; GFX11TRUE16-NEXT: v_cndmask_b16 v10.l, v20.l, v19.l, s19 +; GFX11TRUE16-NEXT: v_cndmask_b16 v11.l, v24.l, v23.l, s15 +; GFX11TRUE16-NEXT: v_cndmask_b16 v12.l, v28.l, v27.l, s11 +; GFX11TRUE16-NEXT: v_mov_b16_e32 v18.l, v7.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v19.l, v6.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v20.l, v5.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v21.l, v4.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v22.l, v4.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v23.l, v3.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v24.l, v3.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v25.l, v2.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v26.l, v2.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v27.l, v1.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v28.l, v1.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v29.l, v0.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v30.l, v0.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v0.l, v15.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v14.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, v14.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v3.l, v13.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.l, v13.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v13.l, v12.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v14.l, v11.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v16.l, v10.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v17.l, v9.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v31.l, v9.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v32.l, v8.h +; GFX11TRUE16-NEXT: v_mov_b16_e32 v33.l, v8.l +; GFX11TRUE16-NEXT: v_mov_b16_e32 v15.l, v15.h +; GFX11TRUE16-NEXT: v_perm_b32 v0, v0, v18, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v1, v1, v7, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v2, v2, v19, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v3, v3, v6, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v4, v4, v20, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v5, v13, v5, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v6, v12, v21, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v7, v14, v22, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v8, v11, v23, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v9, v16, v24, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v10, v10, v25, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v11, v17, v26, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v12, v31, v27, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v13, v32, v28, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v14, v33, v29, 0x5040100 +; GFX11TRUE16-NEXT: v_perm_b32 v15, v15, v30, 0x5040100 ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11FAKE16-LABEL: v_vselect_v32bf16: diff --git a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll index f20c1ccb2d63e..c6cc479b5deb1 100644 --- a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll +++ b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll @@ -3,6 +3,7 @@ ; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX12 %s declare i32 @llvm.amdgcn.workitem.id.x() #1 declare half @llvm.fabs.f16(half) @@ -90,6 +91,24 @@ define amdgpu_kernel void @v_cnd_nan_nosgpr(ptr addrspace(1) %out, i32 %c, ptr a ; GFX11-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: v_cnd_nan_nosgpr: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x34 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: v_mov_b32_e32 v1, 0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_load_b32 v0, v0, s[0:1] +; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_cmp_eq_u32 s2, 0 +; GFX12-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; GFX12-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX12-NEXT: s_endpgm %idx = call i32 @llvm.amdgcn.workitem.id.x() #1 %f.gep = getelementptr float, ptr addrspace(1) %fptr, i32 %idx %f = load float, ptr addrspace(1) %f.gep @@ -155,6 +174,18 @@ define amdgpu_kernel void @v_cnd_nan(ptr addrspace(1) %out, i32 %c, float %f) #0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, -1, s3, s[4:5] ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: v_cnd_nan: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX12-NEXT: v_mov_b32_e32 v0, 0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_cmp_eq_u32 s2, 0 +; GFX12-NEXT: s_cselect_b32 s2, s3, -1 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: v_mov_b32_e32 v1, s2 +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_endpgm %setcc = icmp ne i32 %c, 0 %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f store float %select, ptr addrspace(1) %out @@ -220,6 +251,21 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %o ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, s1, s[4:5] ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: fcmp_sgprX_k0_select_k1_sgprZ_f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_clause 0x1 +; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x4c +; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_cmp_nlg_f32 s0, 0 +; GFX12-NEXT: s_cselect_b32 s0, s1, 1.0 +; GFX12-NEXT: v_mov_b32_e32 v1, s0 +; GFX12-NEXT: global_store_b32 v0, v1, s[2:3] +; GFX12-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext @@ -285,6 +331,19 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(ptr addrspace(1) %o ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, s6, s[2:3] ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: fcmp_sgprX_k0_select_k1_sgprX_f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_cmp_nlg_f32 s2, 0 +; GFX12-NEXT: s_cselect_b32 s2, s2, 1.0 +; GFX12-NEXT: v_mov_b32_e32 v1, s2 +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext @@ -350,6 +409,21 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(ptr addrspace(1) %o ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, s1, s[4:5] ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: fcmp_sgprX_k0_select_k0_sgprZ_f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_clause 0x1 +; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x4c +; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_cmp_nlg_f32 s0, 0 +; GFX12-NEXT: s_cselect_b32 s0, s1, 0 +; GFX12-NEXT: v_mov_b32_e32 v1, s0 +; GFX12-NEXT: global_store_b32 v0, v1, s[2:3] +; GFX12-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext @@ -415,6 +489,19 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(ptr addrspace(1) %o ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, s6, s[2:3] ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: fcmp_sgprX_k0_select_k0_sgprX_f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_cmp_nlg_f32 s2, 0 +; GFX12-NEXT: s_cselect_b32 s2, s2, 0 +; GFX12-NEXT: v_mov_b32_e32 v1, s2 +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext @@ -498,6 +585,23 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_vgprZ_f32(ptr addrspace(1) %o ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: fcmp_sgprX_k0_select_k0_vgprZ_f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x34 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_load_b32 v1, v0, s[0:1] +; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_cmp_nlg_f32 s2, 0 +; GFX12-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %z.gep = getelementptr inbounds float, ptr addrspace(1) %z.ptr, i64 %tid.ext @@ -583,6 +687,23 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_vgprZ_f32(ptr addrspace(1) %o ; GFX11-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: fcmp_sgprX_k0_select_k1_vgprZ_f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x34 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_load_b32 v1, v0, s[0:1] +; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_cmp_nlg_f32 s2, 0 +; GFX12-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %z.gep = getelementptr inbounds float, ptr addrspace(1) %z.ptr, i64 %tid.ext @@ -661,6 +782,21 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(ptr addrspace(1) %o ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, s4, vcc ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: fcmp_vgprX_k0_select_k1_sgprZ_f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: s_load_b32 s4, s[4:5], 0x34 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_load_b32 v1, v0, s[2:3] +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1 +; GFX12-NEXT: v_cndmask_b32_e64 v1, 1.0, s4, vcc +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext @@ -751,6 +887,24 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_f32(ptr addrspace(1) %o ; GFX11-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: fcmp_vgprX_k0_select_k1_vgprZ_f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_clause 0x1 +; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_load_b32 v2, v0, s[4:5] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_cmp_le_f32_e32 vcc, 0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext @@ -843,6 +997,24 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(ptr addrspace(1) %o ; GFX11-NEXT: v_cndmask_b32_e32 v1, 2, v2, vcc ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: icmp_vgprX_k0_select_k1_vgprZ_i32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_clause 0x1 +; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_load_b32 v2, v0, s[4:5] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_cmp_lt_i32_e32 vcc, -1, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v1, 2, v2, vcc +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds i32, ptr addrspace(1) %x.ptr, i64 %tid.ext @@ -939,6 +1111,25 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(ptr addrspace(1) %o ; GFX11-NEXT: v_cndmask_b32_e32 v0, 2, v2, vcc ; GFX11-NEXT: global_store_b64 v4, v[0:1], s[0:1] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: icmp_vgprX_k0_select_k1_vgprZ_i64: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_clause 0x1 +; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_lshlrev_b32_e32 v4, 3, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_load_b64 v[0:1], v4, s[2:3] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_load_b64 v[2:3], v4, s[4:5] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_cmp_lt_i64_e32 vcc, -1, v[0:1] +; GFX12-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; GFX12-NEXT: v_cndmask_b32_e32 v0, 2, v2, vcc +; GFX12-NEXT: global_store_b64 v4, v[0:1], s[0:1] +; GFX12-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds i64, ptr addrspace(1) %x.ptr, i64 %tid.ext @@ -1048,6 +1239,28 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_vgprZ_k1_v4f32(ptr addrspace(1) ; GFX11-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc ; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: fcmp_vgprX_k0_select_vgprZ_k1_v4f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_clause 0x1 +; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX12-NEXT: v_lshlrev_b32_e32 v4, 4, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_load_b32 v5, v1, s[2:3] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_load_b128 v[0:3], v4, s[4:5] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_cmp_nge_f32_e32 vcc, 4.0, v5 +; GFX12-NEXT: v_cndmask_b32_e32 v3, 4.0, v3, vcc +; GFX12-NEXT: v_cndmask_b32_e32 v2, -0.5, v2, vcc +; GFX12-NEXT: v_cndmask_b32_e32 v1, 2.0, v1, vcc +; GFX12-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc +; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1] +; GFX12-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext @@ -1157,6 +1370,28 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_v4f32(ptr addrspace(1) ; GFX11-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc ; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: fcmp_vgprX_k0_select_k1_vgprZ_v4f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_clause 0x1 +; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX12-NEXT: v_lshlrev_b32_e32 v4, 4, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_load_b32 v5, v1, s[2:3] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_load_b128 v[0:3], v4, s[4:5] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_cmp_ge_f32_e32 vcc, 4.0, v5 +; GFX12-NEXT: v_cndmask_b32_e32 v3, 4.0, v3, vcc +; GFX12-NEXT: v_cndmask_b32_e32 v2, -0.5, v2, vcc +; GFX12-NEXT: v_cndmask_b32_e32 v1, 2.0, v1, vcc +; GFX12-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc +; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1] +; GFX12-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext @@ -1268,6 +1503,28 @@ define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_v4f32(ptr addrspace(1) ; GFX11-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc ; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: fcmp_k0_vgprX_select_k1_vgprZ_v4f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_clause 0x1 +; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX12-NEXT: v_lshlrev_b32_e32 v4, 4, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_load_b32 v5, v1, s[2:3] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_load_b128 v[0:3], v4, s[4:5] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_cmp_le_f32_e32 vcc, 4.0, v5 +; GFX12-NEXT: v_cndmask_b32_e32 v3, 4.0, v3, vcc +; GFX12-NEXT: v_cndmask_b32_e32 v2, -0.5, v2, vcc +; GFX12-NEXT: v_cndmask_b32_e32 v1, 2.0, v1, vcc +; GFX12-NEXT: v_cndmask_b32_e32 v0, 1.0, v0, vcc +; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1] +; GFX12-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext @@ -1375,6 +1632,29 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(ptr addrspace(1) %ou ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] ; GFX11-NEXT: global_store_b8 v0, v1, s[8:9] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: icmp_vgprX_k0_select_k1_vgprZ_i1: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_clause 0x1 +; GFX12-NEXT: s_load_b128 s[8:11], s[4:5], 0x24 +; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x34 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_load_b32 v1, v1, s[10:11] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_load_u8 v2, v0, s[0:1] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1 +; GFX12-NEXT: v_and_b32_e32 v2, 1, v2 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v2 +; GFX12-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] +; GFX12-NEXT: global_store_b8 v0, v1, s[8:9] +; GFX12-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds i32, ptr addrspace(1) %x.ptr, i64 %tid.ext @@ -1479,6 +1759,26 @@ define amdgpu_kernel void @fcmp_vgprX_k0_selectf64_k1_vgprZ_f32(ptr addrspace(1) ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: fcmp_vgprX_k0_selectf64_k1_vgprZ_f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_clause 0x1 +; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX12-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_load_b32 v3, v1, s[2:3] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_load_b64 v[0:1], v2, s[4:5] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_cmp_le_f32_e32 vcc, 0, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, 0x3ff00000, v1, vcc +; GFX12-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX12-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext @@ -1581,6 +1881,26 @@ define amdgpu_kernel void @fcmp_vgprX_k0_selecti64_k1_vgprZ_f32(ptr addrspace(1) ; GFX11-NEXT: v_cndmask_b32_e32 v0, 3, v0, vcc ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: fcmp_vgprX_k0_selecti64_k1_vgprZ_f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_clause 0x1 +; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; GFX12-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_load_b32 v3, v1, s[2:3] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_load_b64 v[0:1], v2, s[4:5] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_cmp_nlg_f32_e32 vcc, 0, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX12-NEXT: v_cndmask_b32_e32 v0, 3, v0, vcc +; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX12-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext @@ -1674,6 +1994,24 @@ define amdgpu_kernel void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(ptr addrspace(1) ; GFX11-NEXT: v_cndmask_b32_e32 v1, 4.0, v2, vcc ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: icmp_vgprX_k0_selectf32_k1_vgprZ_i32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_clause 0x1 +; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_load_b32 v2, v0, s[4:5] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_cmp_gt_u32_e32 vcc, 2, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v1, 4.0, v2, vcc +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds i32, ptr addrspace(1) %x.ptr, i64 %tid.ext @@ -1783,6 +2121,28 @@ define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2(ptr add ; GFX11-NEXT: global_store_b32 v0, v2, s[0:1] dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_clause 0x1 +; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_load_b32 v2, v0, s[4:5] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_cmp_nle_f32_e32 vcc, 4.0, v1 +; GFX12-NEXT: v_cndmask_b32_e64 v1, v2, -1.0, vcc +; GFX12-NEXT: v_cndmask_b32_e64 v2, v2, -2.0, vcc +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: global_store_b32 v0, v2, s[0:1] scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 %x.gep = getelementptr inbounds float, ptr addrspace(1) %x.ptr, i64 %tid.ext @@ -1890,6 +2250,27 @@ define amdgpu_kernel void @v_cndmask_abs_neg_f16(ptr addrspace(1) %out, i32 %c, ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GFX11-NEXT: global_store_b16 v2, v0, s[0:1] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: v_cndmask_abs_neg_f16: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x34 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: v_mov_b32_e32 v2, 0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX12-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_load_u16 v0, v0, s[0:1] +; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_cmp_lg_u32 s2, 0 +; GFX12-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_and_b32_e32 v1, 0x7fff, v0 +; GFX12-NEXT: v_xor_b32_e32 v0, 0x8000, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX12-NEXT: global_store_b16 v2, v0, s[0:1] +; GFX12-NEXT: s_endpgm %idx = call i32 @llvm.amdgcn.workitem.id.x() #1 %f.gep = getelementptr half, ptr addrspace(1) %fptr, i32 %idx %f = load half, ptr addrspace(1) %f.gep @@ -1981,6 +2362,24 @@ define amdgpu_kernel void @v_cndmask_abs_neg_f32(ptr addrspace(1) %out, i32 %c, ; GFX11-NEXT: v_cndmask_b32_e64 v0, -v0, |v0|, s[2:3] ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: v_cndmask_abs_neg_f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x34 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: v_mov_b32_e32 v1, 0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_load_b32 v0, v0, s[0:1] +; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_cmp_lg_u32 s2, 0 +; GFX12-NEXT: s_cselect_b64 s[2:3], -1, 0 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_cndmask_b32_e64 v0, -v0, |v0|, s[2:3] +; GFX12-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX12-NEXT: s_endpgm %idx = call i32 @llvm.amdgcn.workitem.id.x() #1 %f.gep = getelementptr float, ptr addrspace(1) %fptr, i32 %idx %f = load float, ptr addrspace(1) %f.gep @@ -2086,6 +2485,28 @@ define amdgpu_kernel void @v_cndmask_abs_neg_f64(ptr addrspace(1) %out, i32 %c, ; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GFX11-NEXT: global_store_b64 v3, v[0:1], s[0:1] ; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: v_cndmask_abs_neg_f64: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x34 +; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX12-NEXT: v_mov_b32_e32 v3, 0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX12-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: global_load_b64 v[0:1], v0, s[0:1] +; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_cmp_lg_u32 s2, 0 +; GFX12-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: v_and_b32_e32 v2, 0x7fffffff, v1 +; GFX12-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v0, v0, vcc +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX12-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX12-NEXT: global_store_b64 v3, v[0:1], s[0:1] +; GFX12-NEXT: s_endpgm %idx = call i32 @llvm.amdgcn.workitem.id.x() #1 %f.gep = getelementptr double, ptr addrspace(1) %fptr, i32 %idx %f = load double, ptr addrspace(1) %f.gep diff --git a/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir b/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir index c936c13ac6c66..d91ee54215924 100644 --- a/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir @@ -159,16 +159,16 @@ name: mask_hazard_cndmask_dpp3 body: | bb.0: ; GFX11-LABEL: name: mask_hazard_cndmask_dpp3 - ; GFX11: $vgpr0 = V_CNDMASK_B16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec + ; GFX11: $vgpr0 = V_CNDMASK_B16_fake16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_cndmask_dpp3 - ; GFX12: $vgpr0 = V_CNDMASK_B16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec + ; GFX12: $vgpr0 = V_CNDMASK_B16_fake16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec ; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc ; GFX12-NEXT: S_ENDPGM 0 - $vgpr0 = V_CNDMASK_B16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec + $vgpr0 = V_CNDMASK_B16_fake16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc S_ENDPGM 0 ... diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index 6bc92bc29ea8a..40e3fbda47787 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -899,104 +899,131 @@ v_bfm_b32 v5, src_scc, vcc_lo v_bfm_b32 v255, 0xaf123456, vcc_hi // GFX11: v_bfm_b32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_cndmask_b16 v5, v1, src_scc, s3 -// W32: v_cndmask_b16 v5, v1, src_scc, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:32: error: invalid operand for instruction +v_cndmask_b16 v5.l, v1.l, src_scc, s3 +// W32: v_cndmask_b16 v5.l, v1.l, src_scc, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:36: error: invalid operand for instruction -v_cndmask_b16 v5, v255, 0.5, s3 -// W32: v_cndmask_b16 v5, v255, 0.5, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:30: error: invalid operand for instruction +v_cndmask_b16 v5.l, v255.l, 0.5, s3 +// W32: v_cndmask_b16 v5.l, v255.l, 0.5, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:34: error: invalid operand for instruction -v_cndmask_b16 v5, s105, s105, s3 -// W32: v_cndmask_b16 v5, s105, s105, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction - -v_cndmask_b16 v5, vcc_hi, v2, s3 -// W32: v_cndmask_b16 v5, vcc_hi, v2, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x0e,0x00] -// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, s105, s105, s3 +// W32: v_cndmask_b16 v5.l, s105, s105, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, ttmp15, ttmp15, s3 -// W32: v_cndmask_b16 v5, ttmp15, ttmp15, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, vcc_hi, v2.l, s3 +// W32: v_cndmask_b16 v5.l, vcc_hi, v2.l, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x0e,0x00] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, m0, v255, s3 -// W32: v_cndmask_b16 v5, m0, v255, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x0f,0x00] -// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, ttmp15, ttmp15, s3 +// W32: v_cndmask_b16 v5.l, ttmp15, ttmp15, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, exec_lo, exec_lo, s3 -// W32: v_cndmask_b16 v5, exec_lo, exec_lo, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, m0, v255.l, s3 +// W32: v_cndmask_b16 v5.l, m0, v255.l, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x0f,0x00] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, exec_hi, exec_hi, s3 -// W32: v_cndmask_b16 v5, exec_hi, exec_hi, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, exec_lo, exec_lo, s3 +// W32: v_cndmask_b16 v5.l, exec_lo, exec_lo, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, null, m0, s105 -// W32: v_cndmask_b16 v5, null, m0, s105 ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0xa4,0x01] -// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, exec_hi, exec_hi, s3 +// W32: v_cndmask_b16 v5.l, exec_hi, exec_hi, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, -1, -|vcc_lo|, vcc_lo -// W32: v_cndmask_b16 v5, -1, -|vcc_lo|, vcc_lo ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa8,0x41] -// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, null, m0, s105 +// W32: v_cndmask_b16 v5.l, null, m0, s105 ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0xa4,0x01] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, 0.5, -1, vcc_hi -// W32: v_cndmask_b16 v5, 0.5, -1, vcc_hi ; encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xad,0x01] -// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, -1, -|vcc_lo|, vcc_lo +// W32: v_cndmask_b16 v5.l, -1, -|vcc_lo|, vcc_lo ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa8,0x41] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, -|src_scc|, null, ttmp15 -// W32: v_cndmask_b16 v5, -|src_scc|, null, ttmp15 ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xec,0x21] +v_cndmask_b16 v5.l, 0.5, -1, vcc_hi +// W32: v_cndmask_b16 v5.l, 0.5, -1, vcc_hi ; encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xad,0x01] // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, v1, src_scc, s[6:7] -// W64: v_cndmask_b16 v5, v1, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:32: error: invalid operand for instruction +v_cndmask_b16 v5.l, -|src_scc|, null, ttmp15 +// W32: v_cndmask_b16 v5.l, -|src_scc|, null, ttmp15 ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xec,0x21] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction -v_cndmask_b16 v5, v255, 0.5, s[6:7] -// W64: v_cndmask_b16 v5, v255, 0.5, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:30: error: invalid operand for instruction +v_cndmask_b16 v5.l, v1.l, src_scc, s[6:7] +// W64: v_cndmask_b16 v5.l, v1.l, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:36: error: invalid operand for instruction -v_cndmask_b16 v5, s105, s105, s[6:7] -// W64: v_cndmask_b16 v5, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, v255.l, 0.5, s[6:7] +// W64: v_cndmask_b16 v5.l, v255.l, 0.5, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:34: error: invalid operand for instruction -v_cndmask_b16 v5, vcc_hi, v2, s[6:7] -// W64: v_cndmask_b16 v5, vcc_hi, v2, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00] -// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, s105, s105, s[6:7] +// W64: v_cndmask_b16 v5.l, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, ttmp15, ttmp15, s[6:7] -// W64: v_cndmask_b16 v5, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, vcc_hi, v2.l, s[6:7] +// W64: v_cndmask_b16 v5.l, vcc_hi, v2.l, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, m0, v255, s[6:7] -// W64: v_cndmask_b16 v5, m0, v255, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] -// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, ttmp15, ttmp15, s[6:7] +// W64: v_cndmask_b16 v5.l, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, exec_lo, exec_lo, s[6:7] -// W64: v_cndmask_b16 v5, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, m0, v255.l, s[6:7] +// W64: v_cndmask_b16 v5.l, m0, v255.l, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, exec_hi, exec_hi, s[6:7] -// W64: v_cndmask_b16 v5, exec_hi, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, exec_lo, exec_lo, s[6:7] +// W64: v_cndmask_b16 v5.l, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, null, m0, s[6:7] -// W64: v_cndmask_b16 v5, null, m0, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, exec_hi, exec_hi, s[6:7] +// W64: v_cndmask_b16 v5.l, exec_hi, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, -1, -|vcc_lo|, s[104:105] -// W64: v_cndmask_b16 v5, -1, -|vcc_lo|, s[104:105] ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41] -// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, null, m0, s[6:7] +// W64: v_cndmask_b16 v5.l, null, m0, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cndmask_b16 v5.l, -1, -|vcc_lo|, s[104:105] +// W64: v_cndmask_b16 v5.l, -1, -|vcc_lo|, s[104:105] ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cndmask_b16 v5.l, 0.5, -1, vcc +// W64: v_cndmask_b16 v5.l, 0.5, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xa9,0x01] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cndmask_b16 v5.l, -|src_scc|, null, ttmp[14:15] +// W64: v_cndmask_b16 v5.l, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cndmask_b16 v255.l, -|0xfe0b|, -|vcc_hi|, null +// GFX11: v_cndmask_b16 v255.l, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +v_cndmask_b16 v5.l, 0x3800, -1, vcc_lo +// W32: v_cndmask_b16 v5.l, 0x3800, -1, vcc_lo ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cndmask_b16 v5.l, 0x3800, -1, vcc +// W64: v_cndmask_b16 v5.l, 0x3800, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cndmask_b16 v5.l, v255.h, 0.5, s3 +// W32: v_cndmask_b16 v5.l, v255.h, 0.5, s3 ; encoding: [0x05,0x08,0x5d,0xd6,0xff,0xe1,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:34: error: invalid operand for instruction + +v_cndmask_b16 v5.l, m0, v255.h, s3 +// W32: v_cndmask_b16 v5.l, m0, v255.h, s3 ; encoding: [0x05,0x10,0x5d,0xd6,0x7d,0xfe,0x0f,0x00] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, 0.5, -1, vcc -// W64: v_cndmask_b16 v5, 0.5, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xa9,0x01] -// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, v255.h, 0.5, s[6:7] +// W64: v_cndmask_b16 v5.l, v255.h, 0.5, s[6:7] ; encoding: [0x05,0x08,0x5d,0xd6,0xff,0xe1,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:34: error: invalid operand for instruction -v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15] -// W64: v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21] +v_cndmask_b16 v5.l, m0, v255.h, s[6:7] +// W64: v_cndmask_b16 v5.l, m0, v255.h, s[6:7] ; encoding: [0x05,0x10,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null -// GFX11: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_cndmask_b16 v255.h, -|0xfe0b|, -|vcc_hi|, null +// GFX11: v_cndmask_b16 v255.h, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x43,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] v_cubeid_f32 v5, v1, v2, s3 // GFX11: v_cubeid_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s index 5fa1334aa6e95..2bff644605ff6 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s @@ -765,112 +765,139 @@ v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[0,1,2,3] -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[0,1,2,3] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_mirror -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_half_mirror -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:1 -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:1 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:15 -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:15 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:1 -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:1 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:15 -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:15 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_ror:1 -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_ror:1 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s105 row_ror:15 -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s105 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 row_ror:15 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 -// W32: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] -// W64-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13] -// W64-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[3,2,1,0] +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[0,1,2,3] +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_mirror +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_half_mirror +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:1 +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15 -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:15 +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1 -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:1 +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15 -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:15 +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1 -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:1 +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15 -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:15 +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 -// W64: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] -// W32-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W64: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13] -// W32-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x05,0x30] +v_cndmask_b16_e64_dpp v255.l, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_cndmask_b16_e64_dpp v255.l, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x05,0x30] + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_hi row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_hi row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xae,0x41,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_hi row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_hi row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x5d,0xd6,0xfa,0x04,0xae,0x41,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x12,0x5d,0xd6,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x12,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x05,0x30] v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s index 2fc02061c59de..2f9b5efca9e17 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s @@ -424,44 +424,71 @@ v_bfm_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_bfm_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_bfm_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x1d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_cndmask_b16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// W32: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xee,0x21,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xee,0x21,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 -// W64: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x5d,0xd6,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] +v_cndmask_b16_e64_dpp v255.l, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_cndmask_b16_e64_dpp v255.l, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x5d,0xd6,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xae,0x41,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x5d,0xd6,0xe9,0x04,0xae,0x41,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x12,0x5d,0xd6,0xea,0x04,0xee,0x21,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x12,0x5d,0xd6,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x43,0x5d,0xd6,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s index 3e7b7d28c2e97..cd4ed2b9458e6 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s @@ -857,104 +857,131 @@ v_bfm_b32 v5, src_scc, vcc_lo v_bfm_b32 v255, 0xaf123456, vcc_hi // GFX12: v_bfm_b32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_cndmask_b16 v5, v1, src_scc, s3 -// W32: v_cndmask_b16 v5, v1, src_scc, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:32: error: invalid operand for instruction +v_cndmask_b16 v5.l, v1.l, src_scc, s3 +// W32: v_cndmask_b16 v5.l, v1.l, src_scc, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:36: error: invalid operand for instruction -v_cndmask_b16 v5, v255, 0.5, s3 -// W32: v_cndmask_b16 v5, v255, 0.5, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:30: error: invalid operand for instruction +v_cndmask_b16 v5.l, v255.l, 0.5, s3 +// W32: v_cndmask_b16 v5.l, v255.l, 0.5, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:34: error: invalid operand for instruction -v_cndmask_b16 v5, s105, s105, s3 -// W32: v_cndmask_b16 v5, s105, s105, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction - -v_cndmask_b16 v5, vcc_hi, v2, s3 -// W32: v_cndmask_b16 v5, vcc_hi, v2, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x0e,0x00] -// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, s105, s105, s3 +// W32: v_cndmask_b16 v5.l, s105, s105, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, ttmp15, ttmp15, s3 -// W32: v_cndmask_b16 v5, ttmp15, ttmp15, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, vcc_hi, v2.l, s3 +// W32: v_cndmask_b16 v5.l, vcc_hi, v2.l, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x0e,0x00] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, m0, v255, s3 -// W32: v_cndmask_b16 v5, m0, v255, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x0f,0x00] -// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, ttmp15, ttmp15, s3 +// W32: v_cndmask_b16 v5.l, ttmp15, ttmp15, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, exec_lo, exec_lo, s3 -// W32: v_cndmask_b16 v5, exec_lo, exec_lo, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, m0, v255.l, s3 +// W32: v_cndmask_b16 v5.l, m0, v255.l, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x0f,0x00] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, exec_hi, exec_hi, s3 -// W32: v_cndmask_b16 v5, exec_hi, exec_hi, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, exec_lo, exec_lo, s3 +// W32: v_cndmask_b16 v5.l, exec_lo, exec_lo, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, null, m0, s105 -// W32: v_cndmask_b16 v5, null, m0, s105 ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0xa4,0x01] -// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, exec_hi, exec_hi, s3 +// W32: v_cndmask_b16 v5.l, exec_hi, exec_hi, s3 ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, -1, -|vcc_lo|, vcc_lo -// W32: v_cndmask_b16 v5, -1, -|vcc_lo|, vcc_lo ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa8,0x41] -// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, null, m0, s105 +// W32: v_cndmask_b16 v5.l, null, m0, s105 ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0xa4,0x01] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, 0.5, -1, vcc_hi -// W32: v_cndmask_b16 v5, 0.5, -1, vcc_hi ; encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xad,0x01] -// W64-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, -1, -|vcc_lo|, vcc_lo +// W32: v_cndmask_b16 v5.l, -1, -|vcc_lo|, vcc_lo ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa8,0x41] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, -|src_scc|, null, ttmp15 -// W32: v_cndmask_b16 v5, -|src_scc|, null, ttmp15 ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xec,0x21] +v_cndmask_b16 v5.l, 0.5, -1, vcc_hi +// W32: v_cndmask_b16 v5.l, 0.5, -1, vcc_hi ; encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xad,0x01] // W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, v1, src_scc, s[6:7] -// W64: v_cndmask_b16 v5, v1, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:32: error: invalid operand for instruction +v_cndmask_b16 v5.l, -|src_scc|, null, ttmp15 +// W32: v_cndmask_b16 v5.l, -|src_scc|, null, ttmp15 ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xec,0x21] +// W64-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction -v_cndmask_b16 v5, v255, 0.5, s[6:7] -// W64: v_cndmask_b16 v5, v255, 0.5, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:30: error: invalid operand for instruction +v_cndmask_b16 v5.l, v1.l, src_scc, s[6:7] +// W64: v_cndmask_b16 v5.l, v1.l, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:36: error: invalid operand for instruction -v_cndmask_b16 v5, s105, s105, s[6:7] -// W64: v_cndmask_b16 v5, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, v255.l, 0.5, s[6:7] +// W64: v_cndmask_b16 v5.l, v255.l, 0.5, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xe1,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:34: error: invalid operand for instruction -v_cndmask_b16 v5, vcc_hi, v2, s[6:7] -// W64: v_cndmask_b16 v5, vcc_hi, v2, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00] -// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, s105, s105, s[6:7] +// W64: v_cndmask_b16 v5.l, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, ttmp15, ttmp15, s[6:7] -// W64: v_cndmask_b16 v5, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, vcc_hi, v2.l, s[6:7] +// W64: v_cndmask_b16 v5.l, vcc_hi, v2.l, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, m0, v255, s[6:7] -// W64: v_cndmask_b16 v5, m0, v255, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] -// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, ttmp15, ttmp15, s[6:7] +// W64: v_cndmask_b16 v5.l, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, exec_lo, exec_lo, s[6:7] -// W64: v_cndmask_b16 v5, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, m0, v255.l, s[6:7] +// W64: v_cndmask_b16 v5.l, m0, v255.l, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, exec_hi, exec_hi, s[6:7] -// W64: v_cndmask_b16 v5, exec_hi, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, exec_lo, exec_lo, s[6:7] +// W64: v_cndmask_b16 v5.l, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, null, m0, s[6:7] -// W64: v_cndmask_b16 v5, null, m0, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, exec_hi, exec_hi, s[6:7] +// W64: v_cndmask_b16 v5.l, exec_hi, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, -1, -|vcc_lo|, s[104:105] -// W64: v_cndmask_b16 v5, -1, -|vcc_lo|, s[104:105] ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41] -// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, null, m0, s[6:7] +// W64: v_cndmask_b16 v5.l, null, m0, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cndmask_b16 v5.l, -1, -|vcc_lo|, s[104:105] +// W64: v_cndmask_b16 v5.l, -1, -|vcc_lo|, s[104:105] ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cndmask_b16 v5.l, 0.5, -1, vcc +// W64: v_cndmask_b16 v5.l, 0.5, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xa9,0x01] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cndmask_b16 v5.l, -|src_scc|, null, ttmp[14:15] +// W64: v_cndmask_b16 v5.l, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21] +// W32-ERR: :[[@LINE-2]]:23: error: invalid operand for instruction + +v_cndmask_b16 v255.l, -|0xfe0b|, -|vcc_hi|, null +// GFX12: v_cndmask_b16 v255.l, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_cndmask_b16 v5, 0.5, -1, vcc -// W64: v_cndmask_b16 v5, 0.5, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xf0,0x82,0xa9,0x01] -// W32-ERR: :[[@LINE-2]]:19: error: invalid operand for instruction +v_cndmask_b16 v5.l, v255.h, 0.5, s3 +// W32: v_cndmask_b16 v5.l, v255.h, 0.5, s3 ; encoding: [0x05,0x08,0x5d,0xd6,0xff,0xe1,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:34: error: invalid operand for instruction + +v_cndmask_b16 v5.l, m0, v255.h, s3 +// W32: v_cndmask_b16 v5.l, m0, v255.h, s3 ; encoding: [0x05,0x10,0x5d,0xd6,0x7d,0xfe,0x0f,0x00] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cndmask_b16 v5.l, v255.h, 0.5, s[6:7] +// W64: v_cndmask_b16 v5.l, v255.h, 0.5, s[6:7] ; encoding: [0x05,0x08,0x5d,0xd6,0xff,0xe1,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:34: error: invalid operand for instruction + +v_cndmask_b16 v5.l, m0, v255.h, s[6:7] +// W64: v_cndmask_b16 v5.l, m0, v255.h, s[6:7] ; encoding: [0x05,0x10,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] +// W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction + +v_cndmask_b16 v5.l, 0x3800, -1, vcc_lo +// W32: v_cndmask_b16 v5.l, 0x3800, -1, vcc_lo ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15] -// W64: v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21] +v_cndmask_b16 v5.l, 0x3800, -1, vcc +// W64: v_cndmask_b16 v5.l, 0x3800, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00] // W32-ERR: :[[@LINE-2]]:21: error: invalid operand for instruction -v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null -// GFX12: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_cndmask_b16 v255.h, -|0xfe0b|, -|vcc_hi|, null +// GFX12: v_cndmask_b16 v255.h, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x43,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] v_cubeid_f32 v5, v1, v2, s3 // GFX12: v_cubeid_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s index aa804cc302bf0..78ce7451c1ba7 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s @@ -869,128 +869,147 @@ v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[0,1,2,3] -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[0,1,2,3] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_mirror -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, s2, s3 row_mirror -// W32: v_cndmask_b16_e64_dpp v5, v1, s2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0c,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s3 row_mirror +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0c,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:39: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, 10, s3 row_mirror -// W32: v_cndmask_b16_e64_dpp v5, v1, 10, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x14,0x0d,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s3 row_mirror +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x14,0x0d,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:39: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_half_mirror -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:1 -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:1 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:15 -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:15 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:1 -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:1 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:15 -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:15 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_ror:1 -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_ror:1 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s105 row_ror:15 -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s105 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 row_ror:15 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 -// W32: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] -// W64-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13] -// W64-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[3,2,1,0] +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[0,1,2,3] +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_mirror +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_half_mirror +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, s2, s[6:7] row_half_mirror -// W64: v_cndmask_b16_e64_dpp v5, v1, s2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x18,0x00,0x01,0x41,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s[6:7] row_half_mirror +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x18,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:39: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, 10, s[6:7] row_half_mirror -// W64: v_cndmask_b16_e64_dpp v5, v1, 10, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x14,0x19,0x00,0x01,0x41,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s[6:7] row_half_mirror +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x14,0x19,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:39: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:1 +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15 -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:15 +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1 -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:1 +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15 -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:15 +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1 -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:1 +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15 -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:15 +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 -// W64: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] -// W32-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W64: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13] -// W32-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x05,0x30] +v_cndmask_b16_e64_dpp v255.l, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cndmask_b16_e64_dpp v255.l, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x05,0x30] + +v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_hi row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_hi row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x5d,0xd6,0xfa,0x04,0xae,0x41,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x12,0x5d,0xd6,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x12,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x05,0x30] v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s index e93a65ec92e73..b41f92b889368 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s @@ -516,56 +516,75 @@ v_bfm_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_bfm_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_bfm_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x1d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_cndmask_b16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cndmask_b16_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:39: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, 10, s3 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cndmask_b16_e64_dpp v5, v1, 10, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x14,0x0d,0x00,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x14,0x0d,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:39: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// W32: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xee,0x21,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xee,0x21,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:35: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 -// W64: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v5, -v1, |s2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 -// W64: v_cndmask_b16_e64_dpp v5, -v1, |s2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xe8,0x21,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:38: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5.l, -v1.l, |s2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cndmask_b16_e64_dpp v5.l, -v1.l, |s2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xe8,0x21,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:42: error: invalid operand for instruction -v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x5d,0xd6,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] +v_cndmask_b16_e64_dpp v255.l, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cndmask_b16_e64_dpp v255.l, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x5d,0xd6,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] + +v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x5d,0xd6,0xe9,0x04,0xae,0x41,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x12,0x5d,0xd6,0xea,0x04,0xee,0x21,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x12,0x5d,0xd6,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x43,0x5d,0xd6,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt index adcca58776100..05174e3128919 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt @@ -1054,55 +1054,100 @@ # GFX11: v_bfm_b32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00 -# W32: v_cndmask_b16 v5, v1, src_scc, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] -# W64: v_cndmask_b16 v5, v1, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] +# W32-REAL16: v_cndmask_b16 v5.l, v1.l, src_scc, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] +# W32-FAKE16: v_cndmask_b16 v5, v1, src_scc, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, v1.l, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] +# W64-FAKE16: v_cndmask_b16 v5, v1, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] 0x05,0x00,0x5d,0xd6,0xff,0xe1,0x19,0x00 -# W32: v_cndmask_b16 v5, v255, 0x3800, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00] -# W64: v_cndmask_b16 v5, v255, 0x3800, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00] +# W32-REAL16: v_cndmask_b16 v5.l, v255.l, 0x3800, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_cndmask_b16 v5, v255, 0x3800, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, v255.l, 0x3800, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_cndmask_b16 v5, v255, 0x3800, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00 -# W32: v_cndmask_b16 v5, s105, s105, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00] -# W64: v_cndmask_b16 v5, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00] +# W32-REAL16: v_cndmask_b16 v5.l, s105, s105, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00] +# W32-FAKE16: v_cndmask_b16 v5, s105, s105, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00] +# W64-FAKE16: v_cndmask_b16 v5, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00] 0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00 -# W32: v_cndmask_b16 v5, vcc_hi, v2, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00] -# W64: v_cndmask_b16 v5, vcc_hi, v2, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00] +# W32-REAL16: v_cndmask_b16 v5.l, vcc_hi, v2.l, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00] +# W32-FAKE16: v_cndmask_b16 v5, vcc_hi, v2, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, vcc_hi, v2.l, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00] +# W64-FAKE16: v_cndmask_b16 v5, vcc_hi, v2, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00] 0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00 -# W32: v_cndmask_b16 v5, ttmp15, ttmp15, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00] -# W64: v_cndmask_b16 v5, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00] +# W32-REAL16: v_cndmask_b16 v5.l, ttmp15, ttmp15, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00] +# W32-FAKE16: v_cndmask_b16 v5, ttmp15, ttmp15, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00] +# W64-FAKE16: v_cndmask_b16 v5, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00] 0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00 -# W32: v_cndmask_b16 v5, m0, v255, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] -# W64: v_cndmask_b16 v5, m0, v255, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] +# W32-REAL16: v_cndmask_b16 v5.l, m0, v255.l, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] +# W32-FAKE16: v_cndmask_b16 v5, m0, v255, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, m0, v255.l, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] +# W64-FAKE16: v_cndmask_b16 v5, m0, v255, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] 0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00 -# W32: v_cndmask_b16 v5, exec_lo, exec_lo, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00] -# W64: v_cndmask_b16 v5, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00] +# W32-REAL16: v_cndmask_b16 v5.l, exec_lo, exec_lo, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00] +# W32-FAKE16: v_cndmask_b16 v5, exec_lo, exec_lo, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00] +# W64-FAKE16: v_cndmask_b16 v5, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00] 0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00 -# W32: v_cndmask_b16 v5, exec_hi, exec_hi, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00] -# W64: v_cndmask_b16 v5, exec_hi, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00] +# W32-REAL16: v_cndmask_b16 v5.l, exec_hi, exec_hi, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00] +# W32-FAKE16: v_cndmask_b16 v5, exec_hi, exec_hi, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, exec_hi, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00] +# W64-FAKE16: v_cndmask_b16 v5, exec_hi, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00] 0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00 -# W32: v_cndmask_b16 v5, null, m0, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00] -# W64: v_cndmask_b16 v5, null, m0, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00] +# W32-REAL16: v_cndmask_b16 v5.l, null, m0, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00] +# W32-FAKE16: v_cndmask_b16 v5, null, m0, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, null, m0, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00] +# W64-FAKE16: v_cndmask_b16 v5, null, m0, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00] 0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41 -# W32: v_cndmask_b16 v5, -1, -|vcc_lo|, s104 ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41] -# W64: v_cndmask_b16 v5, -1, -|vcc_lo|, s[104:105] ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41] +# W32-REAL16: v_cndmask_b16 v5.l, -1, -|vcc_lo|, s104 ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41] +# W32-FAKE16: v_cndmask_b16 v5, -1, -|vcc_lo|, s104 ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41] +# W64-REAL16: v_cndmask_b16 v5.l, -1, -|vcc_lo|, s[104:105] ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41] +# W64-FAKE16: v_cndmask_b16 v5, -1, -|vcc_lo|, s[104:105] ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41] 0x05,0x00,0x5d,0xd6,0xf0,0x82,0xa9,0x01 -# W32: v_cndmask_b16 v5, 0x3800, -1, vcc_lo ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00] -# W64: v_cndmask_b16 v5, 0x3800, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00] +# W32-REAL16: v_cndmask_b16 v5.l, 0x3800, -1, vcc_lo ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_cndmask_b16 v5, 0x3800, -1, vcc_lo ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, 0x3800, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_cndmask_b16 v5, 0x3800, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00] 0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21 -# W32: v_cndmask_b16 v5, -|src_scc|, null, ttmp14 ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21] -# W64: v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21] +# W32-REAL16: v_cndmask_b16 v5.l, -|src_scc|, null, ttmp14 ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21] +# W32-FAKE16: v_cndmask_b16 v5, -|src_scc|, null, ttmp14 ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21] +# W64-REAL16: v_cndmask_b16 v5.l, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21] +# W64-FAKE16: v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21] 0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX11: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_cndmask_b16 v255.l, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_cndmask_b16 v255.l, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x5d,0xd6,0xff,0xe1,0x19,0x00 +# W32-REAL16: v_cndmask_b16 v5.l, v255.h, 0x3800, s6 ; encoding: [0x05,0x08,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_cndmask_b16 v5, v255, 0x3800, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, v255.h, 0x3800, s[6:7] ; encoding: [0x05,0x08,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_cndmask_b16 v5, v255, 0x3800, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00] + +0x05,0x10,0x5d,0xd6,0x7d,0xfe,0x1b,0x00 +# W32-REAL16: v_cndmask_b16 v5.l, m0, v255.h, s6 ; encoding: [0x05,0x10,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] +# W32-FAKE16: v_cndmask_b16 v5, m0, v255, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, m0, v255.h, s[6:7] ; encoding: [0x05,0x10,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] +# W64-FAKE16: v_cndmask_b16 v5, m0, v255, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] + +0xff,0x43,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_cndmask_b16 v255.h, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x43,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_cndmask_b16 v255.h, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x43,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00 # GFX11: v_cubeid_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt index 2964360a77fd2..c9ef3c714213d 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt @@ -738,65 +738,118 @@ # GFX11: v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01 -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13 -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] 0x05,0x02,0x5d,0xd6,0xfa,0x04,0xf2,0x21,0x01,0x6f,0x09,0x30 -# GFX11: v_cndmask_b16_e64_dpp v5, -v1, |v2|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xf2,0x21,0x01,0x6f,0x09,0x30] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xf2,0x21,0x01,0x6f,0x09,0x30] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xf2,0x21,0x01,0x6f,0x09,0x30] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xf2,0x21,0x01,0x6f,0x09,0x30] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xf2,0x21,0x01,0x6f,0x09,0x30] 0x05,0x01,0x5d,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x6f,0x09,0x30 -# GFX11: v_cndmask_b16_e64_dpp v5, |v1|, -v2, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x6f,0x09,0x30] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x6f,0x09,0x30] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x6f,0x09,0x30] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x6f,0x09,0x30] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x6f,0x09,0x30] 0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 -# GFX11: v_cndmask_b16_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_cndmask_b16_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_cndmask_b16_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] + +0x05,0x09,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] + +0x05,0x12,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13 +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x12,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x12,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13] + +0xff,0x43,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff # GFX11: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt index 7a81ba23afa35..1e74b5aec0cf3 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt @@ -396,29 +396,64 @@ # GFX11: v_bfm_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05 -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05 -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] 0x05,0x02,0x5d,0xd6,0xe9,0x04,0xf2,0x21,0x01,0x00,0x00,0x00 -# GFX11: v_cndmask_b16_e64_dpp v5, -v1, |v2|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x02,0x5d,0xd6,0xe9,0x04,0xf2,0x21,0x01,0x00,0x00,0x00] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x02,0x5d,0xd6,0xe9,0x04,0xf2,0x21,0x01,0x00,0x00,0x00] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x02,0x5d,0xd6,0xe9,0x04,0xf2,0x21,0x01,0x00,0x00,0x00] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x02,0x5d,0xd6,0xe9,0x04,0xf2,0x21,0x01,0x00,0x00,0x00] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x02,0x5d,0xd6,0xe9,0x04,0xf2,0x21,0x01,0x00,0x00,0x00] 0x05,0x01,0x5d,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x00,0x00,0x00 -# GFX11: v_cndmask_b16_e64_dpp v5, |v1|, -v2, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x00,0x00,0x00] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x00,0x00,0x00] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x00,0x00,0x00] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x00,0x00,0x00] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, null dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x00,0x00,0x00] 0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00 -# GFX11: v_cndmask_b16_e64_dpp v255, v255, v255, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] +# W32-REAL16: v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cndmask_b16_e64_dpp v255, v255, v255, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cndmask_b16_e64_dpp v255, v255, v255, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] + +0x05,0x09,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05 +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] + +0x05,0x12,0x5d,0xd6,0xe9,0x04,0xea,0x21,0x01,0x77,0x39,0x05 +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x12,0x5d,0xd6,0xe9,0x04,0xea,0x21,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x5d,0xd6,0xe9,0x04,0xea,0x21,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x12,0x5d,0xd6,0xe9,0x04,0xea,0x21,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x5d,0xd6,0xe9,0x04,0xea,0x21,0x01,0x77,0x39,0x05] + +0xff,0x43,0x5d,0xd6,0xea,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00 +# W32-REAL16: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xea,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xea,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xea,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xea,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] 0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 # GFX11: v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt index 633d3a48634fa..4108fd9c8be62 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt @@ -1018,55 +1018,100 @@ # GFX12: v_bfm_b32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00 -# W32: v_cndmask_b16 v5, v1, src_scc, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] -# W64: v_cndmask_b16 v5, v1, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] +# W32-REAL16: v_cndmask_b16 v5.l, v1.l, src_scc, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] +# W32-FAKE16: v_cndmask_b16 v5, v1, src_scc, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, v1.l, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] +# W64-FAKE16: v_cndmask_b16 v5, v1, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] 0x05,0x00,0x5d,0xd6,0xff,0xe1,0x19,0x00 -# W32: v_cndmask_b16 v5, v255, 0x3800, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00] -# W64: v_cndmask_b16 v5, v255, 0x3800, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00] +# W32-REAL16: v_cndmask_b16 v5.l, v255.l, 0x3800, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_cndmask_b16 v5, v255, 0x3800, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, v255.l, 0x3800, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_cndmask_b16 v5, v255, 0x3800, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00 -# W32: v_cndmask_b16 v5, s105, s105, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00] -# W64: v_cndmask_b16 v5, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00] +# W32-REAL16: v_cndmask_b16 v5.l, s105, s105, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00] +# W32-FAKE16: v_cndmask_b16 v5, s105, s105, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00] +# W64-FAKE16: v_cndmask_b16 v5, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x69,0xd2,0x18,0x00] 0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00 -# W32: v_cndmask_b16 v5, vcc_hi, v2, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00] -# W64: v_cndmask_b16 v5, vcc_hi, v2, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00] +# W32-REAL16: v_cndmask_b16 v5.l, vcc_hi, v2.l, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00] +# W32-FAKE16: v_cndmask_b16 v5, vcc_hi, v2, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, vcc_hi, v2.l, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00] +# W64-FAKE16: v_cndmask_b16 v5, vcc_hi, v2, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x6b,0x04,0x1a,0x00] 0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00 -# W32: v_cndmask_b16 v5, ttmp15, ttmp15, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00] -# W64: v_cndmask_b16 v5, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00] +# W32-REAL16: v_cndmask_b16 v5.l, ttmp15, ttmp15, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00] +# W32-FAKE16: v_cndmask_b16 v5, ttmp15, ttmp15, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00] +# W64-FAKE16: v_cndmask_b16 v5, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7b,0xf6,0x18,0x00] 0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00 -# W32: v_cndmask_b16 v5, m0, v255, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] -# W64: v_cndmask_b16 v5, m0, v255, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] +# W32-REAL16: v_cndmask_b16 v5.l, m0, v255.l, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] +# W32-FAKE16: v_cndmask_b16 v5, m0, v255, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, m0, v255.l, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] +# W64-FAKE16: v_cndmask_b16 v5, m0, v255, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] 0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00 -# W32: v_cndmask_b16 v5, exec_lo, exec_lo, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00] -# W64: v_cndmask_b16 v5, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00] +# W32-REAL16: v_cndmask_b16 v5.l, exec_lo, exec_lo, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00] +# W32-FAKE16: v_cndmask_b16 v5, exec_lo, exec_lo, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00] +# W64-FAKE16: v_cndmask_b16 v5, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7e,0xfc,0x18,0x00] 0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00 -# W32: v_cndmask_b16 v5, exec_hi, exec_hi, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00] -# W64: v_cndmask_b16 v5, exec_hi, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00] +# W32-REAL16: v_cndmask_b16 v5.l, exec_hi, exec_hi, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00] +# W32-FAKE16: v_cndmask_b16 v5, exec_hi, exec_hi, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, exec_hi, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00] +# W64-FAKE16: v_cndmask_b16 v5, exec_hi, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7f,0xfe,0x18,0x00] 0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00 -# W32: v_cndmask_b16 v5, null, m0, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00] -# W64: v_cndmask_b16 v5, null, m0, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00] +# W32-REAL16: v_cndmask_b16 v5.l, null, m0, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00] +# W32-FAKE16: v_cndmask_b16 v5, null, m0, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, null, m0, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00] +# W64-FAKE16: v_cndmask_b16 v5, null, m0, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7c,0xfa,0x18,0x00] 0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41 -# W32: v_cndmask_b16 v5, -1, -|vcc_lo|, s104 ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41] -# W64: v_cndmask_b16 v5, -1, -|vcc_lo|, s[104:105] ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41] +# W32-REAL16: v_cndmask_b16 v5.l, -1, -|vcc_lo|, s104 ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41] +# W32-FAKE16: v_cndmask_b16 v5, -1, -|vcc_lo|, s104 ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41] +# W64-REAL16: v_cndmask_b16 v5.l, -1, -|vcc_lo|, s[104:105] ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41] +# W64-FAKE16: v_cndmask_b16 v5, -1, -|vcc_lo|, s[104:105] ; encoding: [0x05,0x02,0x5d,0xd6,0xc1,0xd4,0xa0,0x41] 0x05,0x00,0x5d,0xd6,0xf0,0x82,0xa9,0x01 -# W32: v_cndmask_b16 v5, 0x3800, -1, vcc_lo ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00] -# W64: v_cndmask_b16 v5, 0x3800, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00] +# W32-REAL16: v_cndmask_b16 v5.l, 0x3800, -1, vcc_lo ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_cndmask_b16 v5, 0x3800, -1, vcc_lo ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, 0x3800, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_cndmask_b16 v5, 0x3800, -1, vcc ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0x82,0xa9,0x01,0x00,0x38,0x00,0x00] 0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21 -# W32: v_cndmask_b16 v5, -|src_scc|, null, ttmp14 ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21] -# W64: v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21] +# W32-REAL16: v_cndmask_b16 v5.l, -|src_scc|, null, ttmp14 ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21] +# W32-FAKE16: v_cndmask_b16 v5, -|src_scc|, null, ttmp14 ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21] +# W64-REAL16: v_cndmask_b16 v5.l, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21] +# W64-FAKE16: v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x5d,0xd6,0xfd,0xf8,0xe8,0x21] 0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX12: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_cndmask_b16 v255.l, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_cndmask_b16 v255.l, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x5d,0xd6,0xff,0xe1,0x19,0x00 +# W32-REAL16: v_cndmask_b16 v5.l, v255.h, 0x3800, s6 ; encoding: [0x05,0x08,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00] +# W32-FAKE16: v_cndmask_b16 v5, v255, 0x3800, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, v255.h, 0x3800, s[6:7] ; encoding: [0x05,0x08,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00] +# W64-FAKE16: v_cndmask_b16 v5, v255, 0x3800, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0xff,0xff,0x19,0x00,0x00,0x38,0x00,0x00] + +0x05,0x10,0x5d,0xd6,0x7d,0xfe,0x1b,0x00 +# W32-REAL16: v_cndmask_b16 v5.l, m0, v255.h, s6 ; encoding: [0x05,0x10,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] +# W32-FAKE16: v_cndmask_b16 v5, m0, v255, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] +# W64-REAL16: v_cndmask_b16 v5.l, m0, v255.h, s[6:7] ; encoding: [0x05,0x10,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] +# W64-FAKE16: v_cndmask_b16 v5, m0, v255, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x7d,0xfe,0x1b,0x00] + +0xff,0x43,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_cndmask_b16 v255.h, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x43,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_cndmask_b16 v255.h, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x43,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00 # GFX12: v_cubeid_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt index 7e30a4a2096b1..0be540da8287b 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt @@ -789,59 +789,106 @@ # GFX12: v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01 -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13 -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] 0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 -# GFX12: v_cndmask_b16_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_cndmask_b16_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_cndmask_b16_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] + +0x05,0x09,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] + +0x05,0x12,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13 +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x12,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x12,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13] + +0xff,0x43,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff # GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt index 2aaba2a17fae6..343a71abb27d0 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt @@ -447,23 +447,52 @@ # GFX12: v_bfm_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05 -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05 -# W32: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -# W64: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] 0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00 -# GFX12: v_cndmask_b16_e64_dpp v255, v255, v255, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] +# W32-REAL16: v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cndmask_b16_e64_dpp v255, v255, v255, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cndmask_b16_e64_dpp v255, v255, v255, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] + +0x05,0x09,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05 +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x5d,0xd6,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] + +0x05,0x12,0x5d,0xd6,0xe9,0x04,0xea,0x21,0x01,0x77,0x39,0x05 +# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x12,0x5d,0xd6,0xe9,0x04,0xea,0x21,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x5d,0xd6,0xe9,0x04,0xea,0x21,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x12,0x5d,0xd6,0xe9,0x04,0xea,0x21,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x5d,0xd6,0xe9,0x04,0xea,0x21,0x01,0x77,0x39,0x05] + +0xff,0x43,0x5d,0xd6,0xea,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00 +# W32-REAL16: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xea,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xea,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xea,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xea,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] 0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 # GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]