From ac02872d091cd1d7d5318479e5de31e699898376 Mon Sep 17 00:00:00 2001 From: Maryam Moghadas Date: Tue, 11 Mar 2025 15:43:05 +0000 Subject: [PATCH 1/3] [PowerPC] Add Dense Math binary integer outer-Product accumulate to DMR Instructions This commit adds the following Dense Math Facility integer calculation instructions: dmxvi8gerx4, dmxvi8gerx4pp, dmxvi8gerx4spp, pmdmxvi8gerx4, pmdmxvi8gerx4pp, and pmdmxvi8gerx4spp, along with their corresponding intrinsics and tests. --- llvm/include/llvm/IR/IntrinsicsPowerPC.td | 21 ++ llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td | 149 +++++++++ llvm/lib/Target/PowerPC/PPCInstrMMA.td | 8 - .../test/CodeGen/PowerPC/dmf-outer-product.ll | 287 ++++++++++++++++++ .../PowerPC/ppc-encoding-ISAFuture.txt | 18 ++ .../PowerPC/ppc64le-encoding-ISAFuture.txt | 18 ++ llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s | 30 ++ 7 files changed, 523 insertions(+), 8 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/dmf-outer-product.ll diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index af66b8206182e..e4d39134a4a25 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -280,6 +280,13 @@ multiclass PowerPC_MMA_ACC_PP_Intrinsic args> { [IntrNoMem]>; } +multiclass PowerPC_MMA_DMR_PP_Intrinsic args> { + def NAME: DefaultAttrsIntrinsic<[llvm_v1024i1_ty], args, [IntrNoMem]>; + def pp : DefaultAttrsIntrinsic<[llvm_v1024i1_ty], + !listconcat([llvm_v1024i1_ty], args), + [IntrNoMem]>; +} + //===----------------------------------------------------------------------===// // PowerPC Altivec Intrinsic Class Definitions. // @@ -1711,6 +1718,20 @@ let TargetPrefix = "ppc" in { [llvm_v512i1_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + defm int_ppc_mma_dmxvi8gerx4 : + PowerPC_MMA_DMR_PP_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty]>; + defm int_ppc_mma_pmdmxvi8gerx4 : + PowerPC_MMA_DMR_PP_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty]>; + def int_ppc_mma_dmxvi8gerx4spp : + DefaultAttrsIntrinsic<[llvm_v1024i1_ty], + [llvm_v1024i1_ty, llvm_v256i1_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_ppc_mma_pmdmxvi8gerx4spp : + DefaultAttrsIntrinsic<[llvm_v1024i1_ty], + [llvm_v1024i1_ty, llvm_v256i1_ty, llvm_v16i8_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; } // XL Compat intrinsics. diff --git a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td index b7100462cb967..17955a3fb1fd8 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td @@ -11,6 +11,13 @@ // //===----------------------------------------------------------------------===// +// Mask immediates for MMA instructions (2, 4 and 8 bits). +def Msk2Imm : ImmLeaf(Imm); }]>; +def Msk4Imm : ImmLeaf(Imm); }]>; +def Msk8Imm : ImmLeaf(Imm); }]>; + +def MMA : Predicate<"Subtarget->hasMMA()">; + class XX3Form_AT3_XABp5_P1 opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, list pattern> : I { @@ -69,6 +76,96 @@ class XForm_ATB3 opcode, bits<5> o, bits<10> xo, dag OOL, dag IOL, let Inst{31} = 0; } +class XX3Form_AT3_XAp5B6 opcode, bits<8> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list pattern> + : I { + bits<3> AT; + bits<5> XAp; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-8} = AT; + let Inst{9-10} = 0; + let Inst{11-14} = XAp{3-0}; + let Inst{15} = 0; + let Inst{16-20} = XB{4-0}; + let Inst{21-28} = xo; + let Inst{29} = XAp{4}; + let Inst{30} = XB{5}; + let Inst{31} = 0; +} + +class MMIRR_XX3Form_X8YP4_XAp5B6 opcode, bits<8> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list pattern> + : PI<1, opcode, OOL, IOL, asmstr, itin> { + bits<3> AT; + bits<6> XAp; + bits<6> XB; + bits<8> XMSK; + bits<4> YMSK; + bits<4> PMSK; + + let Pattern = pattern; + + // The prefix. + let Inst{6-7} = 3; + let Inst{8-11} = 9; + let Inst{12-15} = 0; + let Inst{16-19} = PMSK; + let Inst{20-27} = XMSK; + let Inst{28-31} = YMSK; + + // The instruction. + let Inst{38-40} = AT; + let Inst{41-42} = 0; + let Inst{43-46} = XAp{3-0}; + let Inst{47} = 0; + let Inst{48-52} = XB{4-0}; + let Inst{53-60} = xo; + let Inst{61} = XAp{4}; + let Inst{62} = XB{5}; + let Inst{63} = 0; +} + +multiclass DMR_UM_XOEO opcode, bits<8> xo, dag IOL, string asmbase, + string asmstr> { + let Predicates = [IsISAFuture] in { + def NAME : + XX3Form_AT3_XAp5B6, + RegConstraint<"@earlyclobber $AT">; + def PP : + XX3Form_AT3_XAp5B6, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +multiclass DMR_UM_M448_XOEO opcode, bits<8> xo, dag IOL, string asmbase, + string asmstr> { + defm NAME : DMR_UM_XOEO; + let Predicates = [IsISAFuture] in { + def PM#NAME : + MMIRR_XX3Form_X8YP4_XAp5B6< + opcode, !or(xo, 0x01), (outs dmr:$AT), + !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP : + MMIRR_XX3Form_X8YP4_XAp5B6< + opcode, xo, (outs dmr:$AT), + !con((ins dmr:$ATi), + !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + let Predicates = [IsISAFuture] in { def DMXXEXTFDMR512 : XX3Form_AT3_XABp5_P1<60, 226, (outs vsrprc:$XAp, vsrprc:$XBp), @@ -116,4 +213,56 @@ let Predicates = [IsISAFuture] in { def DMSETDMRZ : XForm_AT3<31, 2, 177, (outs dmr:$AT), (ins), "dmsetdmrz $AT", NoItinerary, [(set v1024i1:$AT, (int_ppc_mma_dmsetdmrz))]>; + +// MMA+ accumulating/non-accumulating instructions. + +// DMXVI8GERX4, DMXVI8GERX4PP, PMDMXVI8GERX4, PMDMXVI8GERX4PP +defm DMXVI8GERX4 : DMR_UM_M448_XOEO<59, 10, (ins vsrprc:$XAp, vsrc:$XB), + "dmxvi8gerx4", "$AT, $XAp, $XB">; + +let Predicates = [MMA, IsISAFuture] in { + def DMXVI8GERX4SPP : + XX3Form_AT3_XAp5B6<59, 98, (outs dmr:$AT), (ins dmr:$ATi, vsrprc:$XAp, vsrc:$XB), + "dmxvi8gerx4spp $AT, $XAp, $XB", IIC_VecGeneral, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; +} + +let Predicates = [MMA, PrefixInstrs, IsISAFuture] in { + def PMDMXVI8GERX4SPP : + MMIRR_XX3Form_X8YP4_XAp5B6<59, 98, (outs dmr:$AT), + (ins dmr:$ATi, vsrprc:$XAp,vsrc:$XB, u8imm:$XMSK, + u4imm:$YMSK, u4imm:$PMSK), + "pmdmxvi8gerx4spp $AT, $XAp, $XB, $XMSK, $YMSK, $PMSK", + IIC_VecGeneral, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; +} + +// MMA+ Intrinsics +let Predicates = [MMA, IsISAFuture] in { + def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4 v256i1:$XAp, v16i8:$XB)), + (DMXVI8GERX4 $XAp, RCCp.BToVSRC)>; + def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)), + (DMXVI8GERX4PP $ATi, $XAp, RCCp.BToVSRC)>; + + def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4spp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)), + (DMXVI8GERX4SPP $ATi, $XAp, RCCp.BToVSRC)>; +} + +let Predicates = [MMA, PrefixInstrs, IsISAFuture] in { + def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4 v256i1:$XAp, v16i8:$XB, Msk8Imm:$XMSK, + Msk4Imm:$YMSK, Msk4Imm:$PMSK)), + (PMDMXVI8GERX4 $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, + Msk4Imm:$YMSK, Msk4Imm:$PMSK)>; + + def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB, + Msk8Imm:$XMSK, Msk4Imm:$YMSK, + Msk4Imm:$PMSK)), + (PMDMXVI8GERX4PP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, + Msk4Imm:$YMSK, Msk4Imm:$PMSK)>; + + def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4spp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB, + Msk8Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMDMXVI8GERX4SPP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; } diff --git a/llvm/lib/Target/PowerPC/PPCInstrMMA.td b/llvm/lib/Target/PowerPC/PPCInstrMMA.td index 161d4d3c492f3..fd8418a6c50ea 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrMMA.td +++ b/llvm/lib/Target/PowerPC/PPCInstrMMA.td @@ -1,12 +1,4 @@ -// Mask immediates for MMA instructions (2, 4 and 8 bits). -def Msk2Imm : ImmLeaf(Imm); }]>; -def Msk4Imm : ImmLeaf(Imm); }]>; -def Msk8Imm : ImmLeaf(Imm); }]>; - -def MMA : Predicate<"Subtarget->hasMMA()">; - - // Multiclass definitions for MMA accumulator instructions. // ---------------------------------------------------------------------------- diff --git a/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll b/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll new file mode 100644 index 0000000000000..774b13e0fd2d3 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll @@ -0,0 +1,287 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -enable-subreg-liveness -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=future -enable-subreg-liveness -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE + +declare <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1>, <16 x i8>) + +define void @test_dmxvi8gerx4(ptr %vpp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_dmxvi8gerx4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxv v3, 0(r3) +; CHECK-NEXT: lxv vs0, 0(r4) +; CHECK-NEXT: lxv v2, 16(r3) +; CHECK-NEXT: dmxvi8gerx4 dmr0, vsp34, vs0 +; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-NEXT: stxvp vsp34, 96(r5) +; CHECK-NEXT: stxvp vsp36, 64(r5) +; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-NEXT: stxvp vsp34, 32(r5) +; CHECK-NEXT: stxvp vsp36, 0(r5) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_dmxvi8gerx4: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxv v3, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r4) +; CHECK-BE-NEXT: lxv v2, 0(r3) +; CHECK-BE-NEXT: dmxvi8gerx4 dmr0, vsp34, vs0 +; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r5) +; CHECK-BE-NEXT: stxvp vsp34, 64(r5) +; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r5) +; CHECK-BE-NEXT: stxvp vsp34, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %0 = load <256 x i1>, ptr %vpp, align 32 + %1 = load <16 x i8>, ptr %vcp, align 32 + %2 = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> %0, <16 x i8> %1) + store <1024 x i1> %2, ptr %resp, align 64 + ret void +} + +declare <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4pp(<1024 x i1>, <256 x i1>, <16 x i8>) + +define void @test_dmxvi8gerx4pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_dmxvi8gerx4pp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: lxvp vsp34, 64(r3) +; CHECK-NEXT: lxvp vsp36, 96(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-NEXT: lxv v3, 0(r4) +; CHECK-NEXT: lxv vs0, 0(r5) +; CHECK-NEXT: lxv v2, 16(r4) +; CHECK-NEXT: dmxvi8gerx4pp dmr0, vsp34, vs0 +; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-NEXT: stxvp vsp34, 96(r6) +; CHECK-NEXT: stxvp vsp36, 64(r6) +; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-NEXT: stxvp vsp34, 32(r6) +; CHECK-NEXT: stxvp vsp36, 0(r6) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_dmxvi8gerx4pp: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: lxvp vsp34, 32(r3) +; CHECK-BE-NEXT: lxvp vsp36, 0(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-BE-NEXT: lxv v3, 16(r4) +; CHECK-BE-NEXT: lxv vs0, 0(r5) +; CHECK-BE-NEXT: lxv v2, 0(r4) +; CHECK-BE-NEXT: dmxvi8gerx4pp dmr0, vsp34, vs0 +; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r6) +; CHECK-BE-NEXT: stxvp vsp34, 64(r6) +; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r6) +; CHECK-BE-NEXT: stxvp vsp34, 0(r6) +; CHECK-BE-NEXT: blr +entry: + %0 = load <1024 x i1>, ptr %vop, align 64 + %1 = load <256 x i1>, ptr %vpp, align 32 + %2 = load <16 x i8>, ptr %vcp, align 32 + %3 = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4pp(<1024 x i1> %0, <256 x i1> %1, <16 x i8> %2) + store <1024 x i1> %3, ptr %resp, align 64 + ret void +} + +declare <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4spp(<1024 x i1>, <256 x i1>, <16 x i8>) + +define void @test_dmxvi8gerx4spp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_dmxvi8gerx4spp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: lxvp vsp34, 64(r3) +; CHECK-NEXT: lxvp vsp36, 96(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-NEXT: lxv v3, 0(r4) +; CHECK-NEXT: lxv vs0, 0(r5) +; CHECK-NEXT: lxv v2, 16(r4) +; CHECK-NEXT: dmxvi8gerx4spp dmr0, vsp34, vs0 +; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-NEXT: stxvp vsp34, 96(r6) +; CHECK-NEXT: stxvp vsp36, 64(r6) +; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-NEXT: stxvp vsp34, 32(r6) +; CHECK-NEXT: stxvp vsp36, 0(r6) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_dmxvi8gerx4spp: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: lxvp vsp34, 32(r3) +; CHECK-BE-NEXT: lxvp vsp36, 0(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-BE-NEXT: lxv v3, 16(r4) +; CHECK-BE-NEXT: lxv vs0, 0(r5) +; CHECK-BE-NEXT: lxv v2, 0(r4) +; CHECK-BE-NEXT: dmxvi8gerx4spp dmr0, vsp34, vs0 +; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r6) +; CHECK-BE-NEXT: stxvp vsp34, 64(r6) +; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r6) +; CHECK-BE-NEXT: stxvp vsp34, 0(r6) +; CHECK-BE-NEXT: blr +entry: + %0 = load <1024 x i1>, ptr %vop, align 64 + %1 = load <256 x i1>, ptr %vpp, align 32 + %2 = load <16 x i8>, ptr %vcp, align 32 + %3 = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4spp(<1024 x i1> %0, <256 x i1> %1, <16 x i8> %2) + store <1024 x i1> %3, ptr %resp, align 64 + ret void +} + +declare <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4pp(<1024 x i1>, <256 x i1>, <16 x i8>, i32, i32, i32) + +define void @test_pmdmxvi8gerx4pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_pmdmxvi8gerx4pp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: lxvp vsp34, 64(r3) +; CHECK-NEXT: lxvp vsp36, 96(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-NEXT: lxv v3, 0(r4) +; CHECK-NEXT: lxv vs0, 0(r5) +; CHECK-NEXT: lxv v2, 16(r4) +; CHECK-NEXT: pmdmxvi8gerx4pp dmr0, vsp34, vs0, 0, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-NEXT: stxvp vsp34, 96(r6) +; CHECK-NEXT: stxvp vsp36, 64(r6) +; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-NEXT: stxvp vsp34, 32(r6) +; CHECK-NEXT: stxvp vsp36, 0(r6) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_pmdmxvi8gerx4pp: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: lxvp vsp34, 32(r3) +; CHECK-BE-NEXT: lxvp vsp36, 0(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-BE-NEXT: lxv v3, 16(r4) +; CHECK-BE-NEXT: lxv vs0, 0(r5) +; CHECK-BE-NEXT: lxv v2, 0(r4) +; CHECK-BE-NEXT: pmdmxvi8gerx4pp dmr0, vsp34, vs0, 0, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r6) +; CHECK-BE-NEXT: stxvp vsp34, 64(r6) +; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r6) +; CHECK-BE-NEXT: stxvp vsp34, 0(r6) +; CHECK-BE-NEXT: blr +entry: + %0 = load <1024 x i1>, ptr %vop, align 64 + %1 = load <256 x i1>, ptr %vpp, align 32 + %2 = load <16 x i8>, ptr %vcp, align 32 + %3 = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4pp(<1024 x i1> %0, <256 x i1> %1, <16 x i8> %2, i32 0, i32 0, i32 0) + store <1024 x i1> %3, ptr %resp, align 64 + ret void +} + +declare <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1>, <16 x i8>, i32, i32, i32) + +define void @test_pmdmxvi8gerx4(ptr %vpp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_pmdmxvi8gerx4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxv v3, 0(r3) +; CHECK-NEXT: lxv vs0, 0(r4) +; CHECK-NEXT: lxv v2, 16(r3) +; CHECK-NEXT: pmdmxvi8gerx4 dmr0, vsp34, vs0, 0, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-NEXT: stxvp vsp34, 96(r5) +; CHECK-NEXT: stxvp vsp36, 64(r5) +; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-NEXT: stxvp vsp34, 32(r5) +; CHECK-NEXT: stxvp vsp36, 0(r5) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_pmdmxvi8gerx4: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxv v3, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r4) +; CHECK-BE-NEXT: lxv v2, 0(r3) +; CHECK-BE-NEXT: pmdmxvi8gerx4 dmr0, vsp34, vs0, 0, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r5) +; CHECK-BE-NEXT: stxvp vsp34, 64(r5) +; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r5) +; CHECK-BE-NEXT: stxvp vsp34, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %0 = load <256 x i1>, ptr %vpp, align 32 + %1 = load <16 x i8>, ptr %vcp, align 32 + %2 = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1> %0, <16 x i8> %1, i32 0, i32 0, i32 0) + store <1024 x i1> %2, ptr %resp, align 64 + ret void +} + +declare <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1>, <256 x i1>, <16 x i8>, i32, i32, i32) + +define dso_local void @test_pmdmxvi8gerx4spp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) { +; CHECK-LABEL: test_pmdmxvi8gerx4spp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: lxvp vsp34, 64(r3) +; CHECK-NEXT: lxvp vsp36, 96(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-NEXT: lxv v3, 0(r4) +; CHECK-NEXT: lxv vs0, 0(r5) +; CHECK-NEXT: lxv v2, 16(r4) +; CHECK-NEXT: pmdmxvi8gerx4spp dmr0, vsp34, vs0, 0, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-NEXT: stxvp vsp34, 96(r6) +; CHECK-NEXT: stxvp vsp36, 64(r6) +; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-NEXT: stxvp vsp34, 32(r6) +; CHECK-NEXT: stxvp vsp36, 0(r6) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test_pmdmxvi8gerx4spp: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: lxvp vsp34, 32(r3) +; CHECK-BE-NEXT: lxvp vsp36, 0(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-BE-NEXT: lxv v3, 16(r4) +; CHECK-BE-NEXT: lxv vs0, 0(r5) +; CHECK-BE-NEXT: lxv v2, 0(r4) +; CHECK-BE-NEXT: pmdmxvi8gerx4spp dmr0, vsp34, vs0, 0, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r6) +; CHECK-BE-NEXT: stxvp vsp34, 64(r6) +; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r6) +; CHECK-BE-NEXT: stxvp vsp34, 0(r6) +; CHECK-BE-NEXT: blr +entry: + %0 = load <1024 x i1>, ptr %vop, align 64 + %1 = load <256 x i1>, ptr %vpp, align 32 + %2 = load <16 x i8>, ptr %vcp, align 32 + %3 = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1> %0, <256 x i1> %1, <16 x i8> %2, i32 0, i32 0, i32 0) + store <1024 x i1> %3, ptr %resp, align 64 + ret void +} diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt index aa676185cc6f2..ac82644df6bc2 100644 --- a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt +++ b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt @@ -75,3 +75,21 @@ #CHECK: stxvprll 6, 0, 1 0x7c 0xc0 0x0d 0xda + +#CHECK: dmxvi8gerx4 1, 2, 4 +0xec,0x82,0x20,0x58 + +#CHECK: dmxvi8gerx4pp 1, 0, 2 +0xec,0x80,0x10,0x50 + +#CHECK: pmdmxvi8gerx4 0, 2, 4, 8, 4, 4 +0x07,0x90,0x40,0x84,0xec,0x02,0x20,0x58 + +#CHECK: pmdmxvi8gerx4pp 1, 0, 4, 8, 4, 4 +0x07,0x90,0x40,0x84,0xec,0x80,0x20,0x50 + +#CHECK: dmxvi8gerx4spp 1, 2, 4 +0xec,0x82,0x23,0x10 + +#CHECK: pmdmxvi8gerx4spp 0, 2, 4, 8, 4, 4 +[0x07,0x90,0x40,0x84,0xec,0x02,0x23,0x10] diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt index 827142d3f505d..639904a1b0f40 100644 --- a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt +++ b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt @@ -69,3 +69,21 @@ #CHECK: stxvprll 6, 0, 1 0xda 0x0d 0xc0 0x7c + +#CHECK: dmxvi8gerx4 1, 2, 4 +0x58,0x20,0x82,0xec + +#CHECK: dmxvi8gerx4pp 1, 0, 2 +0x50,0x10,0x80,0xec + +#CHECK: pmdmxvi8gerx4 0, 2, 4, 8, 4, 4 +0x84,0x40,0x90,0x07,0x58,0x20,0x02,0xec + +#CHECK: pmdmxvi8gerx4pp 1, 0, 4, 8, 4, 4 +0x84,0x40,0x90,0x07,0x50,0x20,0x80,0xec + +#CHECK: dmxvi8gerx4spp 1, 2, 4 +0x10,0x23,0x82,0xec + +#CHECK: pmdmxvi8gerx4spp 0, 2, 4, 8, 4, 4 +0x84,0x40,0x90,0x07,0x10,0x23,0x02,0xec diff --git a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s index 7e4f0d8658546..f57bd50c9d697 100644 --- a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s +++ b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s @@ -96,3 +96,33 @@ # CHECK-BE: stxvprll 6, 0, 1 # encoding: [0x7c,0xc0,0x0d,0xda] # CHECK-LE: stxvprll 6, 0, 1 # encoding: [0xda,0x0d,0xc0,0x7c] stxvprll 6, 0, 1 + + dmxvi8gerx4 1, 2, 4 +# CHECK-BE: dmxvi8gerx4 1, 2, 4 # encoding: [0xec,0x82,0x20,0x58] +# CHECK-LE: dmxvi8gerx4 1, 2, 4 # encoding: [0x58,0x20,0x82,0xec] + + dmxvi8gerx4pp 1, 0, 2 +# CHECK-BE: dmxvi8gerx4pp 1, 0, 2 # encoding: [0xec,0x80,0x10,0x50] +# CHECK-LE: dmxvi8gerx4pp 1, 0, 2 # encoding: [0x50,0x10,0x80,0xec] + + pmdmxvi8gerx4 0, 2, 4, 8, 4, 4 +# CHECK-BE: pmdmxvi8gerx4 0, 2, 4, 8, 4, 4 # encoding: [0x07,0x90,0x40,0x84, +# CHECK-BE-SAME: 0xec,0x02,0x20,0x58] +# CHECK-LE: pmdmxvi8gerx4 0, 2, 4, 8, 4, 4 # encoding: [0x84,0x40,0x90,0x07, +# CHECK-LE-SAME: 0x58,0x20,0x02,0xec] + + pmdmxvi8gerx4pp 1, 0, 4, 8, 4, 4 +#CHECK-BE: pmdmxvi8gerx4pp 1, 0, 4, 8, 4, 4 # encoding: [0x07,0x90,0x40,0x84, +#CHECK-BE-SAME: 0xec,0x80,0x20,0x50] +#CHECK-LE: pmdmxvi8gerx4pp 1, 0, 4, 8, 4, 4 # encoding: [0x84,0x40,0x90,0x07, +#CHECK-LE-SAME: 0x50,0x20,0x80,0xec] + + dmxvi8gerx4spp 1, 2, 4 +#CHECK-BE: dmxvi8gerx4spp 1, 2, 4 # encoding: [0xec,0x82,0x23,0x10] +#CHECK-LE: dmxvi8gerx4spp 1, 2, 4 # encoding: [0x10,0x23,0x82,0xec] + + pmdmxvi8gerx4spp 0, 2, 4, 8, 4, 4 +#CHECK-BE: pmdmxvi8gerx4spp 0, 2, 4, 8, 4, 4 # encoding: [0x07,0x90,0x40,0x84, +#CHECK-BE-SAME: 0xec,0x02,0x23,0x10] +#CHECK-LE: pmdmxvi8gerx4spp 0, 2, 4, 8, 4, 4 # encoding: [0x84,0x40,0x90,0x07, +#CHECK-LE-SAME: 0x10,0x23,0x02,0xec] From 0bb02fa3346d1d72167d4b89019d05f1a9f73bc1 Mon Sep 17 00:00:00 2001 From: Maryam Moghadas Date: Wed, 12 Mar 2025 21:47:06 +0000 Subject: [PATCH 2/3] Add missing predicates and move the MMA mask immediate defs to PPCInstrInfo.td --- llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td | 11 ++--------- llvm/lib/Target/PowerPC/PPCInstrInfo.td | 7 +++++++ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td index 17955a3fb1fd8..8cb926eef4da6 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td @@ -11,13 +11,6 @@ // //===----------------------------------------------------------------------===// -// Mask immediates for MMA instructions (2, 4 and 8 bits). -def Msk2Imm : ImmLeaf(Imm); }]>; -def Msk4Imm : ImmLeaf(Imm); }]>; -def Msk8Imm : ImmLeaf(Imm); }]>; - -def MMA : Predicate<"Subtarget->hasMMA()">; - class XX3Form_AT3_XABp5_P1 opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, list pattern> : I { @@ -132,7 +125,7 @@ class MMIRR_XX3Form_X8YP4_XAp5B6 opcode, bits<8> xo, dag OOL, dag IOL, multiclass DMR_UM_XOEO opcode, bits<8> xo, dag IOL, string asmbase, string asmstr> { - let Predicates = [IsISAFuture] in { + let Predicates = [MMA, IsISAFuture] in { def NAME : XX3Form_AT3_XAp5B6, @@ -147,7 +140,7 @@ multiclass DMR_UM_XOEO opcode, bits<8> xo, dag IOL, string asmbase, multiclass DMR_UM_M448_XOEO opcode, bits<8> xo, dag IOL, string asmbase, string asmstr> { defm NAME : DMR_UM_XOEO; - let Predicates = [IsISAFuture] in { + let Predicates = [MMA, PrefixInstrs, IsISAFuture] in { def PM#NAME : MMIRR_XX3Form_X8YP4_XAp5B6< opcode, !or(xo, 0x01), (outs dmr:$AT), diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index be90a5c562c57..175ba6009364a 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -3474,6 +3474,13 @@ class PPCAsmPseudo let TSFlags{10} = MemriOp; } +// Mask immediates for MMA instructions (2, 4 and 8 bits). +def Msk2Imm : ImmLeaf(Imm); }]>; +def Msk4Imm : ImmLeaf(Imm); }]>; +def Msk8Imm : ImmLeaf(Imm); }]>; + +def MMA : Predicate<"Subtarget->hasMMA()">; + // Prefixed instructions may require access to the above defs at a later // time so we include this after the def. include "PPCInstrP10.td" From ea7a6248a8712c9fd6ce2f846a928ae9eb21c584 Mon Sep 17 00:00:00 2001 From: Maryam Moghadas Date: Mon, 17 Mar 2025 16:13:49 +0000 Subject: [PATCH 3/3] Update the test and fix the mask type --- llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td | 5 +- .../test/CodeGen/PowerPC/dmf-outer-product.ll | 68 +++++++++---------- 2 files changed, 37 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td index 8cb926eef4da6..f7f2c855da5d7 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td @@ -206,6 +206,7 @@ let Predicates = [IsISAFuture] in { def DMSETDMRZ : XForm_AT3<31, 2, 177, (outs dmr:$AT), (ins), "dmsetdmrz $AT", NoItinerary, [(set v1024i1:$AT, (int_ppc_mma_dmsetdmrz))]>; +} // MMA+ accumulating/non-accumulating instructions. @@ -255,7 +256,7 @@ let Predicates = [MMA, PrefixInstrs, IsISAFuture] in { def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4spp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB, Msk8Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), + Msk4Imm:$PMSK)), (PMDMXVI8GERX4SPP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + Msk4Imm:$YMSK, Msk4Imm:$PMSK)>; } diff --git a/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll b/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll index 774b13e0fd2d3..3112666f0188c 100644 --- a/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll +++ b/llvm/test/CodeGen/PowerPC/dmf-outer-product.ll @@ -37,10 +37,10 @@ define void @test_dmxvi8gerx4(ptr %vpp, ptr %vcp, ptr %resp) { ; CHECK-BE-NEXT: stxvp vsp34, 0(r5) ; CHECK-BE-NEXT: blr entry: - %0 = load <256 x i1>, ptr %vpp, align 32 - %1 = load <16 x i8>, ptr %vcp, align 32 - %2 = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> %0, <16 x i8> %1) - store <1024 x i1> %2, ptr %resp, align 64 + %v1 = load <256 x i1>, ptr %vpp, align 32 + %v2 = load <16 x i8>, ptr %vcp, align 32 + %call = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> %v1, <16 x i8> %v2) + store <1024 x i1> %call, ptr %resp, align 64 ret void } @@ -87,11 +87,11 @@ define void @test_dmxvi8gerx4pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) { ; CHECK-BE-NEXT: stxvp vsp34, 0(r6) ; CHECK-BE-NEXT: blr entry: - %0 = load <1024 x i1>, ptr %vop, align 64 - %1 = load <256 x i1>, ptr %vpp, align 32 - %2 = load <16 x i8>, ptr %vcp, align 32 - %3 = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4pp(<1024 x i1> %0, <256 x i1> %1, <16 x i8> %2) - store <1024 x i1> %3, ptr %resp, align 64 + %v.dmr = load <1024 x i1>, ptr %vop, align 64 + %v1 = load <256 x i1>, ptr %vpp, align 32 + %v2 = load <16 x i8>, ptr %vcp, align 32 + %call = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4pp(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2) + store <1024 x i1> %call, ptr %resp, align 64 ret void } @@ -138,11 +138,11 @@ define void @test_dmxvi8gerx4spp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) { ; CHECK-BE-NEXT: stxvp vsp34, 0(r6) ; CHECK-BE-NEXT: blr entry: - %0 = load <1024 x i1>, ptr %vop, align 64 - %1 = load <256 x i1>, ptr %vpp, align 32 - %2 = load <16 x i8>, ptr %vcp, align 32 - %3 = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4spp(<1024 x i1> %0, <256 x i1> %1, <16 x i8> %2) - store <1024 x i1> %3, ptr %resp, align 64 + %v.dmr = load <1024 x i1>, ptr %vop, align 64 + %v1 = load <256 x i1>, ptr %vpp, align 32 + %v2 = load <16 x i8>, ptr %vcp, align 32 + %call = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4spp(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2) + store <1024 x i1> %call, ptr %resp, align 64 ret void } @@ -160,7 +160,7 @@ define void @test_pmdmxvi8gerx4pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) { ; CHECK-NEXT: lxv v3, 0(r4) ; CHECK-NEXT: lxv vs0, 0(r5) ; CHECK-NEXT: lxv v2, 16(r4) -; CHECK-NEXT: pmdmxvi8gerx4pp dmr0, vsp34, vs0, 0, 0, 0 +; CHECK-NEXT: pmdmxvi8gerx4pp dmr0, vsp34, vs0, 42, 7, 9 ; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 ; CHECK-NEXT: stxvp vsp34, 96(r6) ; CHECK-NEXT: stxvp vsp36, 64(r6) @@ -180,7 +180,7 @@ define void @test_pmdmxvi8gerx4pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) { ; CHECK-BE-NEXT: lxv v3, 16(r4) ; CHECK-BE-NEXT: lxv vs0, 0(r5) ; CHECK-BE-NEXT: lxv v2, 0(r4) -; CHECK-BE-NEXT: pmdmxvi8gerx4pp dmr0, vsp34, vs0, 0, 0, 0 +; CHECK-BE-NEXT: pmdmxvi8gerx4pp dmr0, vsp34, vs0, 42, 7, 9 ; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 ; CHECK-BE-NEXT: stxvp vsp36, 96(r6) ; CHECK-BE-NEXT: stxvp vsp34, 64(r6) @@ -189,11 +189,11 @@ define void @test_pmdmxvi8gerx4pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) { ; CHECK-BE-NEXT: stxvp vsp34, 0(r6) ; CHECK-BE-NEXT: blr entry: - %0 = load <1024 x i1>, ptr %vop, align 64 - %1 = load <256 x i1>, ptr %vpp, align 32 - %2 = load <16 x i8>, ptr %vcp, align 32 - %3 = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4pp(<1024 x i1> %0, <256 x i1> %1, <16 x i8> %2, i32 0, i32 0, i32 0) - store <1024 x i1> %3, ptr %resp, align 64 + %v.dmr = load <1024 x i1>, ptr %vop, align 64 + %v1 = load <256 x i1>, ptr %vpp, align 32 + %v2 = load <16 x i8>, ptr %vcp, align 32 + %call = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4pp(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2, i32 42, i32 7, i32 9) + store <1024 x i1> %call, ptr %resp, align 64 ret void } @@ -205,7 +205,7 @@ define void @test_pmdmxvi8gerx4(ptr %vpp, ptr %vcp, ptr %resp) { ; CHECK-NEXT: lxv v3, 0(r3) ; CHECK-NEXT: lxv vs0, 0(r4) ; CHECK-NEXT: lxv v2, 16(r3) -; CHECK-NEXT: pmdmxvi8gerx4 dmr0, vsp34, vs0, 0, 0, 0 +; CHECK-NEXT: pmdmxvi8gerx4 dmr0, vsp34, vs0, 55, 5, 10 ; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 ; CHECK-NEXT: stxvp vsp34, 96(r5) ; CHECK-NEXT: stxvp vsp36, 64(r5) @@ -219,7 +219,7 @@ define void @test_pmdmxvi8gerx4(ptr %vpp, ptr %vcp, ptr %resp) { ; CHECK-BE-NEXT: lxv v3, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r4) ; CHECK-BE-NEXT: lxv v2, 0(r3) -; CHECK-BE-NEXT: pmdmxvi8gerx4 dmr0, vsp34, vs0, 0, 0, 0 +; CHECK-BE-NEXT: pmdmxvi8gerx4 dmr0, vsp34, vs0, 55, 5, 10 ; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 ; CHECK-BE-NEXT: stxvp vsp36, 96(r5) ; CHECK-BE-NEXT: stxvp vsp34, 64(r5) @@ -228,10 +228,10 @@ define void @test_pmdmxvi8gerx4(ptr %vpp, ptr %vcp, ptr %resp) { ; CHECK-BE-NEXT: stxvp vsp34, 0(r5) ; CHECK-BE-NEXT: blr entry: - %0 = load <256 x i1>, ptr %vpp, align 32 - %1 = load <16 x i8>, ptr %vcp, align 32 - %2 = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1> %0, <16 x i8> %1, i32 0, i32 0, i32 0) - store <1024 x i1> %2, ptr %resp, align 64 + %v1 = load <256 x i1>, ptr %vpp, align 32 + %v2 = load <16 x i8>, ptr %vcp, align 32 + %call = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1> %v1, <16 x i8> %v2, i32 55, i32 5, i32 10) + store <1024 x i1> %call, ptr %resp, align 64 ret void } @@ -249,7 +249,7 @@ define dso_local void @test_pmdmxvi8gerx4spp(ptr %vop, ptr %vpp, ptr %vcp, ptr % ; CHECK-NEXT: lxv v3, 0(r4) ; CHECK-NEXT: lxv vs0, 0(r5) ; CHECK-NEXT: lxv v2, 16(r4) -; CHECK-NEXT: pmdmxvi8gerx4spp dmr0, vsp34, vs0, 0, 0, 0 +; CHECK-NEXT: pmdmxvi8gerx4spp dmr0, vsp34, vs0, 100, 6, 12 ; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 ; CHECK-NEXT: stxvp vsp34, 96(r6) ; CHECK-NEXT: stxvp vsp36, 64(r6) @@ -269,7 +269,7 @@ define dso_local void @test_pmdmxvi8gerx4spp(ptr %vop, ptr %vpp, ptr %vcp, ptr % ; CHECK-BE-NEXT: lxv v3, 16(r4) ; CHECK-BE-NEXT: lxv vs0, 0(r5) ; CHECK-BE-NEXT: lxv v2, 0(r4) -; CHECK-BE-NEXT: pmdmxvi8gerx4spp dmr0, vsp34, vs0, 0, 0, 0 +; CHECK-BE-NEXT: pmdmxvi8gerx4spp dmr0, vsp34, vs0, 100, 6, 12 ; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 ; CHECK-BE-NEXT: stxvp vsp36, 96(r6) ; CHECK-BE-NEXT: stxvp vsp34, 64(r6) @@ -278,10 +278,10 @@ define dso_local void @test_pmdmxvi8gerx4spp(ptr %vop, ptr %vpp, ptr %vcp, ptr % ; CHECK-BE-NEXT: stxvp vsp34, 0(r6) ; CHECK-BE-NEXT: blr entry: - %0 = load <1024 x i1>, ptr %vop, align 64 - %1 = load <256 x i1>, ptr %vpp, align 32 - %2 = load <16 x i8>, ptr %vcp, align 32 - %3 = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1> %0, <256 x i1> %1, <16 x i8> %2, i32 0, i32 0, i32 0) - store <1024 x i1> %3, ptr %resp, align 64 + %v.dmr = load <1024 x i1>, ptr %vop, align 64 + %v1 = load <256 x i1>, ptr %vpp, align 32 + %v2 = load <16 x i8>, ptr %vcp, align 32 + %call = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2, i32 100, i32 6, i32 12) + store <1024 x i1> %call, ptr %resp, align 64 ret void }