From cd3606902144d367503db089607664099e4e1918 Mon Sep 17 00:00:00 2001 From: Roland Froese Date: Wed, 5 Mar 2025 19:16:12 +0000 Subject: [PATCH 1/3] dense math enablement --- llvm/include/llvm/IR/IntrinsicsPowerPC.td | 10 ++++++++++ llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td | 9 ++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 6f49ed39d8a09..af66b8206182e 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1644,6 +1644,16 @@ let TargetPrefix = "ppc" in { def int_ppc_mma_xxsetaccz : DefaultAttrsIntrinsic<[llvm_v512i1_ty], [], [IntrNoMem]>; + def int_ppc_mma_dmsetdmrz : + DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [], [IntrNoMem]>; + + def int_ppc_mma_dmmr : + DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty], [IntrNoMem]>; + + def int_ppc_mma_dmxor : + DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty, + llvm_v1024i1_ty], [IntrNoMem]>; + // MMA Reduced-Precision: Outer Product Intrinsic Definitions. defm int_ppc_mma_xvi4ger8 : PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td index 4da2969857d55..b7100462cb967 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td @@ -105,12 +105,15 @@ let Predicates = [IsISAFuture] in { "dmxxinstfdmr256 $AT, $XBp, $P", []>; def DMMR : XForm_ATB3<31, 6, 177, (outs dmr:$AT), (ins dmr:$AB), - "dmmr $AT, $AB", []>; + "dmmr $AT, $AB", + [(set v1024i1:$AT, (int_ppc_mma_dmmr v1024i1:$AB))]>; def DMXOR : XForm_ATB3<31, 7, 177, (outs dmr:$AT), (ins dmr:$ATi, dmr:$AB), - "dmxor $AT, $AB", []>, + "dmxor $AT, $AB", + [(set v1024i1:$AT, (int_ppc_mma_dmxor v1024i1:$ATi, v1024i1:$AB))]>, RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; def DMSETDMRZ : XForm_AT3<31, 2, 177, (outs dmr:$AT), (ins), - "dmsetdmrz $AT", NoItinerary, []>; + "dmsetdmrz $AT", NoItinerary, + [(set v1024i1:$AT, (int_ppc_mma_dmsetdmrz))]>; } From f46c4bc142d1db070d1265ad25b091d0a5482ce4 Mon Sep 17 00:00:00 2001 From: Roland Froese Date: Wed, 5 Mar 2025 19:50:01 +0000 Subject: [PATCH 2/3] test --- llvm/test/CodeGen/PowerPC/dmr-enable.ll | 134 ++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/dmr-enable.ll diff --git a/llvm/test/CodeGen/PowerPC/dmr-enable.ll b/llvm/test/CodeGen/PowerPC/dmr-enable.ll new file mode 100644 index 0000000000000..e4e86148ca044 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/dmr-enable.ll @@ -0,0 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE + +define void @tdmrz(ptr nocapture readonly %vp1, ptr nocapture %resp) { +; CHECK-LABEL: tdmrz: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: dmsetdmrz dmr0 +; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-NEXT: stxvp vsp34, 96(r4) +; CHECK-NEXT: stxvp vsp36, 64(r4) +; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-NEXT: stxvp vsp34, 32(r4) +; CHECK-NEXT: stxvp vsp36, 0(r4) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: tdmrz: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: dmsetdmrz dmr0 +; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r4) +; CHECK-BE-NEXT: stxvp vsp34, 64(r4) +; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r4) +; CHECK-BE-NEXT: stxvp vsp34, 0(r4) +; CHECK-BE-NEXT: blr +entry: + %z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz() + store <1024 x i1> %z, ptr %resp, align 32 + ret void +} + +define void @tdmmr(ptr nocapture readonly %vp1, ptr nocapture %resp) { +; CHECK-LABEL: tdmmr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: lxvp vsp34, 64(r3) +; CHECK-NEXT: lxvp vsp36, 96(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-NEXT: dmmr dmr0, dmr0 +; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-NEXT: stxvp vsp34, 96(r4) +; CHECK-NEXT: stxvp vsp36, 64(r4) +; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-NEXT: stxvp vsp34, 32(r4) +; CHECK-NEXT: stxvp vsp36, 0(r4) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: tdmmr: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: lxvp vsp34, 32(r3) +; CHECK-BE-NEXT: lxvp vsp36, 0(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-BE-NEXT: dmmr dmr0, dmr0 +; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r4) +; CHECK-BE-NEXT: stxvp vsp34, 64(r4) +; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r4) +; CHECK-BE-NEXT: stxvp vsp34, 0(r4) +; CHECK-BE-NEXT: blr +entry: + %l = load <1024 x i1>, ptr %vp1, align 32 + %c = call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> %l) + store <1024 x i1> %c, ptr %resp, align 32 + ret void +} + +define void @tdmxor(ptr nocapture readonly %vp1, ptr %vp2, ptr nocapture %resp) { +; CHECK-LABEL: tdmxor: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: lxvp vsp34, 64(r3) +; CHECK-NEXT: lxvp vsp36, 96(r3) +; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-NEXT: lxvp vsp34, 0(r4) +; CHECK-NEXT: lxvp vsp36, 32(r4) +; CHECK-NEXT: dmxxinstfdmr512 wacc_hi1, vsp36, vsp34, 1 +; CHECK-NEXT: lxvp vsp34, 64(r4) +; CHECK-NEXT: lxvp vsp36, 96(r4) +; CHECK-NEXT: dmxxinstfdmr512 wacc1, vsp36, vsp34, 0 +; CHECK-NEXT: dmxor dmr0, dmr1 +; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-NEXT: stxvp vsp34, 96(r5) +; CHECK-NEXT: stxvp vsp36, 64(r5) +; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-NEXT: stxvp vsp34, 32(r5) +; CHECK-NEXT: stxvp vsp36, 0(r5) +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: tdmxor: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvp vsp34, 96(r3) +; CHECK-BE-NEXT: lxvp vsp36, 64(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-BE-NEXT: lxvp vsp34, 32(r3) +; CHECK-BE-NEXT: lxvp vsp36, 0(r3) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-BE-NEXT: lxvp vsp34, 96(r4) +; CHECK-BE-NEXT: lxvp vsp36, 64(r4) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi1, vsp36, vsp34, 1 +; CHECK-BE-NEXT: lxvp vsp34, 32(r4) +; CHECK-BE-NEXT: lxvp vsp36, 0(r4) +; CHECK-BE-NEXT: dmxxinstfdmr512 wacc1, vsp36, vsp34, 0 +; CHECK-BE-NEXT: dmxor dmr0, dmr1 +; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-BE-NEXT: stxvp vsp36, 96(r5) +; CHECK-BE-NEXT: stxvp vsp34, 64(r5) +; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-BE-NEXT: stxvp vsp36, 32(r5) +; CHECK-BE-NEXT: stxvp vsp34, 0(r5) +; CHECK-BE-NEXT: blr +entry: + %l = load <1024 x i1>, ptr %vp1, align 32 + %r = load <1024 x i1>, ptr %vp2, align 32 + %x = call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> %l, <1024 x i1> %r) + store <1024 x i1> %x, ptr %resp, align 32 + ret void +} + +declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz() +declare <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1>) +declare <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1>, <1024 x i1>) From 0b20517cf532644f32df06ab250bff7dc5254ab0 Mon Sep 17 00:00:00 2001 From: Roland Froese Date: Mon, 10 Mar 2025 21:18:50 +0000 Subject: [PATCH 3/3] fix test --- llvm/test/CodeGen/PowerPC/dmr-enable.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/PowerPC/dmr-enable.ll b/llvm/test/CodeGen/PowerPC/dmr-enable.ll index e4e86148ca044..31c61190c0729 100644 --- a/llvm/test/CodeGen/PowerPC/dmr-enable.ll +++ b/llvm/test/CodeGen/PowerPC/dmr-enable.ll @@ -2,7 +2,7 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -mcpu=future -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \ ; RUN: -mcpu=future -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE