From 5356180ec5440547aebad7d2978b1e7d92a978d8 Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Wed, 4 Jun 2025 13:01:16 -0500 Subject: [PATCH 1/2] [PowerPC] For cpu=future allow auto generation of stxvp instructions by default --- llvm/lib/Target/PowerPC/PPC.td | 3 ++- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 13 +++++++++---- llvm/test/CodeGen/PowerPC/dmr-spill.ll | 6 +++--- llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll | 2 -- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 6b058d1a74772..fd850faf7b2fb 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -482,7 +482,8 @@ def ProcessorFeatures { // Future // For future CPU we assume that all of the existing features from Power11 // still exist with the exception of those we know are Power11 specific. - list<SubtargetFeature> FutureAdditionalFeatures = [FeatureISAFuture]; + list<SubtargetFeature> FutureAdditionalFeatures = [DirectivePwrFuture, + FeatureISAFuture]; list<SubtargetFeature> FutureSpecificFeatures = []; list<SubtargetFeature> FutureInheritableFeatures = !listconcat(P11InheritableFeatures, FutureAdditionalFeatures); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 0c2a506005604..94e95953363db 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1476,7 +1476,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setMinFunctionAlignment(Align(4)); - switch (Subtarget.getCPUDirective()) { + auto CPUDirective = Subtarget.getCPUDirective(); + switch (CPUDirective) { default: break; case PPC::DIR_970: case PPC::DIR_A2: @@ -1508,15 +1509,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // The Freescale cores do better with aggressive inlining of memcpy and // friends. GCC uses same threshold of 128 bytes (= 32 word stores). 
- if (Subtarget.getCPUDirective() == PPC::DIR_E500mc || - Subtarget.getCPUDirective() == PPC::DIR_E5500) { + if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) { MaxStoresPerMemset = 32; MaxStoresPerMemsetOptSize = 16; MaxStoresPerMemcpy = 32; MaxStoresPerMemcpyOptSize = 8; MaxStoresPerMemmove = 32; MaxStoresPerMemmoveOptSize = 8; - } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) { + } else if (CPUDirective == PPC::DIR_A2) { // The A2 also benefits from (very) aggressive inlining of memcpy and // friends. The overhead of a the function call, even when warm, can be // over one hundred cycles. @@ -1529,6 +1529,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, MaxLoadsPerMemcmpOptSize = 4; } + // Enable generation of STXVP instructions by default for mcpu=future. + if (CPUDirective == PPC::DIR_PWR_FUTURE && + !DisableAutoPairedVecSt.getNumOccurrences()) + DisableAutoPairedVecSt = false; + IsStrictFPEnabled = true; // Let the subtarget (CPU) decide if a predictable select is more expensive diff --git a/llvm/test/CodeGen/PowerPC/dmr-spill.ll b/llvm/test/CodeGen/PowerPC/dmr-spill.ll index b224643a6dd9f..c1b01cd2d3fd5 100644 --- a/llvm/test/CodeGen/PowerPC/dmr-spill.ll +++ b/llvm/test/CodeGen/PowerPC/dmr-spill.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ -; RUN: -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \ +; RUN: -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \ -; RUN: -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \ +; RUN: -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s --check-prefix=AIX ; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \ -; RUN: -disable-auto-paired-vec-st=false 
-ppc-asm-full-reg-names \ +; RUN: -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s --check-prefix=AIX32 declare <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2pp(<1024 x i1>, <256 x i1>, <16 x i8>) diff --git a/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll b/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll index c2c8a42c402a2..8dd17abb26347 100644 --- a/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll +++ b/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll @@ -1,11 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; This test is a copy of mma-acc-spill.ll except that it uses mcpu=future. ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ -; RUN: -disable-auto-paired-vec-st=false \ ; RUN: -mcpu=future -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ -; RUN: -disable-auto-paired-vec-st=false \ ; RUN: -mcpu=future -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE From d2dfec6dfebcd70f54ab808dd2ee2020da18359b Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Wed, 4 Jun 2025 16:14:49 -0500 Subject: [PATCH 2/2] address review comment --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 94e95953363db..3ea9b0b11ce89 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1531,7 +1531,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // Enable generation of STXVP instructions by default for mcpu=future. if (CPUDirective == PPC::DIR_PWR_FUTURE && - !DisableAutoPairedVecSt.getNumOccurrences()) + DisableAutoPairedVecSt.getNumOccurrences() == 0) DisableAutoPairedVecSt = false; IsStrictFPEnabled = true;