Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19208,13 +19208,58 @@ static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
return MatPCRel;
}

// Rewrite a vector increment as a subtraction of an all-ones vector:
//   (add X, (build_vector 1, 1, ...)) -> (sub X, (bitcast (v4i32 all-ones)))
// This uses the identity X + 1 == X - (-1). An all-ones bit pattern reads as
// -1 in every lane regardless of the element width, so one v4i32 all-ones
// vector bitcast to the result type covers v16i8, v8i16, v4i32 and v2i64.
// The all-ones vector is meant to be materialized with xxleqv, hence the
// combine is only attempted when the subtarget has VSX.
// Returns the replacement SUB node, or an empty SDValue if N does not match.
static SDValue combineADDToSUB(SDNode *N, SelectionDAG &DAG,
                               const PPCSubtarget &Subtarget) {
  if (!Subtarget.hasVSX())
    return SDValue();

  // Only the four 128-bit integer vector types are handled.
  const EVT ResultVT = N->getValueType(0);
  const bool IsHandledVT = ResultVT == MVT::v16i8 || ResultVT == MVT::v8i16 ||
                           ResultVT == MVT::v4i32 || ResultVT == MVT::v2i64;
  if (!IsHandledVT)
    return SDValue();

  // The second operand has to be a BUILD_VECTOR whose every element is the
  // constant 1; anything else (non-constant or a different value) bails out.
  SDValue Addend = N->getOperand(1);
  if (Addend.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  for (SDValue Elt : Addend->op_values()) {
    auto *EltC = dyn_cast<ConstantSDNode>(Elt);
    if (!EltC || EltC->getSExtValue() != 1)
      return SDValue();
  }

  SDLoc DL(N);

  // Build the all-ones pattern as v4i32 and reinterpret it as the result type.
  SDValue AllOnesWord = DAG.getConstant(APInt::getAllOnes(32), DL, MVT::i32);
  SDValue Words[4] = {AllOnesWord, AllOnesWord, AllOnesWord, AllOnesWord};
  SDValue AllOnesV4I32 = DAG.getBuildVector(MVT::v4i32, DL, Words);
  SDValue MinusOneVec = DAG.getNode(ISD::BITCAST, DL, ResultVT, AllOnesV4I32);

  // X + 1 becomes X - (-1).
  return DAG.getNode(ISD::SUB, DL, ResultVT, N->getOperand(0), MinusOneVec);
}

// DAG combine entry point for ADD nodes. Tries each ADD-specific combine in
// turn and returns the first replacement produced; if none applies, the
// result is an empty SDValue.
SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  if (SDValue Combined = combineADDToADDZE(N, DAG, Subtarget))
    return Combined;

  if (SDValue Combined = combineADDToMAT_PCREL_ADDR(N, DAG, Subtarget))
    return Combined;

  // Last candidate: its result (possibly empty) is the overall result.
  return combineADDToSUB(N, DAG, Subtarget);
}

Expand Down
19 changes: 9 additions & 10 deletions llvm/test/CodeGen/PowerPC/addition-vector-all-ones.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,14 @@
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr9 -mtriple=powerpc-ibm-aix \
; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s

; The addition of vector `A` with a vector of 1s currently uses `vspltisw` to generate the vector of 1s, followed by an add operation.
; The optimized version uses `xxleqv` and `vsubu` to generate a vector of -1s and subtract it, leveraging the identity A - (-1) = A + 1.

; Function for the vector type v2i64 `a + {1, 1}`
define <2 x i64> @test_v2i64(<2 x i64> %a) {
; CHECK-LABEL: test_v2i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vspltisw v3, 1
; CHECK-NEXT: vupklsw v3, v3
; CHECK-NEXT: vaddudm v2, v2, v3
; CHECK-NEXT: xxleqv v3, v3, v3
; CHECK-NEXT: vsubudm v2, v2, v3
; CHECK-NEXT: blr
entry:
%add = add <2 x i64> %a, splat (i64 1)
Expand All @@ -27,8 +26,8 @@ entry:
define <4 x i32> @test_v4i32(<4 x i32> %a) {
; CHECK-LABEL: test_v4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vspltisw v3, 1
; CHECK-NEXT: vadduwm v2, v2, v3
; CHECK-NEXT: xxleqv v3, v3, v3
; CHECK-NEXT: vsubuwm v2, v2, v3
; CHECK-NEXT: blr
entry:
%add = add <4 x i32> %a, splat (i32 1)
Expand All @@ -39,8 +38,8 @@ entry:
define <8 x i16> @test_v8i16(<8 x i16> %a) {
; CHECK-LABEL: test_v8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vspltish v3, 1
; CHECK-NEXT: vadduhm v2, v2, v3
; CHECK-NEXT: xxleqv v3, v3, v3
; CHECK-NEXT: vsubuhm v2, v2, v3
; CHECK-NEXT: blr
entry:
%add = add <8 x i16> %a, splat (i16 1)
Expand All @@ -51,8 +50,8 @@ entry:
define <16 x i8> @test_16i8(<16 x i8> %a) {
; CHECK-LABEL: test_16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v3, 1
; CHECK-NEXT: vaddubm v2, v2, v3
; CHECK-NEXT: xxleqv v3, v3, v3
; CHECK-NEXT: vsububm v2, v2, v3
; CHECK-NEXT: blr
entry:
%add = add <16 x i8> %a, splat (i8 1)
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,8 @@ define <2 x i64> @test_add(<2 x i64> %x, <2 x i64> %y) nounwind {
define <2 x i64> @increment_by_one(<2 x i64> %x) nounwind {
; VSX-LABEL: increment_by_one:
; VSX: # %bb.0:
; VSX-NEXT: vspltisw 3, 1
; VSX-NEXT: vupklsw 3, 3
; VSX-NEXT: vaddudm 2, 2, 3
; VSX-NEXT: xxleqv 35, 35, 35
; VSX-NEXT: vsubudm 2, 2, 3
; VSX-NEXT: blr
;
; NOVSX-LABEL: increment_by_one:
Expand Down