Skip to content

Commit 2f9f93f

Browse files
AditiRM authored and github-actions[bot] committed
Automerge: [PowerPC] Add custom lowering for SADD overflow for i32 and i64 (#159255)
This patch improves the codegen for saddo on i32 and i64 in both 32-bit and 64-bit modes by custom lowering. It implements signed-add overflow detection using the `(x eqv y) & (sum xor x)` bit-level sequence.
2 parents 328c3e3 + fa50a68 commit 2f9f93f

File tree

3 files changed

+48
-13
lines changed

3 files changed

+48
-13
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,8 +210,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
210210
// setbc instruction.
211211
if (!Subtarget.hasP10Vector()) {
212212
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
213-
if (isPPC64)
213+
setOperationAction(ISD::SADDO, MVT::i32, Custom);
214+
if (isPPC64) {
214215
setOperationAction(ISD::SSUBO, MVT::i64, Custom);
216+
setOperationAction(ISD::SADDO, MVT::i64, Custom);
217+
}
215218
}
216219

217220
// Match BITREVERSE to customized fast code sequence in the td file.
@@ -12514,6 +12517,37 @@ SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
1251412517
return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
1251512518
}
1251612519

12520+
/// Custom-lowers a signed add-with-overflow node into plain bitwise arithmetic.
/// The overflow flag is the sign bit of (x eqv y) & (sum xor x): a signed add
/// overflows exactly when both operands have the same sign and the sum's sign
/// differs from theirs.
///
/// \param Op  The node being lowered; operand 0 is x, operand 1 is y, result 0
///            is the sum type and result 1 is the overflow-flag type.
/// \param DAG The SelectionDAG used to build the replacement nodes.
/// \returns The merged pair {sum, overflow flag}.
12521+
/// Rule: (x eqv y) & (sum xor x), where the overflow bit is extracted from the
/// sign bit of that expression.
12522+
SDValue PPCTargetLowering::LowerSADDO(SDValue Op, SelectionDAG &DAG) const {
12523+
12524+
SDLoc dl(Op);
12525+
SDValue LHS = Op.getOperand(0);
12526+
SDValue RHS = Op.getOperand(1);
12527+
// Type of the arithmetic result (result 0 of the node).
EVT VT = Op.getNode()->getValueType(0);
12528+
12529+
// The wrapped sum is the first result and also feeds the overflow check.
SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
12530+
12531+
// Compute (x eqv y) = ~(x xor y); its sign bit is set when x and y have the
// same sign.
12532+
SDValue XorXY = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
12533+
SDValue EqvXY = DAG.getNOT(dl, XorXY, VT);
12534+
// Compute (sum xor x); its sign bit is set when the sum's sign differs from
// x's sign.
12535+
SDValue SumXorX = DAG.getNode(ISD::XOR, dl, VT, Sum, LHS);
12536+
12537+
// overflow = (x eqv y) & (sum xor x); only the sign bit of this value is
// meaningful.
12538+
SDValue OverflowInSign = DAG.getNode(ISD::AND, dl, VT, EqvXY, SumXorX);
12539+
12540+
// Logical right shift by (bit width - 1) moves the sign bit down to the LSB
// and clears every other bit, leaving 0 or 1.
12541+
SDValue Overflow =
12542+
DAG.getNode(ISD::SRL, dl, VT, OverflowInSign,
12543+
DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12544+
// Truncate to the node's overflow result type (result 1; i1 in the
// accompanying IR tests).
12545+
SDValue OverflowTrunc =
12546+
DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12547+
12548+
// Return both results in the order the original node defines them.
return DAG.getMergeValues({Sum, OverflowTrunc}, dl);
12549+
}
12550+
1251712551
// Lower unsigned 3-way compare producing -1/0/1.
1251812552
SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
1251912553
SDLoc DL(Op);
@@ -12565,6 +12599,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1256512599
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
1256612600
case ISD::SSUBO:
1256712601
return LowerSSUBO(Op, DAG);
12602+
case ISD::SADDO:
12603+
return LowerSADDO(Op, DAG);
1256812604

1256912605
case ISD::INLINEASM:
1257012606
case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -705,6 +705,7 @@ namespace llvm {
705705
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
706706
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
707707
SDValue LowerSSUBO(SDValue Op, SelectionDAG &DAG) const;
708+
SDValue LowerSADDO(SDValue Op, SelectionDAG &DAG) const;
708709
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
709710
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
710711
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;

llvm/test/CodeGen/PowerPC/saddo-ssubo.ll

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,11 @@ entry:
4949
define i1 @test_saddo_i32(i32 %a, i32 %b) nounwind {
5050
; CHECK-LABEL: test_saddo_i32:
5151
; CHECK: # %bb.0: # %entry
52-
; CHECK-NEXT: add 5, 3, 4
53-
; CHECK-NEXT: cmpwi 1, 4, 0
54-
; CHECK-NEXT: cmpw 5, 3
55-
; CHECK-NEXT: li 3, 1
56-
; CHECK-NEXT: creqv 20, 4, 0
57-
; CHECK-NEXT: isel 3, 0, 3, 20
52+
; CHECK-NEXT: xor 5, 3, 4
53+
; CHECK-NEXT: add 4, 3, 4
54+
; CHECK-NEXT: xor 3, 4, 3
55+
; CHECK-NEXT: andc 3, 3, 5
56+
; CHECK-NEXT: srwi 3, 3, 31
5857
; CHECK-NEXT: blr
5958
entry:
6059
%res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind
@@ -65,12 +64,11 @@ entry:
6564
define i1 @test_saddo_i64(i64 %a, i64 %b) nounwind {
6665
; CHECK-LABEL: test_saddo_i64:
6766
; CHECK: # %bb.0: # %entry
68-
; CHECK-NEXT: add 5, 3, 4
69-
; CHECK-NEXT: cmpdi 1, 4, 0
70-
; CHECK-NEXT: cmpd 5, 3
71-
; CHECK-NEXT: li 3, 1
72-
; CHECK-NEXT: creqv 20, 4, 0
73-
; CHECK-NEXT: isel 3, 0, 3, 20
67+
; CHECK-NEXT: xor 5, 3, 4
68+
; CHECK-NEXT: add 4, 3, 4
69+
; CHECK-NEXT: xor 3, 4, 3
70+
; CHECK-NEXT: andc 3, 3, 5
71+
; CHECK-NEXT: rldicl 3, 3, 1, 63
7472
; CHECK-NEXT: blr
7573
entry:
7674
%res = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind

0 commit comments

Comments
 (0)