Skip to content

Commit da682f2

Browse files
author
Zaara Syeda
committed
[PPC] Add custom lowering for uaddo
Improve the codegen for the uaddo node (i64 in 64-bit mode, i32 in 32-bit mode) by custom lowering it.
1 parent 2b0a708 commit da682f2

File tree

6 files changed

+175
-9
lines changed

6 files changed

+175
-9
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
198198
}
199199
}
200200

201+
if (!Subtarget.hasP10Vector())
202+
setOperationAction(ISD::UADDO, isPPC64 ? MVT::i64 : MVT::i32, Custom);
203+
201204
// Match BITREVERSE to customized fast code sequence in the td file.
202205
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
203206
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
@@ -11967,11 +11970,51 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
1196711970
llvm_unreachable("ERROR:Should return for all cases within swtich.");
1196811971
}
1196911972

11973+
/// Custom lowering for ISD::UADDO.
///
/// Emits an ADDC/ADDC8 that produces the sum plus a glue result, then an
/// ADDZE/ADDZE8 that adds that glued carry to a register loaded with zero via
/// LI/LI8. The ADDZE result is truncated to the node's second result type and
/// returned together with the sum as a MERGE_VALUES node.
///
/// \param Op  The UADDO node to lower.
/// \param DAG The selection DAG the new nodes are created in.
/// \returns The MERGE_VALUES node, or an empty SDValue to defer to the target
///          independent lowering.
SDValue PPCTargetLowering::LowerUaddo(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();

  // Defer to the target independent lowering when any user of this node is a
  // SELECT or a bitwise logic operation.
  for (SDNode *User : Node->uses()) {
    const unsigned UserOpc = User->getOpcode();
    if (UserOpc == ISD::SELECT || ISD::isBitwiseLogicOp(UserOpc))
      return SDValue();
  }

  // Adding 1 or -1 is a special case the target independent lowering already
  // handles, so defer to it as well.
  SDValue Addend = Op.getOperand(1);
  if (isOneConstant(Addend) || isAllOnesConstant(Addend))
    return SDValue();

  // Select the 32-bit or 64-bit flavour of each machine instruction.
  unsigned AddCarryOpc = PPC::ADDC;
  unsigned LoadImmOpc = PPC::LI;
  unsigned AddToZeroOpc = PPC::ADDZE;
  if (Subtarget.isPPC64()) {
    AddCarryOpc = PPC::ADDC8;
    LoadImmOpc = PPC::LI8;
    AddToZeroOpc = PPC::ADDZE8;
  }

  SDLoc DL(Op);
  const EVT SumVT = Node->getValueType(0);
  const EVT CarryVT = Node->getValueType(1);

  // Sum = LHS + RHS; the second result is the glue that feeds the ADDZE.
  SDNode *AddC = DAG.getMachineNode(AddCarryOpc, DL, SumVT, MVT::Glue,
                                    Op.getOperand(0), Addend);
  SDValue Sum(AddC, 0);
  SDValue CarryGlue(AddC, 1);

  // Carry = 0 + carry, computed by ADDZE on a register loaded with zero.
  SDValue ZeroImm = DAG.getTargetConstant(0, DL, SumVT);
  SDValue Zero(DAG.getMachineNode(LoadImmOpc, DL, SumVT, ZeroImm), 0);
  SDValue Carry(DAG.getMachineNode(AddToZeroOpc, DL, SumVT, MVT::Glue, Zero,
                                   CarryGlue),
                0);

  // Narrow the carry to the type of the node's overflow result.
  SDValue CarryBit = DAG.getNode(ISD::TRUNCATE, DL, CarryVT, Carry);
  return DAG.getNode(ISD::MERGE_VALUES, DL, Node->getVTList(), Sum, CarryBit);
}
12010+
1197012011
/// LowerOperation - Provide custom lowering hooks for some operations.
1197112012
///
1197212013
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1197312014
switch (Op.getOpcode()) {
1197412015
default: llvm_unreachable("Wasn't expecting to be able to lower this!");
12016+
case ISD::UADDO:
12017+
return LowerUaddo(Op, DAG);
1197512018
case ISD::FPOW: return lowerPow(Op, DAG);
1197612019
case ISD::FSIN: return lowerSin(Op, DAG);
1197712020
case ISD::FCOS: return lowerCos(Op, DAG);

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1277,6 +1277,7 @@ namespace llvm {
12771277
SDValue LowerGlobalTLSAddressLinux(SDValue Op, SelectionDAG &DAG) const;
12781278
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
12791279
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1280+
SDValue LowerUaddo(SDValue Op, SelectionDAG &DAG) const;
12801281
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
12811282
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
12821283
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;

llvm/lib/Target/PowerPC/PPCMIPeephole.cpp

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,8 @@ struct PPCMIPeephole : public MachineFunctionPass {
139139
void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
140140
MachineInstr *MI);
141141

142+
bool eliminateTruncWhenLoweringUADDO(MachineInstr &MI,
143+
MachineInstr *&ToErase);
142144
// A number of transformations will eliminate the definition of a register
143145
// as all of its uses will be removed. However, this leaves a register
144146
// without a definition for LiveVariables. Such transformations should
@@ -1071,6 +1073,18 @@ bool PPCMIPeephole::simplifyCode() {
10711073
break;
10721074
}
10731075
case PPC::RLDICL: {
1076+
Register SrcReg = MI.getOperand(1).getReg();
1077+
if (!SrcReg.isVirtual())
1078+
break;
1079+
1080+
MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1081+
// We can eliminate clearing the left 63 bits when only the carry-bit is
1082+
// set.
1083+
if (eliminateTruncWhenLoweringUADDO(MI, ToErase)) {
1084+
Simplified = true;
1085+
break;
1086+
}
1087+
10741088
// We can eliminate RLDICL (e.g. for zero-extension)
10751089
// if all bits to clear are already zero in the input.
10761090
// This code assume following code sequence for zero-extension.
@@ -1082,11 +1096,6 @@ bool PPCMIPeephole::simplifyCode() {
10821096
if (MI.getOperand(2).getImm() != 0)
10831097
break;
10841098

1085-
Register SrcReg = MI.getOperand(1).getReg();
1086-
if (!SrcReg.isVirtual())
1087-
break;
1088-
1089-
MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
10901099
if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
10911100
SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
10921101
break;
@@ -1277,7 +1286,15 @@ bool PPCMIPeephole::simplifyCode() {
12771286
Simplified = true;
12781287
break;
12791288
}
1280-
case PPC::RLWINM:
1289+
case PPC::RLWINM: {
1290+
// We can eliminate clearing the left 31 bits when only the carry-bit is
1291+
// set.
1292+
if (eliminateTruncWhenLoweringUADDO(MI, ToErase)) {
1293+
Simplified = true;
1294+
break;
1295+
}
1296+
}
1297+
LLVM_FALLTHROUGH;
12811298
case PPC::RLWINM_rec:
12821299
case PPC::RLWINM8:
12831300
case PPC::RLWINM8_rec: {
@@ -1889,6 +1906,38 @@ bool PPCMIPeephole::eliminateRedundantCompare() {
18891906

18901907
return Simplified;
18911908
}
1909+
/// Remove a redundant "clear everything but the lowest bit" instruction that
/// follows the carry materialization emitted for a lowered UADDO.
///
/// Matches an RLDICL (64-bit) or RLWINM (32-bit) whose operand 2 is 0 and
/// whose operand 3 is 63 / 31 respectively, and whose source register is
/// defined by ADDZE8 / ADDZE of a register defined by `LI8 0` / `LI 0`. That
/// ADDZE adds only the carry to zero, so its result is already 0 or 1 and the
/// mask does not change it.
///
/// \param MI      The RLDICL or RLWINM instruction to examine.
/// \param ToErase Set to &MI when the pattern matches, so the caller can
///                erase it; left untouched otherwise.
/// \returns true if every use of MI's result was redirected to the ADDZE
///          result, false if the pattern did not match (nothing is modified).
bool PPCMIPeephole::eliminateTruncWhenLoweringUADDO(MachineInstr &MI,
                                                    MachineInstr *&ToErase) {
  const MachineOperand &SrcOp = MI.getOperand(1);
  if (!SrcOp.isReg() || !SrcOp.getReg().isVirtual())
    return false;

  const bool Is64Bit = MI.getOpcode() == PPC::RLDICL;
  const int64_t LowBitMaskBegin = Is64Bit ? 63 : 31;
  if (MI.getOperand(2).getImm() != 0 ||
      MI.getOperand(3).getImm() != LowBitMaskBegin)
    return false;

  // The masked value must come straight from an ADDZE. getVRegDef may return
  // null when the register has no definition, so guard the dereference.
  MachineInstr *SrcMI = MRI->getVRegDef(SrcOp.getReg());
  if (!SrcMI || SrcMI->getOpcode() != (Is64Bit ? PPC::ADDZE8 : PPC::ADDZE))
    return false;

  // The ADDZE input must be a virtual register defined by a load-immediate of
  // zero; only virtual registers may be looked up with getVRegDef.
  const MachineOperand &AddendOp = SrcMI->getOperand(1);
  if (!AddendOp.isReg() || !AddendOp.getReg().isVirtual())
    return false;
  MachineInstr *LI = MRI->getVRegDef(AddendOp.getReg());
  if (!LI || LI->getOpcode() != (Is64Bit ? PPC::LI8 : PPC::LI))
    return false;
  // The LI operand is not necessarily a plain immediate, so test the operand
  // kind before reading it.
  const MachineOperand &ImmOp = LI->getOperand(1);
  if (!ImmOp.isImm() || ImmOp.getImm() != 0)
    return false;

  Register NewReg = SrcMI->getOperand(0).getReg();
  Register MIDestReg = MI.getOperand(0).getReg();

  // Redirect every use operand of the mask result to the ADDZE result.
  // setReg() unlinks the operand from MIDestReg's use list, so advance the
  // iterator before mutating. Visiting operands rather than instructions also
  // covers instructions that use the register more than once.
  for (MachineOperand &UseMO :
       llvm::make_early_inc_range(MRI->use_operands(MIDestReg)))
    UseMO.setReg(NewReg);

  // NewReg lost MI as a user and may have gained new ones.
  addRegToUpdate(NewReg);
  ToErase = &MI;
  return true;
}
18921941

18931942
// We miss the opportunity to emit an RLDIC when lowering jump tables
18941943
// since ISEL sees only a single basic block. When selecting, the clear

llvm/test/CodeGen/PowerPC/sat-add.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -170,11 +170,10 @@ define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
170170
define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
171171
; CHECK-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
172172
; CHECK: # %bb.0:
173-
; CHECK-NEXT: li 5, -43
174173
; CHECK-NEXT: addi 4, 3, 42
175-
; CHECK-NEXT: cmpld 3, 5
174+
; CHECK-NEXT: cmpld 4, 3
176175
; CHECK-NEXT: li 3, -1
177-
; CHECK-NEXT: iselgt 3, 3, 4
176+
; CHECK-NEXT: isellt 3, 3, 4
178177
; CHECK-NEXT: blr
179178
%a = add i64 %x, 42
180179
%c = icmp ugt i64 %x, -43
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s
3+
4+
; Returns the i32 sum from llvm.uadd.with.overflow.i32 and stores the overflow bit, zero-extended to i32, through %ovf. The expected assembly forms the sum with addc and materializes the carry with addze on a zeroed register, storing it with no intervening mask instruction.
define noundef i32 @add(i32 noundef %a, i32 noundef %b, ptr nocapture noundef writeonly %ovf) {
5+
; CHECK-LABEL: add:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: li 6, 0
8+
; CHECK-NEXT: addc 3, 3, 4
9+
; CHECK-NEXT: addze 4, 6
10+
; CHECK-NEXT: stw 4, 0(5)
11+
; CHECK-NEXT: blr
12+
entry:
13+
%0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
14+
%1 = extractvalue { i32, i1 } %0, 1
15+
%2 = extractvalue { i32, i1 } %0, 0
16+
%3 = zext i1 %1 to i32
17+
store i32 %3, ptr %ovf, align 8
18+
ret i32 %2
19+
}
20+
21+
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
22+
23+
; Stores the i32 sum from llvm.uadd.with.overflow.i32 through %ovf and returns the overflow bit as a zeroext i1. The expected assembly writes the addze result directly into register 3 before blr, with no intervening mask instruction.
define noundef zeroext i1 @add_overflow(i32 noundef %a, i32 noundef %b, ptr nocapture noundef writeonly %ovf) {
24+
; CHECK-LABEL: add_overflow:
25+
; CHECK: # %bb.0: # %entry
26+
; CHECK-NEXT: li 6, 0
27+
; CHECK-NEXT: addc 4, 3, 4
28+
; CHECK-NEXT: addze 3, 6
29+
; CHECK-NEXT: stw 4, 0(5)
30+
; CHECK-NEXT: blr
31+
entry:
32+
%0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
33+
%1 = extractvalue { i32, i1 } %0, 1
34+
%2 = extractvalue { i32, i1 } %0, 0
35+
store i32 %2, ptr %ovf, align 8
36+
ret i1 %1
37+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s --check-prefixes=PPC64
3+
4+
; Returns the i64 sum from llvm.uadd.with.overflow.i64 and stores the overflow bit, zero-extended to i64, through %ovf. The expected assembly forms the sum with addc and materializes the carry with addze on a zeroed register, storing it with no intervening mask instruction.
define noundef i64 @add(i64 noundef %a, i64 noundef %b, ptr nocapture noundef writeonly %ovf) {
5+
; PPC64-LABEL: add:
6+
; PPC64: # %bb.0: # %entry
7+
; PPC64-NEXT: li 6, 0
8+
; PPC64-NEXT: addc 3, 3, 4
9+
; PPC64-NEXT: addze 4, 6
10+
; PPC64-NEXT: std 4, 0(5)
11+
; PPC64-NEXT: blr
12+
entry:
13+
%0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
14+
%1 = extractvalue { i64, i1 } %0, 1
15+
%2 = extractvalue { i64, i1 } %0, 0
16+
%3 = zext i1 %1 to i64
17+
store i64 %3, ptr %ovf, align 8
18+
ret i64 %2
19+
}
20+
21+
declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64)
22+
23+
; Stores the i64 sum from llvm.uadd.with.overflow.i64 through %ovf and returns the overflow bit as a zeroext i1. The expected assembly writes the addze result directly into register 3 before blr, with no intervening mask instruction.
define noundef zeroext i1 @add_overflow(i64 noundef %a, i64 noundef %b, ptr nocapture noundef writeonly %ovf) {
24+
; PPC64-LABEL: add_overflow:
25+
; PPC64: # %bb.0: # %entry
26+
; PPC64-NEXT: li 6, 0
27+
; PPC64-NEXT: addc 4, 3, 4
28+
; PPC64-NEXT: addze 3, 6
29+
; PPC64-NEXT: std 4, 0(5)
30+
; PPC64-NEXT: blr
31+
entry:
32+
%0 = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
33+
%1 = extractvalue { i64, i1 } %0, 1
34+
%2 = extractvalue { i64, i1 } %0, 0
35+
store i64 %2, ptr %ovf, align 8
36+
ret i1 %1
37+
}

0 commit comments

Comments
 (0)