Skip to content

Commit 07eeb5f

Browse files
authored
[PowerPC] Lower ucmp using subtractions (llvm#146446)
Source: Hacker's Delight, page 21. By using the carry bit, ucmp can be lowered to a short sequence of subtractions.
1 parent 7aec3f2 commit 07eeb5f

File tree

4 files changed

+71
-57
lines changed

4 files changed

+71
-57
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
585585
// We cannot sextinreg(i1). Expand to shifts.
586586
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
587587

588+
// Custom lowering for the ISD::UCMP (unsigned three-way compare) node.
589+
setOperationAction(ISD::UCMP, MVT::i32, Custom);
590+
setOperationAction(ISD::UCMP, MVT::i64, isPPC64 ? Custom : Expand);
591+
588592
// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
589593
// SjLj exception handling but a light-weight setjmp/longjmp replacement to
590594
// support continuation, user-level threading, and etc.. As a result, no
@@ -12618,6 +12622,33 @@ SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
1261812622
return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
1261912623
}
1262012624

12625+
// Lower ISD::UCMP, the unsigned three-way compare, to a branch-free chain of
// carry-propagating subtractions. With A = operand 0 and B = operand 1 of Op,
// the intended result is -1, 0 or 1 (the technique is the one cited from
// Hacker's Delight, page 21).
//
// The node sequence built here is: one plain SUB, one PPCISD::SUBC and two
// PPCISD::SUBE nodes chained through their carry results; no compare or
// select node is created.
//
// Assuming the usual PowerPC carry conventions (carry set when a subtraction
// does not borrow, and subtract-extended computing X + ~Y + CA) -- confirm
// against the Power ISA -- the three cases work out as:
//   A == B: CA0 = 1, t2 = 0,         CA1 = 1, res = 0 - 0 - 1 + 1         =  0
//   A >  B: CA0 = 0, t2 = A - B - 1, CA1 = 1, res = (A-B)-(A-B-1) - 1 + 1 =  1
//   A <  B: CA0 = 1, t2 = A - B,     CA1 = 0, res = (A-B)-(A-B) - 1 + 0   = -1
//
// NOTE(review): the carry is produced on values of type OpVT. If this is
// reached with OpVT == i32 on a 64-bit subtarget, the hardware carry
// presumably reflects the full 64-bit register contents; confirm whether the
// operands must be zero-extended to i64 first so the carry describes the
// 32-bit unsigned comparison.
12626+
SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
12627+
SDLoc DL(Op);
12628+
// Both operands are used by several nodes below; they are frozen first,
// presumably so every use observes the same value for undef/poison inputs.
SDValue A = DAG.getFreeze(Op.getOperand(0));
12629+
SDValue B = DAG.getFreeze(Op.getOperand(1));
12630+
EVT OpVT = A.getValueType(); // type of the compared operands
12631+
EVT ResVT = Op.getValueType(); // type of the UCMP result (may differ from OpVT)
12632+
12633+
// diff = A - B, a plain subtraction with no carry result
// (expected to select to subf).
12634+
SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, A, B);
12635+
12636+
// Compute B - A with SUBC. Only the second result (the carry, typed i32 in
// VTs) is used; the difference itself is discarded.
12637+
SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
12638+
SDValue SubC = DAG.getNode(PPCISD::SUBC, DL, VTs, B, A);
12639+
SDValue CA0 = SubC.getValue(1);
12640+
12641+
// t2 = SUBE(A, B, CA0): A minus B with CA0 as carry-in. Under the
// subtract-extended convention assumed above this is A - B - 1 + CA0.
// Its carry-out CA1 feeds the final step.
12642+
SDValue SubE1 = DAG.getNode(PPCISD::SUBE, DL, VTs, A, B, CA0);
12643+
SDValue CA1 = SubE1.getValue(1);
12644+
12645+
// res = SUBE(diff, t2, CA1), i.e. diff - t2 - 1 + CA1 under the same
// convention, which yields the desired -1/0/1 (see the case table above).
12646+
SDValue ResPair = DAG.getNode(PPCISD::SUBE, DL, VTs, Diff, SubE1, CA1);
12647+
12648+
// Take the value result (result 0, not the carry) and sign-extend or
// truncate it from OpVT to the requested result type.
12649+
return DAG.getSExtOrTrunc(ResPair.getValue(0), DL, ResVT);
12650+
}
12651+
1262112652
/// LowerOperation - Provide custom lowering hooks for some operations.
1262212653
///
1262312654
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -12722,6 +12753,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1272212753
case ISD::UADDO_CARRY:
1272312754
case ISD::USUBO_CARRY:
1272412755
return LowerADDSUBO_CARRY(Op, DAG);
12756+
case ISD::UCMP:
12757+
return LowerUCMP(Op, DAG);
1272512758
}
1272612759
}
1272712760

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1318,6 +1318,7 @@ namespace llvm {
13181318
SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
13191319
SDValue LowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) const;
13201320
SDValue LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const;
1321+
SDValue LowerUCMP(SDValue Op, SelectionDAG &DAG) const;
13211322
SDValue lowerToLibCall(const char *LibCallName, SDValue Op,
13221323
SelectionDAG &DAG) const;
13231324
SDValue lowerLibCallBasedOnType(const char *LibCallFloatName,

llvm/test/CodeGen/PowerPC/memcmp.ll

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,10 @@ define signext i32 @memcmp8(ptr nocapture readonly %buffer1, ptr nocapture reado
66
; CHECK: # %bb.0:
77
; CHECK-NEXT: ldbrx 3, 0, 3
88
; CHECK-NEXT: ldbrx 4, 0, 4
9-
; CHECK-NEXT: cmpld 3, 4
10-
; CHECK-NEXT: subc 3, 4, 3
11-
; CHECK-NEXT: subfe 3, 4, 4
12-
; CHECK-NEXT: li 4, -1
13-
; CHECK-NEXT: neg 3, 3
14-
; CHECK-NEXT: isellt 3, 4, 3
9+
; CHECK-NEXT: subc 6, 4, 3
10+
; CHECK-NEXT: sub 5, 3, 4
11+
; CHECK-NEXT: subfe 3, 4, 3
12+
; CHECK-NEXT: subfe 3, 3, 5
1513
; CHECK-NEXT: extsw 3, 3
1614
; CHECK-NEXT: blr
1715
%call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 8)
@@ -23,11 +21,11 @@ define signext i32 @memcmp4(ptr nocapture readonly %buffer1, ptr nocapture reado
2321
; CHECK: # %bb.0:
2422
; CHECK-NEXT: lwbrx 3, 0, 3
2523
; CHECK-NEXT: lwbrx 4, 0, 4
26-
; CHECK-NEXT: cmplw 3, 4
27-
; CHECK-NEXT: sub 5, 4, 3
28-
; CHECK-NEXT: li 3, -1
29-
; CHECK-NEXT: rldicl 5, 5, 1, 63
30-
; CHECK-NEXT: isellt 3, 3, 5
24+
; CHECK-NEXT: subc 6, 4, 3
25+
; CHECK-NEXT: sub 5, 3, 4
26+
; CHECK-NEXT: subfe 3, 4, 3
27+
; CHECK-NEXT: subfe 3, 3, 5
28+
; CHECK-NEXT: extsw 3, 3
3129
; CHECK-NEXT: blr
3230
%call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 4)
3331
ret i32 %call

llvm/test/CodeGen/PowerPC/ucmp.ll

Lines changed: 28 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,10 @@
44
define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind {
55
; CHECK-LABEL: ucmp_8_8:
66
; CHECK: # %bb.0:
7-
; CHECK-NEXT: cmplw 3, 4
8-
; CHECK-NEXT: sub 5, 4, 3
9-
; CHECK-NEXT: li 3, -1
10-
; CHECK-NEXT: rldicl 5, 5, 1, 63
11-
; CHECK-NEXT: rldic 3, 3, 0, 32
12-
; CHECK-NEXT: isellt 3, 3, 5
7+
; CHECK-NEXT: subc 6, 4, 3
8+
; CHECK-NEXT: sub 5, 3, 4
9+
; CHECK-NEXT: subfe 3, 4, 3
10+
; CHECK-NEXT: subfe 3, 3, 5
1311
; CHECK-NEXT: blr
1412
%1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
1513
ret i8 %1
@@ -18,12 +16,10 @@ define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind {
1816
define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind {
1917
; CHECK-LABEL: ucmp_8_16:
2018
; CHECK: # %bb.0:
21-
; CHECK-NEXT: cmplw 3, 4
22-
; CHECK-NEXT: sub 5, 4, 3
23-
; CHECK-NEXT: li 3, -1
24-
; CHECK-NEXT: rldicl 5, 5, 1, 63
25-
; CHECK-NEXT: rldic 3, 3, 0, 32
26-
; CHECK-NEXT: isellt 3, 3, 5
19+
; CHECK-NEXT: subc 6, 4, 3
20+
; CHECK-NEXT: sub 5, 3, 4
21+
; CHECK-NEXT: subfe 3, 4, 3
22+
; CHECK-NEXT: subfe 3, 3, 5
2723
; CHECK-NEXT: blr
2824
%1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
2925
ret i8 %1
@@ -32,14 +28,10 @@ define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind {
3228
define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind {
3329
; CHECK-LABEL: ucmp_8_32:
3430
; CHECK: # %bb.0:
35-
; CHECK-NEXT: clrldi 5, 4, 32
36-
; CHECK-NEXT: clrldi 6, 3, 32
37-
; CHECK-NEXT: sub 5, 5, 6
38-
; CHECK-NEXT: cmplw 3, 4
39-
; CHECK-NEXT: li 3, -1
40-
; CHECK-NEXT: rldic 3, 3, 0, 32
41-
; CHECK-NEXT: rldicl 5, 5, 1, 63
42-
; CHECK-NEXT: isellt 3, 3, 5
31+
; CHECK-NEXT: subc 6, 4, 3
32+
; CHECK-NEXT: sub 5, 3, 4
33+
; CHECK-NEXT: subfe 3, 4, 3
34+
; CHECK-NEXT: subfe 3, 3, 5
4335
; CHECK-NEXT: blr
4436
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
4537
ret i8 %1
@@ -48,12 +40,10 @@ define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind {
4840
define i8 @ucmp_8_64(i64 %x, i64 %y) nounwind {
4941
; CHECK-LABEL: ucmp_8_64:
5042
; CHECK: # %bb.0:
51-
; CHECK-NEXT: cmpld 3, 4
52-
; CHECK-NEXT: subc 3, 4, 3
53-
; CHECK-NEXT: subfe 3, 4, 4
54-
; CHECK-NEXT: li 4, -1
55-
; CHECK-NEXT: neg 3, 3
56-
; CHECK-NEXT: isellt 3, 4, 3
43+
; CHECK-NEXT: subc 6, 4, 3
44+
; CHECK-NEXT: sub 5, 3, 4
45+
; CHECK-NEXT: subfe 3, 4, 3
46+
; CHECK-NEXT: subfe 3, 3, 5
5747
; CHECK-NEXT: blr
5848
%1 = call i8 @llvm.ucmp(i64 %x, i64 %y)
5949
ret i8 %1
@@ -82,14 +72,10 @@ define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind {
8272
define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind {
8373
; CHECK-LABEL: ucmp_32_32:
8474
; CHECK: # %bb.0:
85-
; CHECK-NEXT: clrldi 5, 4, 32
86-
; CHECK-NEXT: clrldi 6, 3, 32
87-
; CHECK-NEXT: sub 5, 5, 6
88-
; CHECK-NEXT: cmplw 3, 4
89-
; CHECK-NEXT: li 3, -1
90-
; CHECK-NEXT: rldic 3, 3, 0, 32
91-
; CHECK-NEXT: rldicl 5, 5, 1, 63
92-
; CHECK-NEXT: isellt 3, 3, 5
75+
; CHECK-NEXT: subc 6, 4, 3
76+
; CHECK-NEXT: sub 5, 3, 4
77+
; CHECK-NEXT: subfe 3, 4, 3
78+
; CHECK-NEXT: subfe 3, 3, 5
9379
; CHECK-NEXT: blr
9480
%1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
9581
ret i32 %1
@@ -98,12 +84,10 @@ define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind {
9884
define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind {
9985
; CHECK-LABEL: ucmp_32_64:
10086
; CHECK: # %bb.0:
101-
; CHECK-NEXT: cmpld 3, 4
102-
; CHECK-NEXT: subc 3, 4, 3
103-
; CHECK-NEXT: subfe 3, 4, 4
104-
; CHECK-NEXT: li 4, -1
105-
; CHECK-NEXT: neg 3, 3
106-
; CHECK-NEXT: isellt 3, 4, 3
87+
; CHECK-NEXT: subc 6, 4, 3
88+
; CHECK-NEXT: sub 5, 3, 4
89+
; CHECK-NEXT: subfe 3, 4, 3
90+
; CHECK-NEXT: subfe 3, 3, 5
10791
; CHECK-NEXT: blr
10892
%1 = call i32 @llvm.ucmp(i64 %x, i64 %y)
10993
ret i32 %1
@@ -112,12 +96,10 @@ define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind {
11296
define i64 @ucmp_64_64(i64 %x, i64 %y) nounwind {
11397
; CHECK-LABEL: ucmp_64_64:
11498
; CHECK: # %bb.0:
115-
; CHECK-NEXT: subc 5, 4, 3
116-
; CHECK-NEXT: cmpld 3, 4
117-
; CHECK-NEXT: li 3, -1
118-
; CHECK-NEXT: subfe 5, 4, 4
119-
; CHECK-NEXT: neg 5, 5
120-
; CHECK-NEXT: isellt 3, 3, 5
99+
; CHECK-NEXT: subc 6, 4, 3
100+
; CHECK-NEXT: sub 5, 3, 4
101+
; CHECK-NEXT: subfe 3, 4, 3
102+
; CHECK-NEXT: subfe 3, 3, 5
121103
; CHECK-NEXT: blr
122104
%1 = call i64 @llvm.ucmp(i64 %x, i64 %y)
123105
ret i64 %1

0 commit comments

Comments
 (0)