Skip to content

Commit cabcd82

Browse files
committed
Fix mitigations in PowerPC by having custom lowering anyway
1 parent dd52fba commit cabcd82

File tree

4 files changed

+71
-61
lines changed

4 files changed

+71
-61
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
585585
// We cannot sextinreg(i1). Expand to shifts.
586586
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
587587

588+
// Custom handling for PowerPC ucmp instruction
589+
setOperationAction(ISD::UCMP, MVT::i32, Custom);
590+
setOperationAction(ISD::UCMP, MVT::i64, isPPC64 ? Custom : Expand);
591+
588592
// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
589593
// SjLj exception handling but a light-weight setjmp/longjmp replacement to
590594
// support continuation, user-level threading, and etc.. As a result, no
@@ -12618,6 +12622,33 @@ SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
1261812622
return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
1261912623
}
1262012624

12625+
// Lower unsigned 3-way compare producing -1/0/1.
12626+
SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
12627+
SDLoc DL(Op);
12628+
SDValue A = DAG.getFreeze(Op.getOperand(0));
12629+
SDValue B = DAG.getFreeze(Op.getOperand(1));
12630+
EVT OpVT = A.getValueType(); // operand type
12631+
EVT ResVT = Op.getValueType(); // result type
12632+
12633+
// First compute diff = A - B (will become subf).
12634+
SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, A, B);
12635+
12636+
// Generate B - A using SUBC to capture carry.
12637+
SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
12638+
SDValue SubC = DAG.getNode(PPCISD::SUBC, DL, VTs, B, A);
12639+
SDValue CA0 = SubC.getValue(1);
12640+
12641+
// t2 = A - B + CA0 using SUBE.
12642+
SDValue SubE1 = DAG.getNode(PPCISD::SUBE, DL, VTs, A, B, CA0);
12643+
SDValue CA1 = SubE1.getValue(1);
12644+
12645+
// res = diff - t2 + CA1 using SUBE (produces desired -1/0/1).
12646+
SDValue ResPair = DAG.getNode(PPCISD::SUBE, DL, VTs, Diff, SubE1, CA1);
12647+
12648+
// Extract the first result and truncate to result type if needed
12649+
return DAG.getSExtOrTrunc(ResPair.getValue(0), DL, ResVT);
12650+
}
12651+
1262112652
/// LowerOperation - Provide custom lowering hooks for some operations.
1262212653
///
1262312654
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -12722,6 +12753,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1272212753
case ISD::UADDO_CARRY:
1272312754
case ISD::USUBO_CARRY:
1272412755
return LowerADDSUBO_CARRY(Op, DAG);
12756+
case ISD::UCMP:
12757+
return LowerUCMP(Op, DAG);
1272512758
}
1272612759
}
1272712760

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1343,6 +1343,7 @@ namespace llvm {
13431343
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
13441344
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
13451345
SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
1346+
SDValue LowerUCMP(SDValue Op, SelectionDAG &DAG) const;
13461347

13471348
SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
13481349
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;

llvm/test/CodeGen/PowerPC/memcmp.ll

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,10 @@ define signext i32 @memcmp8(ptr nocapture readonly %buffer1, ptr nocapture reado
66
; CHECK: # %bb.0:
77
; CHECK-NEXT: ldbrx 3, 0, 3
88
; CHECK-NEXT: ldbrx 4, 0, 4
9-
; CHECK-NEXT: cmpld 3, 4
10-
; CHECK-NEXT: subc 3, 4, 3
11-
; CHECK-NEXT: subfe 3, 4, 4
12-
; CHECK-NEXT: li 4, -1
13-
; CHECK-NEXT: neg 3, 3
14-
; CHECK-NEXT: isellt 3, 4, 3
9+
; CHECK-NEXT: subc 6, 4, 3
10+
; CHECK-NEXT: sub 5, 3, 4
11+
; CHECK-NEXT: subfe 3, 4, 3
12+
; CHECK-NEXT: subfe 3, 3, 5
1513
; CHECK-NEXT: extsw 3, 3
1614
; CHECK-NEXT: blr
1715
%call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 8)
@@ -23,11 +21,11 @@ define signext i32 @memcmp4(ptr nocapture readonly %buffer1, ptr nocapture reado
2321
; CHECK: # %bb.0:
2422
; CHECK-NEXT: lwbrx 3, 0, 3
2523
; CHECK-NEXT: lwbrx 4, 0, 4
26-
; CHECK-NEXT: cmplw 3, 4
27-
; CHECK-NEXT: sub 5, 4, 3
28-
; CHECK-NEXT: li 3, -1
29-
; CHECK-NEXT: rldicl 5, 5, 1, 63
30-
; CHECK-NEXT: isellt 3, 3, 5
24+
; CHECK-NEXT: subc 6, 4, 3
25+
; CHECK-NEXT: sub 5, 3, 4
26+
; CHECK-NEXT: subfe 3, 4, 3
27+
; CHECK-NEXT: subfe 3, 3, 5
28+
; CHECK-NEXT: extsw 3, 3
3129
; CHECK-NEXT: blr
3230
%call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 4)
3331
ret i32 %call

llvm/test/CodeGen/PowerPC/ucmp.ll

Lines changed: 28 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,10 @@
44
define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind {
55
; CHECK-LABEL: ucmp_8_8:
66
; CHECK: # %bb.0:
7-
; CHECK-NEXT: clrldi 5, 4, 32
8-
; CHECK-NEXT: clrldi 6, 3, 32
9-
; CHECK-NEXT: sub 5, 5, 6
10-
; CHECK-NEXT: cmplw 3, 4
11-
; CHECK-NEXT: li 3, -1
12-
; CHECK-NEXT: rldic 3, 3, 0, 32
13-
; CHECK-NEXT: rldicl 5, 5, 1, 63
14-
; CHECK-NEXT: isellt 3, 3, 5
7+
; CHECK-NEXT: subc 6, 4, 3
8+
; CHECK-NEXT: sub 5, 3, 4
9+
; CHECK-NEXT: subfe 3, 4, 3
10+
; CHECK-NEXT: subfe 3, 3, 5
1511
; CHECK-NEXT: blr
1612
%1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
1713
ret i8 %1
@@ -20,14 +16,10 @@ define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind {
2016
define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind {
2117
; CHECK-LABEL: ucmp_8_16:
2218
; CHECK: # %bb.0:
23-
; CHECK-NEXT: clrldi 5, 4, 32
24-
; CHECK-NEXT: clrldi 6, 3, 32
25-
; CHECK-NEXT: sub 5, 5, 6
26-
; CHECK-NEXT: cmplw 3, 4
27-
; CHECK-NEXT: li 3, -1
28-
; CHECK-NEXT: rldic 3, 3, 0, 32
29-
; CHECK-NEXT: rldicl 5, 5, 1, 63
30-
; CHECK-NEXT: isellt 3, 3, 5
19+
; CHECK-NEXT: subc 6, 4, 3
20+
; CHECK-NEXT: sub 5, 3, 4
21+
; CHECK-NEXT: subfe 3, 4, 3
22+
; CHECK-NEXT: subfe 3, 3, 5
3123
; CHECK-NEXT: blr
3224
%1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
3325
ret i8 %1
@@ -36,14 +28,10 @@ define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind {
3628
define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind {
3729
; CHECK-LABEL: ucmp_8_32:
3830
; CHECK: # %bb.0:
39-
; CHECK-NEXT: clrldi 5, 4, 32
40-
; CHECK-NEXT: clrldi 6, 3, 32
41-
; CHECK-NEXT: sub 5, 5, 6
42-
; CHECK-NEXT: cmplw 3, 4
43-
; CHECK-NEXT: li 3, -1
44-
; CHECK-NEXT: rldic 3, 3, 0, 32
45-
; CHECK-NEXT: rldicl 5, 5, 1, 63
46-
; CHECK-NEXT: isellt 3, 3, 5
31+
; CHECK-NEXT: subc 6, 4, 3
32+
; CHECK-NEXT: sub 5, 3, 4
33+
; CHECK-NEXT: subfe 3, 4, 3
34+
; CHECK-NEXT: subfe 3, 3, 5
4735
; CHECK-NEXT: blr
4836
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
4937
ret i8 %1
@@ -52,12 +40,10 @@ define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind {
5240
define i8 @ucmp_8_64(i64 %x, i64 %y) nounwind {
5341
; CHECK-LABEL: ucmp_8_64:
5442
; CHECK: # %bb.0:
55-
; CHECK-NEXT: cmpld 3, 4
56-
; CHECK-NEXT: subc 3, 4, 3
57-
; CHECK-NEXT: subfe 3, 4, 4
58-
; CHECK-NEXT: li 4, -1
59-
; CHECK-NEXT: neg 3, 3
60-
; CHECK-NEXT: isellt 3, 4, 3
43+
; CHECK-NEXT: subc 6, 4, 3
44+
; CHECK-NEXT: sub 5, 3, 4
45+
; CHECK-NEXT: subfe 3, 4, 3
46+
; CHECK-NEXT: subfe 3, 3, 5
6147
; CHECK-NEXT: blr
6248
%1 = call i8 @llvm.ucmp(i64 %x, i64 %y)
6349
ret i8 %1
@@ -86,14 +72,10 @@ define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind {
8672
define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind {
8773
; CHECK-LABEL: ucmp_32_32:
8874
; CHECK: # %bb.0:
89-
; CHECK-NEXT: clrldi 5, 4, 32
90-
; CHECK-NEXT: clrldi 6, 3, 32
91-
; CHECK-NEXT: sub 5, 5, 6
92-
; CHECK-NEXT: cmplw 3, 4
93-
; CHECK-NEXT: li 3, -1
94-
; CHECK-NEXT: rldic 3, 3, 0, 32
95-
; CHECK-NEXT: rldicl 5, 5, 1, 63
96-
; CHECK-NEXT: isellt 3, 3, 5
75+
; CHECK-NEXT: subc 6, 4, 3
76+
; CHECK-NEXT: sub 5, 3, 4
77+
; CHECK-NEXT: subfe 3, 4, 3
78+
; CHECK-NEXT: subfe 3, 3, 5
9779
; CHECK-NEXT: blr
9880
%1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
9981
ret i32 %1
@@ -102,12 +84,10 @@ define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind {
10284
define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind {
10385
; CHECK-LABEL: ucmp_32_64:
10486
; CHECK: # %bb.0:
105-
; CHECK-NEXT: cmpld 3, 4
106-
; CHECK-NEXT: subc 3, 4, 3
107-
; CHECK-NEXT: subfe 3, 4, 4
108-
; CHECK-NEXT: li 4, -1
109-
; CHECK-NEXT: neg 3, 3
110-
; CHECK-NEXT: isellt 3, 4, 3
87+
; CHECK-NEXT: subc 6, 4, 3
88+
; CHECK-NEXT: sub 5, 3, 4
89+
; CHECK-NEXT: subfe 3, 4, 3
90+
; CHECK-NEXT: subfe 3, 3, 5
11191
; CHECK-NEXT: blr
11292
%1 = call i32 @llvm.ucmp(i64 %x, i64 %y)
11393
ret i32 %1
@@ -116,12 +96,10 @@ define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind {
11696
define i64 @ucmp_64_64(i64 %x, i64 %y) nounwind {
11797
; CHECK-LABEL: ucmp_64_64:
11898
; CHECK: # %bb.0:
119-
; CHECK-NEXT: subc 5, 4, 3
120-
; CHECK-NEXT: cmpld 3, 4
121-
; CHECK-NEXT: li 3, -1
122-
; CHECK-NEXT: subfe 5, 4, 4
123-
; CHECK-NEXT: neg 5, 5
124-
; CHECK-NEXT: isellt 3, 3, 5
99+
; CHECK-NEXT: subc 6, 4, 3
100+
; CHECK-NEXT: sub 5, 3, 4
101+
; CHECK-NEXT: subfe 3, 4, 3
102+
; CHECK-NEXT: subfe 3, 3, 5
125103
; CHECK-NEXT: blr
126104
%1 = call i64 @llvm.ucmp(i64 %x, i64 %y)
127105
ret i64 %1

0 commit comments

Comments
 (0)