Skip to content

Commit 1b0400e

Browse files
committed
[X86] combineSubABS - handle NEG(ABD()) expanded patterns
combineSubABS already handles the "(sub Y, cmovns X, -X) -> (add Y, cmovns -X, X)" fold by flipping the CMOV operands. We can do something similar for the negation of ABDS/ABDU patterns that have been expanded to a CMOVL/CMOVB with a pair of commuted subtractions: "NEG(ABD(X,Y)) -> NEG(CMOV(SUB(X,Y),SUB(Y,X))) -> CMOV(SUB(Y,X),SUB(X,Y))".
1 parent ed22029 commit 1b0400e

File tree

3 files changed

+80
-108
lines changed

3 files changed

+80
-108
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 36 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -56096,34 +56096,50 @@ static SDValue combineSubABS(SDNode *N, SelectionDAG &DAG) {
5609656096
if (N1.getOpcode() != X86ISD::CMOV || !N1.hasOneUse())
5609756097
return SDValue();
5609856098

56099-
X86::CondCode CC = (X86::CondCode)N1.getConstantOperandVal(2);
56100-
if (CC != X86::COND_S && CC != X86::COND_NS)
56101-
return SDValue();
56102-
56103-
// Condition should come from a negate operation.
5610456099
SDValue Cond = N1.getOperand(3);
56105-
if (Cond.getOpcode() != X86ISD::SUB || !isNullConstant(Cond.getOperand(0)))
56100+
if (Cond.getOpcode() != X86ISD::SUB)
5610656101
return SDValue();
5610756102
assert(Cond.getResNo() == 1 && "Unexpected result number");
5610856103

56109-
// Get the X and -X from the negate.
56110-
SDValue NegX = Cond.getValue(0);
56111-
SDValue X = Cond.getOperand(1);
56112-
5611356104
SDValue FalseOp = N1.getOperand(0);
5611456105
SDValue TrueOp = N1.getOperand(1);
56106+
X86::CondCode CC = (X86::CondCode)N1.getConstantOperandVal(2);
56107+
MVT VT = N->getSimpleValueType(0);
56108+
SDLoc DL(N);
5611556109

56116-
// Cmov operands should be X and NegX. Order doesn't matter.
56117-
if (!(TrueOp == X && FalseOp == NegX) && !(TrueOp == NegX && FalseOp == X))
56118-
return SDValue();
56110+
// ABS condition should come from a negate operation.
56111+
if ((CC == X86::COND_S || CC == X86::COND_NS) &&
56112+
isNullConstant(Cond.getOperand(0))) {
56113+
// Get the X and -X from the negate.
56114+
SDValue NegX = Cond.getValue(0);
56115+
SDValue X = Cond.getOperand(1);
5611956116

56120-
// Build a new CMOV with the operands swapped.
56121-
SDLoc DL(N);
56122-
MVT VT = N->getSimpleValueType(0);
56123-
SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VT, TrueOp, FalseOp,
56124-
N1.getOperand(2), Cond);
56125-
// Convert sub to add.
56126-
return DAG.getNode(ISD::ADD, DL, VT, N0, Cmov);
56117+
// Cmov operands should be X and NegX. Order doesn't matter.
56118+
if (!(TrueOp == X && FalseOp == NegX) && !(TrueOp == NegX && FalseOp == X))
56119+
return SDValue();
56120+
56121+
// Build a new CMOV with the operands swapped.
56122+
SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VT, TrueOp, FalseOp,
56123+
N1.getOperand(2), Cond);
56124+
// Convert sub to add.
56125+
return DAG.getNode(ISD::ADD, DL, VT, N0, Cmov);
56126+
}
56127+
56128+
// Handle ABD special case:
56129+
// NEG(ABD(X,Y)) -> NEG(CMOV(SUB(X,Y),SUB(Y,X))) -> CMOV(SUB(Y,X),SUB(X,Y)).
56130+
// ABD condition should come from a pair of matching subtracts.
56131+
if ((CC == X86::COND_L || CC == X86::COND_B) && isNullConstant(N0) &&
56132+
(FalseOp == Cond.getValue(0) || TrueOp == Cond.getValue(0)) &&
56133+
(TrueOp.getOpcode() == ISD::SUB || TrueOp.getOpcode() == X86ISD::SUB) &&
56134+
(FalseOp.getOpcode() == ISD::SUB || FalseOp.getOpcode() == X86ISD::SUB) &&
56135+
(TrueOp.getOperand(0) == FalseOp.getOperand(1)) &&
56136+
(TrueOp.getOperand(1) == FalseOp.getOperand(0))) {
56137+
// Build a new CMOV with the operands swapped.
56138+
return DAG.getNode(X86ISD::CMOV, DL, VT, TrueOp, FalseOp, N1.getOperand(2),
56139+
Cond);
56140+
}
56141+
56142+
return SDValue();
5612756143
}
5612856144

5612956145
static SDValue combineSubSetcc(SDNode *N, SelectionDAG &DAG) {

llvm/test/CodeGen/X86/abds-neg.ll

Lines changed: 25 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -112,8 +112,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
112112
; X86-NEXT: movl %ecx, %edx
113113
; X86-NEXT: subl %eax, %edx
114114
; X86-NEXT: subl %ecx, %eax
115-
; X86-NEXT: cmovll %edx, %eax
116-
; X86-NEXT: negl %eax
115+
; X86-NEXT: cmovgel %edx, %eax
117116
; X86-NEXT: # kill: def $ax killed $ax killed $eax
118117
; X86-NEXT: retl
119118
;
@@ -144,8 +143,7 @@ define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
144143
; X86-NEXT: movl %ecx, %edx
145144
; X86-NEXT: subl %eax, %edx
146145
; X86-NEXT: subl %ecx, %eax
147-
; X86-NEXT: cmovll %edx, %eax
148-
; X86-NEXT: negl %eax
146+
; X86-NEXT: cmovgel %edx, %eax
149147
; X86-NEXT: # kill: def $ax killed $ax killed $eax
150148
; X86-NEXT: retl
151149
;
@@ -176,8 +174,7 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
176174
; X86-NEXT: movl %ecx, %edx
177175
; X86-NEXT: subl %eax, %edx
178176
; X86-NEXT: subl %ecx, %eax
179-
; X86-NEXT: cmovll %edx, %eax
180-
; X86-NEXT: negl %eax
177+
; X86-NEXT: cmovgel %edx, %eax
181178
; X86-NEXT: # kill: def $ax killed $ax killed $eax
182179
; X86-NEXT: retl
183180
;
@@ -208,17 +205,15 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
208205
; X86-NEXT: movl %ecx, %edx
209206
; X86-NEXT: subl %eax, %edx
210207
; X86-NEXT: subl %ecx, %eax
211-
; X86-NEXT: cmovll %edx, %eax
212-
; X86-NEXT: negl %eax
208+
; X86-NEXT: cmovgel %edx, %eax
213209
; X86-NEXT: retl
214210
;
215211
; X64-LABEL: abd_ext_i32:
216212
; X64: # %bb.0:
217213
; X64-NEXT: movl %edi, %eax
218214
; X64-NEXT: subl %esi, %eax
219215
; X64-NEXT: subl %edi, %esi
220-
; X64-NEXT: cmovgel %esi, %eax
221-
; X64-NEXT: negl %eax
216+
; X64-NEXT: cmovll %esi, %eax
222217
; X64-NEXT: retq
223218
%aext = sext i32 %a to i64
224219
%bext = sext i32 %b to i64
@@ -237,8 +232,7 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
237232
; X86-NEXT: movl %ecx, %edx
238233
; X86-NEXT: subl %eax, %edx
239234
; X86-NEXT: subl %ecx, %eax
240-
; X86-NEXT: cmovll %edx, %eax
241-
; X86-NEXT: negl %eax
235+
; X86-NEXT: cmovgel %edx, %eax
242236
; X86-NEXT: retl
243237
;
244238
; X64-LABEL: abd_ext_i32_i16:
@@ -247,8 +241,7 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
247241
; X64-NEXT: movl %edi, %ecx
248242
; X64-NEXT: subl %eax, %ecx
249243
; X64-NEXT: subl %edi, %eax
250-
; X64-NEXT: cmovll %ecx, %eax
251-
; X64-NEXT: negl %eax
244+
; X64-NEXT: cmovgel %ecx, %eax
252245
; X64-NEXT: retq
253246
%aext = sext i32 %a to i64
254247
%bext = sext i16 %b to i64
@@ -267,17 +260,15 @@ define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
267260
; X86-NEXT: movl %ecx, %edx
268261
; X86-NEXT: subl %eax, %edx
269262
; X86-NEXT: subl %ecx, %eax
270-
; X86-NEXT: cmovll %edx, %eax
271-
; X86-NEXT: negl %eax
263+
; X86-NEXT: cmovgel %edx, %eax
272264
; X86-NEXT: retl
273265
;
274266
; X64-LABEL: abd_ext_i32_undef:
275267
; X64: # %bb.0:
276268
; X64-NEXT: movl %edi, %eax
277269
; X64-NEXT: subl %esi, %eax
278270
; X64-NEXT: subl %edi, %esi
279-
; X64-NEXT: cmovgel %esi, %eax
280-
; X64-NEXT: negl %eax
271+
; X64-NEXT: cmovll %esi, %eax
281272
; X64-NEXT: retq
282273
%aext = sext i32 %a to i64
283274
%bext = sext i32 %b to i64
@@ -319,8 +310,7 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
319310
; X64-NEXT: movq %rdi, %rax
320311
; X64-NEXT: subq %rsi, %rax
321312
; X64-NEXT: subq %rdi, %rsi
322-
; X64-NEXT: cmovgeq %rsi, %rax
323-
; X64-NEXT: negq %rax
313+
; X64-NEXT: cmovlq %rsi, %rax
324314
; X64-NEXT: retq
325315
%aext = sext i64 %a to i128
326316
%bext = sext i64 %b to i128
@@ -362,8 +352,7 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
362352
; X64-NEXT: movq %rdi, %rax
363353
; X64-NEXT: subq %rsi, %rax
364354
; X64-NEXT: subq %rdi, %rsi
365-
; X64-NEXT: cmovgeq %rsi, %rax
366-
; X64-NEXT: negq %rax
355+
; X64-NEXT: cmovlq %rsi, %rax
367356
; X64-NEXT: retq
368357
%aext = sext i64 %a to i128
369358
%bext = sext i64 %b to i128
@@ -558,8 +547,7 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
558547
; X86-NEXT: movl %ecx, %edx
559548
; X86-NEXT: subl %eax, %edx
560549
; X86-NEXT: subl %ecx, %eax
561-
; X86-NEXT: cmovll %edx, %eax
562-
; X86-NEXT: negl %eax
550+
; X86-NEXT: cmovgel %edx, %eax
563551
; X86-NEXT: # kill: def $ax killed $ax killed $eax
564552
; X86-NEXT: retl
565553
;
@@ -587,17 +575,15 @@ define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
587575
; X86-NEXT: movl %ecx, %edx
588576
; X86-NEXT: subl %eax, %edx
589577
; X86-NEXT: subl %ecx, %eax
590-
; X86-NEXT: cmovll %edx, %eax
591-
; X86-NEXT: negl %eax
578+
; X86-NEXT: cmovgel %edx, %eax
592579
; X86-NEXT: retl
593580
;
594581
; X64-LABEL: abd_minmax_i32:
595582
; X64: # %bb.0:
596583
; X64-NEXT: movl %edi, %eax
597584
; X64-NEXT: subl %esi, %eax
598585
; X64-NEXT: subl %edi, %esi
599-
; X64-NEXT: cmovgel %esi, %eax
600-
; X64-NEXT: negl %eax
586+
; X64-NEXT: cmovll %esi, %eax
601587
; X64-NEXT: retq
602588
%min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
603589
%max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
@@ -641,8 +627,7 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
641627
; X64-NEXT: movq %rdi, %rax
642628
; X64-NEXT: subq %rsi, %rax
643629
; X64-NEXT: subq %rdi, %rsi
644-
; X64-NEXT: cmovgeq %rsi, %rax
645-
; X64-NEXT: negq %rax
630+
; X64-NEXT: cmovlq %rsi, %rax
646631
; X64-NEXT: retq
647632
%min = call i64 @llvm.smin.i64(i64 %a, i64 %b)
648633
%max = call i64 @llvm.smax.i64(i64 %a, i64 %b)
@@ -776,8 +761,7 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
776761
; X86-NEXT: movl %ecx, %edx
777762
; X86-NEXT: subl %eax, %edx
778763
; X86-NEXT: subl %ecx, %eax
779-
; X86-NEXT: cmovll %edx, %eax
780-
; X86-NEXT: negl %eax
764+
; X86-NEXT: cmovgel %edx, %eax
781765
; X86-NEXT: # kill: def $ax killed $ax killed $eax
782766
; X86-NEXT: retl
783767
;
@@ -806,17 +790,15 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
806790
; X86-NEXT: movl %ecx, %edx
807791
; X86-NEXT: subl %eax, %edx
808792
; X86-NEXT: subl %ecx, %eax
809-
; X86-NEXT: cmovll %edx, %eax
810-
; X86-NEXT: negl %eax
793+
; X86-NEXT: cmovgel %edx, %eax
811794
; X86-NEXT: retl
812795
;
813796
; X64-LABEL: abd_cmp_i32:
814797
; X64: # %bb.0:
815798
; X64-NEXT: movl %edi, %eax
816799
; X64-NEXT: subl %esi, %eax
817800
; X64-NEXT: subl %edi, %esi
818-
; X64-NEXT: cmovgel %esi, %eax
819-
; X64-NEXT: negl %eax
801+
; X64-NEXT: cmovll %esi, %eax
820802
; X64-NEXT: retq
821803
%cmp = icmp sge i32 %a, %b
822804
%ab = sub i32 %a, %b
@@ -853,8 +835,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
853835
; X64-NEXT: movq %rdi, %rax
854836
; X64-NEXT: subq %rsi, %rax
855837
; X64-NEXT: subq %rdi, %rsi
856-
; X64-NEXT: cmovgeq %rsi, %rax
857-
; X64-NEXT: negq %rax
838+
; X64-NEXT: cmovlq %rsi, %rax
858839
; X64-NEXT: retq
859840
%cmp = icmp slt i64 %a, %b
860841
%ab = sub i64 %a, %b
@@ -1031,17 +1012,15 @@ define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind {
10311012
; X86-NEXT: movl %ecx, %edx
10321013
; X86-NEXT: subl %eax, %edx
10331014
; X86-NEXT: subl %ecx, %eax
1034-
; X86-NEXT: cmovll %edx, %eax
1035-
; X86-NEXT: negl %eax
1015+
; X86-NEXT: cmovgel %edx, %eax
10361016
; X86-NEXT: retl
10371017
;
10381018
; X64-LABEL: abd_subnsw_i32:
10391019
; X64: # %bb.0:
10401020
; X64-NEXT: movl %edi, %eax
10411021
; X64-NEXT: subl %esi, %eax
10421022
; X64-NEXT: subl %edi, %esi
1043-
; X64-NEXT: cmovgel %esi, %eax
1044-
; X64-NEXT: negl %eax
1023+
; X64-NEXT: cmovll %esi, %eax
10451024
; X64-NEXT: retq
10461025
%sub = sub nsw i32 %a, %b
10471026
%abs = call i32 @llvm.abs.i32(i32 %sub, i1 false)
@@ -1057,17 +1036,15 @@ define i32 @abd_subnsw_i32_undef(i32 %a, i32 %b) nounwind {
10571036
; X86-NEXT: movl %ecx, %edx
10581037
; X86-NEXT: subl %eax, %edx
10591038
; X86-NEXT: subl %ecx, %eax
1060-
; X86-NEXT: cmovll %edx, %eax
1061-
; X86-NEXT: negl %eax
1039+
; X86-NEXT: cmovgel %edx, %eax
10621040
; X86-NEXT: retl
10631041
;
10641042
; X64-LABEL: abd_subnsw_i32_undef:
10651043
; X64: # %bb.0:
10661044
; X64-NEXT: movl %edi, %eax
10671045
; X64-NEXT: subl %esi, %eax
10681046
; X64-NEXT: subl %edi, %esi
1069-
; X64-NEXT: cmovgel %esi, %eax
1070-
; X64-NEXT: negl %eax
1047+
; X64-NEXT: cmovll %esi, %eax
10711048
; X64-NEXT: retq
10721049
%sub = sub nsw i32 %a, %b
10731050
%abs = call i32 @llvm.abs.i32(i32 %sub, i1 true)
@@ -1098,8 +1075,7 @@ define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind {
10981075
; X64-NEXT: movq %rdi, %rax
10991076
; X64-NEXT: subq %rsi, %rax
11001077
; X64-NEXT: subq %rdi, %rsi
1101-
; X64-NEXT: cmovgeq %rsi, %rax
1102-
; X64-NEXT: negq %rax
1078+
; X64-NEXT: cmovlq %rsi, %rax
11031079
; X64-NEXT: retq
11041080
%sub = sub nsw i64 %a, %b
11051081
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
@@ -1130,8 +1106,7 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind {
11301106
; X64-NEXT: movq %rdi, %rax
11311107
; X64-NEXT: subq %rsi, %rax
11321108
; X64-NEXT: subq %rdi, %rsi
1133-
; X64-NEXT: cmovgeq %rsi, %rax
1134-
; X64-NEXT: negq %rax
1109+
; X64-NEXT: cmovlq %rsi, %rax
11351110
; X64-NEXT: retq
11361111
%sub = sub nsw i64 %a, %b
11371112
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)

0 commit comments

Comments
 (0)