Skip to content

Commit 9903923

Browse files
committed
[ARM] Copy SMAX(lhs, 0) and SMIN(lhs, 0) patterns from AArch64 to ARM
They work on ARM too.
1 parent 291c247 commit 9903923

File tree

4 files changed

+33
-35
lines changed

4 files changed

+33
-35
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5540,6 +5540,24 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
55405540
DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
55415541
return DAG.getNode(ISD::OR, dl, VT, Shift, DAG.getConstant(1, dl, VT));
55425542
}
5543+
5544+
// Check for SMAX(lhs, 0) and SMIN(lhs, 0) patterns.
5545+
// (SELECT_CC setgt, lhs, 0, lhs, 0) -> (BIC lhs, (SRA lhs, typesize-1))
5546+
// (SELECT_CC setlt, lhs, 0, lhs, 0) -> (AND lhs, (SRA lhs, typesize-1))
5547+
// Both require fewer instructions than compare and conditional select.
5548+
if ((CC == ISD::SETGT || CC == ISD::SETLT) && LHS == TrueVal && RHSC &&
5549+
RHSC->isZero() && CFVal && CFVal->isZero() &&
5550+
LHS.getValueType() == RHS.getValueType()) {
5551+
EVT VT = LHS.getValueType();
5552+
SDValue Shift =
5553+
DAG.getNode(ISD::SRA, dl, VT, LHS,
5554+
DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
5555+
5556+
if (CC == ISD::SETGT)
5557+
Shift = DAG.getNOT(dl, Shift, VT);
5558+
5559+
return DAG.getNode(ISD::AND, dl, VT, LHS, Shift);
5560+
}
55435561
}
55445562

55455563
if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&

llvm/test/CodeGen/ARM/min-max-combine.ll

Lines changed: 14 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -83,26 +83,20 @@ define i8 @smini8_zero(i8 %a) {
8383
; ARM-LABEL: smini8_zero:
8484
; ARM: @ %bb.0:
8585
; ARM-NEXT: sxtb r0, r0
86-
; ARM-NEXT: cmp r0, #0
87-
; ARM-NEXT: movpl r0, #0
86+
; ARM-NEXT: and r0, r0, r0, asr #31
8887
; ARM-NEXT: bx lr
8988
;
9089
; THUMB-LABEL: smini8_zero:
9190
; THUMB: @ %bb.0:
92-
; THUMB-NEXT: sxtb r0, r0
93-
; THUMB-NEXT: cmp r0, #0
94-
; THUMB-NEXT: bmi .LBB3_2
95-
; THUMB-NEXT: @ %bb.1:
96-
; THUMB-NEXT: movs r0, #0
97-
; THUMB-NEXT: .LBB3_2:
91+
; THUMB-NEXT: sxtb r1, r0
92+
; THUMB-NEXT: asrs r0, r1, #31
93+
; THUMB-NEXT: ands r0, r1
9894
; THUMB-NEXT: bx lr
9995
;
10096
; THUMB2-LABEL: smini8_zero:
10197
; THUMB2: @ %bb.0:
10298
; THUMB2-NEXT: sxtb r0, r0
103-
; THUMB2-NEXT: cmp r0, #0
104-
; THUMB2-NEXT: it pl
105-
; THUMB2-NEXT: movpl r0, #0
99+
; THUMB2-NEXT: and.w r0, r0, r0, asr #31
106100
; THUMB2-NEXT: bx lr
107101
%c = call i8 @llvm.smin.i8(i8 %a, i8 0)
108102
ret i8 %c
@@ -114,26 +108,20 @@ define i16 @smini16_zero(i16 %a) {
114108
; ARM-LABEL: smini16_zero:
115109
; ARM: @ %bb.0:
116110
; ARM-NEXT: sxth r0, r0
117-
; ARM-NEXT: cmp r0, #0
118-
; ARM-NEXT: movpl r0, #0
111+
; ARM-NEXT: and r0, r0, r0, asr #31
119112
; ARM-NEXT: bx lr
120113
;
121114
; THUMB-LABEL: smini16_zero:
122115
; THUMB: @ %bb.0:
123-
; THUMB-NEXT: sxth r0, r0
124-
; THUMB-NEXT: cmp r0, #0
125-
; THUMB-NEXT: bmi .LBB4_2
126-
; THUMB-NEXT: @ %bb.1:
127-
; THUMB-NEXT: movs r0, #0
128-
; THUMB-NEXT: .LBB4_2:
116+
; THUMB-NEXT: sxth r1, r0
117+
; THUMB-NEXT: asrs r0, r1, #31
118+
; THUMB-NEXT: ands r0, r1
129119
; THUMB-NEXT: bx lr
130120
;
131121
; THUMB2-LABEL: smini16_zero:
132122
; THUMB2: @ %bb.0:
133123
; THUMB2-NEXT: sxth r0, r0
134-
; THUMB2-NEXT: cmp r0, #0
135-
; THUMB2-NEXT: it pl
136-
; THUMB2-NEXT: movpl r0, #0
124+
; THUMB2-NEXT: and.w r0, r0, r0, asr #31
137125
; THUMB2-NEXT: bx lr
138126
%c = call i16 @llvm.smin.i16(i16 %a, i16 0)
139127
ret i16 %c
@@ -144,24 +132,18 @@ declare i32 @llvm.smin.i32(i32 %a, i32 %b) readnone
144132
define i32 @smini32_zero(i32 %a) {
145133
; ARM-LABEL: smini32_zero:
146134
; ARM: @ %bb.0:
147-
; ARM-NEXT: cmp r0, #0
148-
; ARM-NEXT: movpl r0, #0
135+
; ARM-NEXT: and r0, r0, r0, asr #31
149136
; ARM-NEXT: bx lr
150137
;
151138
; THUMB-LABEL: smini32_zero:
152139
; THUMB: @ %bb.0:
153-
; THUMB-NEXT: cmp r0, #0
154-
; THUMB-NEXT: bmi .LBB5_2
155-
; THUMB-NEXT: @ %bb.1:
156-
; THUMB-NEXT: movs r0, #0
157-
; THUMB-NEXT: .LBB5_2:
140+
; THUMB-NEXT: asrs r1, r0, #31
141+
; THUMB-NEXT: ands r0, r1
158142
; THUMB-NEXT: bx lr
159143
;
160144
; THUMB2-LABEL: smini32_zero:
161145
; THUMB2: @ %bb.0:
162-
; THUMB2-NEXT: cmp r0, #0
163-
; THUMB2-NEXT: it pl
164-
; THUMB2-NEXT: movpl r0, #0
146+
; THUMB2-NEXT: and.w r0, r0, r0, asr #31
165147
; THUMB2-NEXT: bx lr
166148
%c = call i32 @llvm.smin.i32(i32 %a, i32 0)
167149
ret i32 %c

llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@ define arm_aapcs_vfpcc <4 x float> @arm_max_no_idx_f32_mve(ptr %pSrc, i32 %block
66
; CHECK: @ %bb.0: @ %entry
77
; CHECK-NEXT: .save {r7, lr}
88
; CHECK-NEXT: push {r7, lr}
9-
; CHECK-NEXT: subs r2, r1, #4
10-
; CHECK-NEXT: movw r3, #0
9+
; CHECK-NEXT: movs r3, #0
1110
; CHECK-NEXT: movt r3, #65408
1211
; CHECK-NEXT: vdup.32 q0, r3
1312
; CHECK-NEXT: dlstp.32 lr, r1

llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ define void @arm_cmplx_mag_squared_q15_mve(ptr %pSrc, ptr %pDst, i32 %blockSize)
55
; CHECK-LABEL: arm_cmplx_mag_squared_q15_mve:
66
; CHECK: @ %bb.0: @ %entry
77
; CHECK-NEXT: push {r7, lr}
8-
; CHECK-NEXT: subs.w r3, r2, #8
98
; CHECK-NEXT: dlstp.16 lr, r2
109
; CHECK-NEXT: .LBB0_1: @ %do.body
1110
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1

0 commit comments

Comments
 (0)