Skip to content

Commit f970b00

Browse files
committed
[ARM] Fix vector ule zero lowering
The instruction icmp ule <4 x i32> %0, zeroinitializer will usually be simplified to icmp eq <4 x i32> %0, zeroinitializer. That simplification is not guaranteed, though, and when it did not happen the code for lowering vector compares could pick the wrong form of the instruction. I've tried to make the code more explicit about the supported conditions. This fixes NEON being unable to select VCMPZ with HS conditions, and fixes some incorrect MVE patterns. Fixes #58514. Differential Revision: https://reviews.llvm.org/D136447
1 parent 9e60495 commit f970b00

File tree

6 files changed

+47
-28
lines changed

6 files changed

+47
-28
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6855,25 +6855,25 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
68556855

68566856
// If one of the operands is a constant vector zero, attempt to fold the
68576857
// comparison to a specialized compare-against-zero form.
6858-
SDValue SingleOp;
6859-
if (ISD::isBuildVectorAllZeros(Op1.getNode()))
6860-
SingleOp = Op0;
6861-
else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
6858+
if (ISD::isBuildVectorAllZeros(Op0.getNode()) &&
6859+
(Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::EQ ||
6860+
Opc == ARMCC::NE)) {
68626861
if (Opc == ARMCC::GE)
68636862
Opc = ARMCC::LE;
68646863
else if (Opc == ARMCC::GT)
68656864
Opc = ARMCC::LT;
6866-
SingleOp = Op1;
6865+
std::swap(Op0, Op1);
68676866
}
68686867

68696868
SDValue Result;
6870-
if (SingleOp.getNode()) {
6871-
Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, SingleOp,
6869+
if (ISD::isBuildVectorAllZeros(Op1.getNode()) &&
6870+
(Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::LE ||
6871+
Opc == ARMCC::LT || Opc == ARMCC::NE || Opc == ARMCC::EQ))
6872+
Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, Op0,
68726873
DAG.getConstant(Opc, dl, MVT::i32));
6873-
} else {
6874+
else
68746875
Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
68756876
DAG.getConstant(Opc, dl, MVT::i32));
6876-
}
68776877

68786878
Result = DAG.getSExtOrTrunc(Result, dl, VT);
68796879

llvm/test/CodeGen/ARM/vcmpz.ll

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -174,11 +174,16 @@ define arm_aapcs_vfpcc <4 x i32> @vcmpz_zr_ult(<4 x i32> %0) {
174174
ret <4 x i32> %3
175175
}
176176

177-
;define arm_aapcs_vfpcc <4 x i32> @vcmpz_zr_ule(<4 x i32> %0) {
178-
; %2 = icmp ule <4 x i32> %0, zeroinitializer
179-
; %3 = sext <4 x i1> %2 to <4 x i32>
180-
; ret <4 x i32> %3
181-
;}
177+
define arm_aapcs_vfpcc <4 x i32> @vcmpz_zr_ule(<4 x i32> %0) {
178+
; CHECK-LABEL: vcmpz_zr_ule:
179+
; CHECK: @ %bb.0:
180+
; CHECK-NEXT: vmov.i32 q8, #0x0
181+
; CHECK-NEXT: vcge.u32 q0, q8, q0
182+
; CHECK-NEXT: bx lr
183+
%2 = icmp ule <4 x i32> %0, zeroinitializer
184+
%3 = sext <4 x i1> %2 to <4 x i32>
185+
ret <4 x i32> %3
186+
}
182187

183188
define arm_aapcs_vfpcc <4 x i32> @vcmpz_zr_ugt(<4 x i32> %0) {
184189
; CHECK-LABEL: vcmpz_zr_ugt:
@@ -294,8 +299,13 @@ define arm_aapcs_vfpcc <4 x i32> @vcmpz_zl_ugt(<4 x i32> %0) {
294299
ret <4 x i32> %3
295300
}
296301

297-
;define arm_aapcs_vfpcc <4 x i32> @vcmpz_zl_uge(<4 x i32> %0) {
298-
; %2 = icmp uge <4 x i32> zeroinitializer, %0
299-
; %3 = sext <4 x i1> %2 to <4 x i32>
300-
; ret <4 x i32> %3
301-
;}
302+
define arm_aapcs_vfpcc <4 x i32> @vcmpz_zl_uge(<4 x i32> %0) {
303+
; CHECK-LABEL: vcmpz_zl_uge:
304+
; CHECK: @ %bb.0:
305+
; CHECK-NEXT: vmov.i32 q8, #0x0
306+
; CHECK-NEXT: vcge.u32 q0, q8, q0
307+
; CHECK-NEXT: bx lr
308+
%2 = icmp uge <4 x i32> zeroinitializer, %0
309+
%3 = sext <4 x i1> %2 to <4 x i32>
310+
ret <4 x i32> %3
311+
}

llvm/test/CodeGen/Thumb2/mve-pred-and.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,9 @@ entry:
122122
define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) {
123123
; CHECK-LABEL: cmpulez_v4i1:
124124
; CHECK: @ %bb.0: @ %entry
125+
; CHECK-NEXT: vmov.i32 q2, #0x0
125126
; CHECK-NEXT: vpt.i32 eq, q0, zr
126-
; CHECK-NEXT: vcmpt.u32 cs, q1, zr
127+
; CHECK-NEXT: vcmpt.u32 cs, q2, q1
127128
; CHECK-NEXT: vpsel q0, q0, q1
128129
; CHECK-NEXT: bx lr
129130
entry:

llvm/test/CodeGen/Thumb2/mve-pred-or.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,8 @@ entry:
123123
define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) {
124124
; CHECK-LABEL: cmpulez_v4i1:
125125
; CHECK: @ %bb.0: @ %entry
126-
; CHECK-NEXT: vcmp.u32 cs, q1, zr
126+
; CHECK-NEXT: vmov.i32 q2, #0x0
127+
; CHECK-NEXT: vcmp.u32 cs, q2, q1
127128
; CHECK-NEXT: vpnot
128129
; CHECK-NEXT: vpst
129130
; CHECK-NEXT: vcmpt.i32 ne, q0, zr

llvm/test/CodeGen/Thumb2/mve-pred-xor.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,8 @@ entry:
151151
define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) {
152152
; CHECK-LABEL: cmpulez_v4i1:
153153
; CHECK: @ %bb.0: @ %entry
154-
; CHECK-NEXT: vcmp.u32 cs, q1, zr
154+
; CHECK-NEXT: vmov.i32 q2, #0x0
155+
; CHECK-NEXT: vcmp.u32 cs, q2, q1
155156
; CHECK-NEXT: vmrs r0, p0
156157
; CHECK-NEXT: vcmp.i32 eq, q0, zr
157158
; CHECK-NEXT: vmrs r1, p0

llvm/test/CodeGen/Thumb2/mve-vcmpz.ll

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,8 @@ entry:
110110
define arm_aapcs_vfpcc <4 x i32> @vcmp_ulez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
111111
; CHECK-LABEL: vcmp_ulez_v4i32:
112112
; CHECK: @ %bb.0: @ %entry
113-
; CHECK-NEXT: vcmp.u32 cs, q0, zr
113+
; CHECK-NEXT: vmov.i32 q3, #0x0
114+
; CHECK-NEXT: vcmp.u32 cs, q3, q0
114115
; CHECK-NEXT: vpsel q0, q1, q2
115116
; CHECK-NEXT: bx lr
116117
entry:
@@ -229,7 +230,8 @@ entry:
229230
define arm_aapcs_vfpcc <8 x i16> @vcmp_ulez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
230231
; CHECK-LABEL: vcmp_ulez_v8i16:
231232
; CHECK: @ %bb.0: @ %entry
232-
; CHECK-NEXT: vcmp.u16 cs, q0, zr
233+
; CHECK-NEXT: vmov.i32 q3, #0x0
234+
; CHECK-NEXT: vcmp.u16 cs, q3, q0
233235
; CHECK-NEXT: vpsel q0, q1, q2
234236
; CHECK-NEXT: bx lr
235237
entry:
@@ -348,7 +350,8 @@ entry:
348350
define arm_aapcs_vfpcc <16 x i8> @vcmp_ulez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
349351
; CHECK-LABEL: vcmp_ulez_v16i8:
350352
; CHECK: @ %bb.0: @ %entry
351-
; CHECK-NEXT: vcmp.u8 cs, q0, zr
353+
; CHECK-NEXT: vmov.i32 q3, #0x0
354+
; CHECK-NEXT: vcmp.u8 cs, q3, q0
352355
; CHECK-NEXT: vpsel q0, q1, q2
353356
; CHECK-NEXT: bx lr
354357
entry:
@@ -489,7 +492,8 @@ entry:
489492
define arm_aapcs_vfpcc <4 x i32> @vcmp_r_ugez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
490493
; CHECK-LABEL: vcmp_r_ugez_v4i32:
491494
; CHECK: @ %bb.0: @ %entry
492-
; CHECK-NEXT: vcmp.u32 cs, q0, zr
495+
; CHECK-NEXT: vmov.i32 q3, #0x0
496+
; CHECK-NEXT: vcmp.u32 cs, q3, q0
493497
; CHECK-NEXT: vpsel q0, q1, q2
494498
; CHECK-NEXT: bx lr
495499
entry:
@@ -608,7 +612,8 @@ entry:
608612
define arm_aapcs_vfpcc <8 x i16> @vcmp_r_ugez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
609613
; CHECK-LABEL: vcmp_r_ugez_v8i16:
610614
; CHECK: @ %bb.0: @ %entry
611-
; CHECK-NEXT: vcmp.u16 cs, q0, zr
615+
; CHECK-NEXT: vmov.i32 q3, #0x0
616+
; CHECK-NEXT: vcmp.u16 cs, q3, q0
612617
; CHECK-NEXT: vpsel q0, q1, q2
613618
; CHECK-NEXT: bx lr
614619
entry:
@@ -727,7 +732,8 @@ entry:
727732
define arm_aapcs_vfpcc <16 x i8> @vcmp_r_ugez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
728733
; CHECK-LABEL: vcmp_r_ugez_v16i8:
729734
; CHECK: @ %bb.0: @ %entry
730-
; CHECK-NEXT: vcmp.u8 cs, q0, zr
735+
; CHECK-NEXT: vmov.i32 q3, #0x0
736+
; CHECK-NEXT: vcmp.u8 cs, q3, q0
731737
; CHECK-NEXT: vpsel q0, q1, q2
732738
; CHECK-NEXT: bx lr
733739
entry:

0 commit comments

Comments
 (0)