Skip to content

Commit f4fb9b8

Browse files
committed
Add select setlt extend to tst
1 parent 2929a29 commit f4fb9b8

File tree

4 files changed

+108
-19
lines changed

4 files changed

+108
-19
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11630,6 +11630,48 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
1163011630
return DAG.getNode(ISD::AND, DL, VT, LHS, Shift);
1163111631
}
1163211632

11633+
// Check for sign bit test patterns that can use TST optimization.
11634+
// (SELECT_CC setlt, sign_extend_inreg, 0, tval, fval)
11635+
// -> TST %operand, sign_bit; CSEL
11636+
// (SELECT_CC setlt, sign_extend, 0, tval, fval)
11637+
// -> TST %operand, sign_bit; CSEL
11638+
if (CC == ISD::SETLT && RHSC && RHSC->isZero() && LHS.hasOneUse() &&
11639+
(LHS.getOpcode() == ISD::SIGN_EXTEND_INREG ||
11640+
LHS.getOpcode() == ISD::SIGN_EXTEND)) {
11641+
11642+
SDValue OriginalVal = LHS.getOperand(0);
11643+
EVT OriginalVT = LHS.getOpcode() == ISD::SIGN_EXTEND_INREG
11644+
? cast<VTSDNode>(LHS.getOperand(1))->getVT()
11645+
: OriginalVal.getValueType();
11646+
11647+
// Apply TST optimization for integer types
11648+
if (OriginalVT.isInteger()) {
11649+
// Calculate the sign bit for the original type
11650+
unsigned BitWidth = OriginalVT.getSizeInBits();
11651+
APInt SignBit = APInt::getSignedMinValue(BitWidth);
11652+
EVT TestVT = (BitWidth <= 32) ? MVT::i32 : MVT::i64;
11653+
unsigned TestBitWidth = TestVT.getSizeInBits();
11654+
if (BitWidth < TestBitWidth) {
11655+
SignBit = SignBit.zext(TestBitWidth);
11656+
}
11657+
11658+
SDValue SignBitConst = DAG.getConstant(SignBit, DL, TestVT);
11659+
SDValue TestOperand = OriginalVal;
11660+
if (OriginalVal.getValueType() != TestVT) {
11661+
TestOperand = DAG.getNode(ISD::ZERO_EXTEND, DL, TestVT, OriginalVal);
11662+
}
11663+
11664+
SDValue TST =
11665+
DAG.getNode(AArch64ISD::ANDS, DL, DAG.getVTList(TestVT, MVT::i32),
11666+
TestOperand, SignBitConst);
11667+
11668+
SDValue Flags = TST.getValue(1);
11669+
return DAG.getNode(AArch64ISD::CSEL, DL, TVal.getValueType(), TVal,
11670+
FVal, DAG.getConstant(AArch64CC::MI, DL, MVT::i32),
11671+
Flags);
11672+
}
11673+
}
11674+
1163311675
// Canonicalise absolute difference patterns:
1163411676
// select_cc lhs, rhs, sub(lhs, rhs), sub(rhs, lhs), cc ->
1163511677
// select_cc lhs, rhs, sub(lhs, rhs), neg(sub(lhs, rhs)), cc

llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,7 @@ B:
7878
define i32 @g_i8_sign_extend_inreg(i8 %in, i32 %a, i32 %b) nounwind {
7979
; CHECK-LABEL: g_i8_sign_extend_inreg:
8080
; CHECK: // %bb.0: // %entry
81-
; CHECK-NEXT: sxtb w8, w0
82-
; CHECK-NEXT: cmp w8, #0
81+
; CHECK-NEXT: tst w0, #0x80
8382
; CHECK-NEXT: csel w8, w1, w2, mi
8483
; CHECK-NEXT: add w0, w8, w0, uxtb
8584
; CHECK-NEXT: ret
@@ -100,8 +99,7 @@ B:
10099
define i32 @g_i16_sign_extend_inreg(i16 %in, i32 %a, i32 %b) nounwind {
101100
; CHECK-LABEL: g_i16_sign_extend_inreg:
102101
; CHECK: // %bb.0: // %entry
103-
; CHECK-NEXT: sxth w8, w0
104-
; CHECK-NEXT: cmp w8, #0
102+
; CHECK-NEXT: tst w0, #0x8000
105103
; CHECK-NEXT: csel w8, w1, w2, mi
106104
; CHECK-NEXT: add w0, w8, w0, uxth
107105
; CHECK-NEXT: ret
@@ -167,9 +165,7 @@ B:
167165
define i64 @g_i32_sign_extend_i64(i32 %in, i64 %a, i64 %b) nounwind {
168166
; CHECK-LABEL: g_i32_sign_extend_i64:
169167
; CHECK: // %bb.0: // %entry
170-
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
171-
; CHECK-NEXT: sxtw x8, w0
172-
; CHECK-NEXT: cmp x8, #0
168+
; CHECK-NEXT: tst w0, #0x80000000
173169
; CHECK-NEXT: csel x8, x1, x2, mi
174170
; CHECK-NEXT: add x0, x8, w0, uxtw
175171
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/icmp.ll

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2093,3 +2093,54 @@ define <2 x i1> @icmp_slt_v2i64_Zero_LHS(<2 x i64> %a) {
20932093
%c = icmp slt <2 x i64> <i64 0, i64 0>, %a
20942094
ret <2 x i1> %c
20952095
}
2096+
2097+
; Test TST optimization for i8 sign bit testing with cross-type select
2098+
; This tests the pattern: icmp slt i8 %val, 0; select i1 %cmp, i32 %a, i32 %b
2099+
; The optimization should convert sxtb+cmp to tst for sign bit testing.
2100+
2101+
define i32 @i8_signbit_tst_constants(i8 %x, i8 %y) {
2102+
; CHECK-SD-LABEL: i8_signbit_tst_constants:
2103+
; CHECK-SD: // %bb.0:
2104+
; CHECK-SD-NEXT: add w9, w0, w1
2105+
; CHECK-SD-NEXT: mov w8, #42 // =0x2a
2106+
; CHECK-SD-NEXT: tst w9, #0x80
2107+
; CHECK-SD-NEXT: mov w9, #20894 // =0x519e
2108+
; CHECK-SD-NEXT: csel w0, w9, w8, mi
2109+
; CHECK-SD-NEXT: ret
2110+
;
2111+
; CHECK-GI-LABEL: i8_signbit_tst_constants:
2112+
; CHECK-GI: // %bb.0:
2113+
; CHECK-GI-NEXT: add w8, w0, w1
2114+
; CHECK-GI-NEXT: mov w9, #42 // =0x2a
2115+
; CHECK-GI-NEXT: mov w10, #20894 // =0x519e
2116+
; CHECK-GI-NEXT: sxtb w8, w8
2117+
; CHECK-GI-NEXT: cmp w8, #0
2118+
; CHECK-GI-NEXT: csel w0, w10, w9, mi
2119+
; CHECK-GI-NEXT: ret
2120+
%add = add i8 %x, %y
2121+
%cmp = icmp slt i8 %add, 0
2122+
%sel = select i1 %cmp, i32 20894, i32 42
2123+
ret i32 %sel
2124+
}
2125+
2126+
; Test i8 sign bit testing with variable select values (problematic case)
2127+
define i32 @i8_signbit_variables(i8 %x, i8 %y, i32 %a, i32 %b) {
2128+
; CHECK-SD-LABEL: i8_signbit_variables:
2129+
; CHECK-SD: // %bb.0:
2130+
; CHECK-SD-NEXT: add w8, w0, w1
2131+
; CHECK-SD-NEXT: tst w8, #0x80
2132+
; CHECK-SD-NEXT: csel w0, w2, w3, mi
2133+
; CHECK-SD-NEXT: ret
2134+
;
2135+
; CHECK-GI-LABEL: i8_signbit_variables:
2136+
; CHECK-GI: // %bb.0:
2137+
; CHECK-GI-NEXT: add w8, w0, w1
2138+
; CHECK-GI-NEXT: sxtb w8, w8
2139+
; CHECK-GI-NEXT: cmp w8, #0
2140+
; CHECK-GI-NEXT: csel w0, w2, w3, mi
2141+
; CHECK-GI-NEXT: ret
2142+
%add = add i8 %x, %y
2143+
%cmp = icmp slt i8 %add, 0
2144+
%sel = select i1 %cmp, i32 %a, i32 %b
2145+
ret i32 %sel
2146+
}

llvm/test/CodeGen/AArch64/vecreduce-bool.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ define i32 @reduce_and_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
2626
; CHECK-LABEL: reduce_and_v1i8:
2727
; CHECK: // %bb.0:
2828
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
29-
; CHECK-NEXT: smov w8, v0.b[0]
30-
; CHECK-NEXT: cmp w8, #0
29+
; CHECK-NEXT: umov w8, v0.b[0]
30+
; CHECK-NEXT: tst w8, #0x80
3131
; CHECK-NEXT: csel w0, w0, w1, mi
3232
; CHECK-NEXT: ret
3333
%x = icmp slt <1 x i8> %a0, zeroinitializer
@@ -120,8 +120,8 @@ define i32 @reduce_and_v1i16(<1 x i16> %a0, i32 %a1, i32 %a2) nounwind {
120120
; CHECK-LABEL: reduce_and_v1i16:
121121
; CHECK: // %bb.0:
122122
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
123-
; CHECK-NEXT: smov w8, v0.h[0]
124-
; CHECK-NEXT: cmp w8, #0
123+
; CHECK-NEXT: umov w8, v0.h[0]
124+
; CHECK-NEXT: tst w8, #0x8000
125125
; CHECK-NEXT: csel w0, w0, w1, mi
126126
; CHECK-NEXT: ret
127127
%x = icmp slt <1 x i16> %a0, zeroinitializer
@@ -305,8 +305,8 @@ define i32 @reduce_or_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
305305
; CHECK-LABEL: reduce_or_v1i8:
306306
; CHECK: // %bb.0:
307307
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
308-
; CHECK-NEXT: smov w8, v0.b[0]
309-
; CHECK-NEXT: cmp w8, #0
308+
; CHECK-NEXT: umov w8, v0.b[0]
309+
; CHECK-NEXT: tst w8, #0x80
310310
; CHECK-NEXT: csel w0, w0, w1, mi
311311
; CHECK-NEXT: ret
312312
%x = icmp slt <1 x i8> %a0, zeroinitializer
@@ -399,8 +399,8 @@ define i32 @reduce_or_v1i16(<1 x i16> %a0, i32 %a1, i32 %a2) nounwind {
399399
; CHECK-LABEL: reduce_or_v1i16:
400400
; CHECK: // %bb.0:
401401
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
402-
; CHECK-NEXT: smov w8, v0.h[0]
403-
; CHECK-NEXT: cmp w8, #0
402+
; CHECK-NEXT: umov w8, v0.h[0]
403+
; CHECK-NEXT: tst w8, #0x8000
404404
; CHECK-NEXT: csel w0, w0, w1, mi
405405
; CHECK-NEXT: ret
406406
%x = icmp slt <1 x i16> %a0, zeroinitializer
@@ -584,8 +584,8 @@ define i32 @reduce_xor_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
584584
; CHECK-LABEL: reduce_xor_v1i8:
585585
; CHECK: // %bb.0:
586586
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
587-
; CHECK-NEXT: smov w8, v0.b[0]
588-
; CHECK-NEXT: cmp w8, #0
587+
; CHECK-NEXT: umov w8, v0.b[0]
588+
; CHECK-NEXT: tst w8, #0x80
589589
; CHECK-NEXT: csel w0, w0, w1, mi
590590
; CHECK-NEXT: ret
591591
%x = icmp slt <1 x i8> %a0, zeroinitializer
@@ -679,8 +679,8 @@ define i32 @reduce_xor_v1i16(<1 x i16> %a0, i32 %a1, i32 %a2) nounwind {
679679
; CHECK-LABEL: reduce_xor_v1i16:
680680
; CHECK: // %bb.0:
681681
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
682-
; CHECK-NEXT: smov w8, v0.h[0]
683-
; CHECK-NEXT: cmp w8, #0
682+
; CHECK-NEXT: umov w8, v0.h[0]
683+
; CHECK-NEXT: tst w8, #0x8000
684684
; CHECK-NEXT: csel w0, w0, w1, mi
685685
; CHECK-NEXT: ret
686686
%x = icmp slt <1 x i16> %a0, zeroinitializer

0 commit comments

Comments
 (0)