Skip to content

Commit d27802a

Browse files
authored
[DAGCombiner] Fold setcc of trunc, generalizing some NVPTX isel logic (#150270)
This change adds support for folding a SETCC when one or both of the operands is a TRUNCATE with the appropriate no-wrap flags. This pattern can occur when promoting i8 operations in NVPTX, and we currently have some ISel rules to try to handle it.
1 parent ee6afeb commit d27802a

File tree

4 files changed

+298
-21
lines changed

4 files changed

+298
-21
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5118,6 +5118,20 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
51185118
Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
51195119
}
51205120

5121+
// fold (setcc (trunc x) c) -> (setcc x c)
5122+
if (N0.getOpcode() == ISD::TRUNCATE &&
5123+
((N0->getFlags().hasNoUnsignedWrap() && !ISD::isSignedIntSetCC(Cond)) ||
5124+
(N0->getFlags().hasNoSignedWrap() &&
5125+
!ISD::isUnsignedIntSetCC(Cond))) &&
5126+
isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
5127+
EVT NewVT = N0.getOperand(0).getValueType();
5128+
SDValue NewConst = DAG.getConstant(ISD::isSignedIntSetCC(Cond)
5129+
? C1.sext(NewVT.getSizeInBits())
5130+
: C1.zext(NewVT.getSizeInBits()),
5131+
dl, NewVT);
5132+
return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
5133+
}
5134+
51215135
if (SDValue V =
51225136
optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
51235137
return V;
@@ -5654,6 +5668,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
56545668
return N0;
56555669
}
56565670

5671+
// Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5672+
if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5673+
N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
5674+
((!ISD::isSignedIntSetCC(Cond) && N0->getFlags().hasNoUnsignedWrap() &&
5675+
N1->getFlags().hasNoUnsignedWrap()) ||
5676+
(!ISD::isUnsignedIntSetCC(Cond) && N0->getFlags().hasNoSignedWrap() &&
5677+
N1->getFlags().hasNoSignedWrap())) &&
5678+
isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
5679+
return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5680+
}
5681+
56575682
// Could not fold it.
56585683
return SDValue();
56595684
}

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1560,18 +1560,6 @@ def : Pat<(setcc (i16 (sext_inreg (trunc (prmt i32:$a, 0, byte_extract_prmt:$sel
15601560
(PRMT_B32rii i32:$b, 0, (to_sign_extend_selector $sel_b), PrmtNONE),
15611561
(cond2cc $cc))>;
15621562

1563-
// A 16-bit comparison of truncated byte extracts can be converted to 32-bit
1564-
// comparison because we know that the truncate is just truncating off zeros
1565-
// and that the most-significant byte is also zeros so the meaning of signed and
1566-
// unsigned comparisons will not be changed.
1567-
def : Pat<(setcc (i16 (trunc (prmt i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE))),
1568-
(i16 (trunc (prmt i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE))),
1569-
cond:$cc),
1570-
(SETP_i32rr (PRMT_B32rii i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE),
1571-
(PRMT_B32rii i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE),
1572-
(cond2cc $cc))>;
1573-
1574-
15751563
def SDTDeclareArrayParam :
15761564
SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
15771565
def SDTDeclareScalarParam :

llvm/test/CodeGen/NVPTX/sext-setcc.ll

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,25 +29,20 @@ define <4 x i8> @sext_setcc_v4i1_to_v4i8(ptr %p) {
2929
; CHECK-LABEL: sext_setcc_v4i1_to_v4i8(
3030
; CHECK: {
3131
; CHECK-NEXT: .reg .pred %p<5>;
32-
; CHECK-NEXT: .reg .b16 %rs<5>;
3332
; CHECK-NEXT: .reg .b32 %r<13>;
3433
; CHECK-NEXT: .reg .b64 %rd<2>;
3534
; CHECK-EMPTY:
3635
; CHECK-NEXT: // %bb.0: // %entry
3736
; CHECK-NEXT: ld.param.b64 %rd1, [sext_setcc_v4i1_to_v4i8_param_0];
3837
; CHECK-NEXT: ld.b32 %r1, [%rd1];
3938
; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 0x7770U;
40-
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
41-
; CHECK-NEXT: setp.eq.b16 %p1, %rs1, 0;
39+
; CHECK-NEXT: setp.eq.b32 %p1, %r2, 0;
4240
; CHECK-NEXT: prmt.b32 %r3, %r1, 0, 0x7771U;
43-
; CHECK-NEXT: cvt.u16.u32 %rs2, %r3;
44-
; CHECK-NEXT: setp.eq.b16 %p2, %rs2, 0;
41+
; CHECK-NEXT: setp.eq.b32 %p2, %r3, 0;
4542
; CHECK-NEXT: prmt.b32 %r4, %r1, 0, 0x7772U;
46-
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
47-
; CHECK-NEXT: setp.eq.b16 %p3, %rs3, 0;
43+
; CHECK-NEXT: setp.eq.b32 %p3, %r4, 0;
4844
; CHECK-NEXT: prmt.b32 %r5, %r1, 0, 0x7773U;
49-
; CHECK-NEXT: cvt.u16.u32 %rs4, %r5;
50-
; CHECK-NEXT: setp.eq.b16 %p4, %rs4, 0;
45+
; CHECK-NEXT: setp.eq.b32 %p4, %r5, 0;
5146
; CHECK-NEXT: selp.b32 %r6, -1, 0, %p4;
5247
; CHECK-NEXT: selp.b32 %r7, -1, 0, %p3;
5348
; CHECK-NEXT: prmt.b32 %r8, %r7, %r6, 0x3340U;
Lines changed: 269 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mcpu=sm_50 | FileCheck %s
3+
; RUN: %if ptxas %{ llc < %s -mcpu=sm_50 | %ptxas-verify -arch=sm_50 %}
4+
5+
target triple = "nvptx64-nvidia-cuda"
6+
7+
define i1 @trunc_nsw_singed_const(i32 %a) {
8+
; CHECK-LABEL: trunc_nsw_singed_const(
9+
; CHECK: {
10+
; CHECK-NEXT: .reg .pred %p<2>;
11+
; CHECK-NEXT: .reg .b32 %r<4>;
12+
; CHECK-EMPTY:
13+
; CHECK-NEXT: // %bb.0:
14+
; CHECK-NEXT: ld.param.b32 %r1, [trunc_nsw_singed_const_param_0];
15+
; CHECK-NEXT: add.s32 %r2, %r1, 1;
16+
; CHECK-NEXT: setp.gt.s32 %p1, %r2, -1;
17+
; CHECK-NEXT: selp.b32 %r3, -1, 0, %p1;
18+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
19+
; CHECK-NEXT: ret;
20+
%a2 = add i32 %a, 1
21+
%b = trunc nsw i32 %a2 to i8
22+
%c = icmp sgt i8 %b, -1
23+
ret i1 %c
24+
}
25+
26+
define i1 @trunc_nuw_singed_const(i32 %a) {
27+
; CHECK-LABEL: trunc_nuw_singed_const(
28+
; CHECK: {
29+
; CHECK-NEXT: .reg .pred %p<2>;
30+
; CHECK-NEXT: .reg .b16 %rs<4>;
31+
; CHECK-NEXT: .reg .b32 %r<2>;
32+
; CHECK-EMPTY:
33+
; CHECK-NEXT: // %bb.0:
34+
; CHECK-NEXT: ld.param.b8 %rs1, [trunc_nuw_singed_const_param_0];
35+
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
36+
; CHECK-NEXT: cvt.s16.s8 %rs3, %rs2;
37+
; CHECK-NEXT: setp.lt.s16 %p1, %rs3, 100;
38+
; CHECK-NEXT: selp.b32 %r1, -1, 0, %p1;
39+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
40+
; CHECK-NEXT: ret;
41+
%a2 = add i32 %a, 1
42+
%b = trunc nuw i32 %a2 to i8
43+
%c = icmp slt i8 %b, 100
44+
ret i1 %c
45+
}
46+
47+
define i1 @trunc_nsw_unsinged_const(i32 %a) {
48+
; CHECK-LABEL: trunc_nsw_unsinged_const(
49+
; CHECK: {
50+
; CHECK-NEXT: .reg .pred %p<2>;
51+
; CHECK-NEXT: .reg .b16 %rs<4>;
52+
; CHECK-NEXT: .reg .b32 %r<2>;
53+
; CHECK-EMPTY:
54+
; CHECK-NEXT: // %bb.0:
55+
; CHECK-NEXT: ld.param.b8 %rs1, [trunc_nsw_unsinged_const_param_0];
56+
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
57+
; CHECK-NEXT: and.b16 %rs3, %rs2, 255;
58+
; CHECK-NEXT: setp.lt.u16 %p1, %rs3, 236;
59+
; CHECK-NEXT: selp.b32 %r1, -1, 0, %p1;
60+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
61+
; CHECK-NEXT: ret;
62+
%a2 = add i32 %a, 1
63+
%b = trunc nsw i32 %a2 to i8
64+
%c = icmp ult i8 %b, -20
65+
ret i1 %c
66+
}
67+
68+
define i1 @trunc_nuw_unsinged_const(i32 %a) {
69+
; CHECK-LABEL: trunc_nuw_unsinged_const(
70+
; CHECK: {
71+
; CHECK-NEXT: .reg .pred %p<2>;
72+
; CHECK-NEXT: .reg .b32 %r<4>;
73+
; CHECK-EMPTY:
74+
; CHECK-NEXT: // %bb.0:
75+
; CHECK-NEXT: ld.param.b32 %r1, [trunc_nuw_unsinged_const_param_0];
76+
; CHECK-NEXT: add.s32 %r2, %r1, 1;
77+
; CHECK-NEXT: setp.gt.u32 %p1, %r2, 100;
78+
; CHECK-NEXT: selp.b32 %r3, -1, 0, %p1;
79+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
80+
; CHECK-NEXT: ret;
81+
%a2 = add i32 %a, 1
82+
%b = trunc nuw i32 %a2 to i8
83+
%c = icmp ugt i8 %b, 100
84+
ret i1 %c
85+
}
86+
87+
88+
define i1 @trunc_nsw_eq_const(i32 %a) {
89+
; CHECK-LABEL: trunc_nsw_eq_const(
90+
; CHECK: {
91+
; CHECK-NEXT: .reg .pred %p<2>;
92+
; CHECK-NEXT: .reg .b32 %r<3>;
93+
; CHECK-EMPTY:
94+
; CHECK-NEXT: // %bb.0:
95+
; CHECK-NEXT: ld.param.b32 %r1, [trunc_nsw_eq_const_param_0];
96+
; CHECK-NEXT: setp.eq.b32 %p1, %r1, 99;
97+
; CHECK-NEXT: selp.b32 %r2, -1, 0, %p1;
98+
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
99+
; CHECK-NEXT: ret;
100+
%a2 = add i32 %a, 1
101+
%b = trunc nsw i32 %a2 to i8
102+
%c = icmp eq i8 %b, 100
103+
ret i1 %c
104+
}
105+
106+
define i1 @trunc_nuw_eq_const(i32 %a) {
107+
; CHECK-LABEL: trunc_nuw_eq_const(
108+
; CHECK: {
109+
; CHECK-NEXT: .reg .pred %p<2>;
110+
; CHECK-NEXT: .reg .b32 %r<3>;
111+
; CHECK-EMPTY:
112+
; CHECK-NEXT: // %bb.0:
113+
; CHECK-NEXT: ld.param.b32 %r1, [trunc_nuw_eq_const_param_0];
114+
; CHECK-NEXT: setp.eq.b32 %p1, %r1, 99;
115+
; CHECK-NEXT: selp.b32 %r2, -1, 0, %p1;
116+
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
117+
; CHECK-NEXT: ret;
118+
%a2 = add i32 %a, 1
119+
%b = trunc nuw i32 %a2 to i8
120+
%c = icmp eq i8 %b, 100
121+
ret i1 %c
122+
}
123+
124+
;;;
125+
126+
define i1 @trunc_nsw_singed(i32 %a1, i32 %a2) {
127+
; CHECK-LABEL: trunc_nsw_singed(
128+
; CHECK: {
129+
; CHECK-NEXT: .reg .pred %p<2>;
130+
; CHECK-NEXT: .reg .b32 %r<6>;
131+
; CHECK-EMPTY:
132+
; CHECK-NEXT: // %bb.0:
133+
; CHECK-NEXT: ld.param.b32 %r1, [trunc_nsw_singed_param_0];
134+
; CHECK-NEXT: add.s32 %r2, %r1, 1;
135+
; CHECK-NEXT: ld.param.b32 %r3, [trunc_nsw_singed_param_1];
136+
; CHECK-NEXT: add.s32 %r4, %r3, 7;
137+
; CHECK-NEXT: setp.gt.s32 %p1, %r2, %r4;
138+
; CHECK-NEXT: selp.b32 %r5, -1, 0, %p1;
139+
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
140+
; CHECK-NEXT: ret;
141+
%b1 = add i32 %a1, 1
142+
%b2 = add i32 %a2, 7
143+
%c1 = trunc nsw i32 %b1 to i8
144+
%c2 = trunc nsw i32 %b2 to i8
145+
%c = icmp sgt i8 %c1, %c2
146+
ret i1 %c
147+
}
148+
149+
define i1 @trunc_nuw_singed(i32 %a1, i32 %a2) {
150+
; CHECK-LABEL: trunc_nuw_singed(
151+
; CHECK: {
152+
; CHECK-NEXT: .reg .pred %p<2>;
153+
; CHECK-NEXT: .reg .b16 %rs<7>;
154+
; CHECK-NEXT: .reg .b32 %r<2>;
155+
; CHECK-EMPTY:
156+
; CHECK-NEXT: // %bb.0:
157+
; CHECK-NEXT: ld.param.b8 %rs1, [trunc_nuw_singed_param_0];
158+
; CHECK-NEXT: ld.param.b8 %rs2, [trunc_nuw_singed_param_1];
159+
; CHECK-NEXT: add.s16 %rs3, %rs1, 1;
160+
; CHECK-NEXT: cvt.s16.s8 %rs4, %rs3;
161+
; CHECK-NEXT: add.s16 %rs5, %rs2, 6;
162+
; CHECK-NEXT: cvt.s16.s8 %rs6, %rs5;
163+
; CHECK-NEXT: setp.lt.s16 %p1, %rs4, %rs6;
164+
; CHECK-NEXT: selp.b32 %r1, -1, 0, %p1;
165+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
166+
; CHECK-NEXT: ret;
167+
%b1 = add i32 %a1, 1
168+
%b2 = add i32 %a2, 6
169+
%c1 = trunc nuw i32 %b1 to i8
170+
%c2 = trunc nuw i32 %b2 to i8
171+
%c = icmp slt i8 %c1, %c2
172+
ret i1 %c
173+
}
174+
175+
define i1 @trunc_nsw_unsinged(i32 %a1, i32 %a2) {
176+
; CHECK-LABEL: trunc_nsw_unsinged(
177+
; CHECK: {
178+
; CHECK-NEXT: .reg .pred %p<2>;
179+
; CHECK-NEXT: .reg .b16 %rs<7>;
180+
; CHECK-NEXT: .reg .b32 %r<2>;
181+
; CHECK-EMPTY:
182+
; CHECK-NEXT: // %bb.0:
183+
; CHECK-NEXT: ld.param.b8 %rs1, [trunc_nsw_unsinged_param_0];
184+
; CHECK-NEXT: ld.param.b8 %rs2, [trunc_nsw_unsinged_param_1];
185+
; CHECK-NEXT: add.s16 %rs3, %rs1, 1;
186+
; CHECK-NEXT: and.b16 %rs4, %rs3, 255;
187+
; CHECK-NEXT: add.s16 %rs5, %rs2, 4;
188+
; CHECK-NEXT: and.b16 %rs6, %rs5, 255;
189+
; CHECK-NEXT: setp.lt.u16 %p1, %rs4, %rs6;
190+
; CHECK-NEXT: selp.b32 %r1, -1, 0, %p1;
191+
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
192+
; CHECK-NEXT: ret;
193+
%b1 = add i32 %a1, 1
194+
%b2 = add i32 %a2, 4
195+
%c1 = trunc nsw i32 %b1 to i8
196+
%c2 = trunc nsw i32 %b2 to i8
197+
%c = icmp ult i8 %c1, %c2
198+
ret i1 %c
199+
}
200+
201+
define i1 @trunc_nuw_unsinged(i32 %a1, i32 %a2) {
202+
; CHECK-LABEL: trunc_nuw_unsinged(
203+
; CHECK: {
204+
; CHECK-NEXT: .reg .pred %p<2>;
205+
; CHECK-NEXT: .reg .b32 %r<6>;
206+
; CHECK-EMPTY:
207+
; CHECK-NEXT: // %bb.0:
208+
; CHECK-NEXT: ld.param.b32 %r1, [trunc_nuw_unsinged_param_0];
209+
; CHECK-NEXT: add.s32 %r2, %r1, 1;
210+
; CHECK-NEXT: ld.param.b32 %r3, [trunc_nuw_unsinged_param_1];
211+
; CHECK-NEXT: add.s32 %r4, %r3, 5;
212+
; CHECK-NEXT: setp.gt.u32 %p1, %r2, %r4;
213+
; CHECK-NEXT: selp.b32 %r5, -1, 0, %p1;
214+
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
215+
; CHECK-NEXT: ret;
216+
%b1 = add i32 %a1, 1
217+
%b2 = add i32 %a2, 5
218+
%c1 = trunc nuw i32 %b1 to i8
219+
%c2 = trunc nuw i32 %b2 to i8
220+
%c = icmp ugt i8 %c1, %c2
221+
ret i1 %c
222+
}
223+
224+
225+
define i1 @trunc_nsw_eq(i32 %a1, i32 %a2) {
226+
; CHECK-LABEL: trunc_nsw_eq(
227+
; CHECK: {
228+
; CHECK-NEXT: .reg .pred %p<2>;
229+
; CHECK-NEXT: .reg .b32 %r<6>;
230+
; CHECK-EMPTY:
231+
; CHECK-NEXT: // %bb.0:
232+
; CHECK-NEXT: ld.param.b32 %r1, [trunc_nsw_eq_param_0];
233+
; CHECK-NEXT: add.s32 %r2, %r1, 1;
234+
; CHECK-NEXT: ld.param.b32 %r3, [trunc_nsw_eq_param_1];
235+
; CHECK-NEXT: add.s32 %r4, %r3, 3;
236+
; CHECK-NEXT: setp.eq.b32 %p1, %r2, %r4;
237+
; CHECK-NEXT: selp.b32 %r5, -1, 0, %p1;
238+
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
239+
; CHECK-NEXT: ret;
240+
%b1 = add i32 %a1, 1
241+
%b2 = add i32 %a2, 3
242+
%c1 = trunc nsw i32 %b1 to i8
243+
%c2 = trunc nsw i32 %b2 to i8
244+
%c = icmp eq i8 %c1, %c2
245+
ret i1 %c
246+
}
247+
248+
define i1 @trunc_nuw_eq(i32 %a1, i32 %a2) {
249+
; CHECK-LABEL: trunc_nuw_eq(
250+
; CHECK: {
251+
; CHECK-NEXT: .reg .pred %p<2>;
252+
; CHECK-NEXT: .reg .b32 %r<6>;
253+
; CHECK-EMPTY:
254+
; CHECK-NEXT: // %bb.0:
255+
; CHECK-NEXT: ld.param.b32 %r1, [trunc_nuw_eq_param_0];
256+
; CHECK-NEXT: add.s32 %r2, %r1, 2;
257+
; CHECK-NEXT: ld.param.b32 %r3, [trunc_nuw_eq_param_1];
258+
; CHECK-NEXT: add.s32 %r4, %r3, 1;
259+
; CHECK-NEXT: setp.eq.b32 %p1, %r2, %r4;
260+
; CHECK-NEXT: selp.b32 %r5, -1, 0, %p1;
261+
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
262+
; CHECK-NEXT: ret;
263+
%b1 = add i32 %a1, 2
264+
%b2 = add i32 %a2, 1
265+
%c1 = trunc nuw i32 %b1 to i8
266+
%c2 = trunc nuw i32 %b2 to i8
267+
%c = icmp eq i8 %c1, %c2
268+
ret i1 %c
269+
}

0 commit comments

Comments
 (0)