; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s

; The LIT tests rely on i32, i16 and i8 being valid machine types.
; The bounds checking tests require also i64 and i128.
target datalayout = "n8:16:32:64:128"

; This LIT test checks if TruncInstCombine pass correctly recognizes the
; constraints from a signed min-max clamp. The clamp is a sequence of smin and
@@ -12,6 +13,11 @@ target datalayout = "n8:16:32"
; of smin and smax:
; a) y = smax(smin(x, upper_limit), lower_limit)
; b) y = smin(smax(x, lower_limit), upper_limit)
;
; The clamp is used in TruncInstCombine.cpp pass (as part of aggressive-instcombine)
; to optimize extensions and truncations of lshr. This is what is tested here.
; The pass also optimizes extensions and truncations of other binary operators,
; but in such cases the smin-smax clamp may not be used.

1622define i8 @test_0a (i16 %x ) {
1723; CHECK-LABEL: define i8 @test_0a(
@@ -47,6 +53,8 @@ define i8 @test_0b(i16 %x) {
4753 ret i8 %b.trunc
4854}
4955
; The following two tests contain add instead of lshr.
; The optimization works here as well.
5058define i8 @test_1a (i16 %x ) {
5159; CHECK-LABEL: define i8 @test_1a(
5260; CHECK-SAME: i16 [[X:%.*]]) {
@@ -81,19 +89,23 @@ define i8 @test_1b(i16 %x) {
8189 ret i8 %b.trunc
8290}
8391
; Tests for clamping with negative min and max.

; With sext no optimization occurs.
define i8 @test_2a(i16 %x) {
; CHECK-LABEL: define i8 @test_2a(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 -1)
; CHECK-NEXT:    [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 -31)
; CHECK-NEXT:    [[A:%.*]] = sext i16 [[TMP2]] to i32
; CHECK-NEXT:    [[B:%.*]] = lshr i32 [[A]], 2
; CHECK-NEXT:    [[B_TRUNC:%.*]] = trunc i32 [[B]] to i8
; CHECK-NEXT:    ret i8 [[B_TRUNC]]
;
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 -1)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 -31)
  %a = sext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}
@@ -103,31 +115,69 @@ define i8 @test_2b(i16 %x) {
103115; CHECK-SAME: i16 [[X:%.*]]) {
104116; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smax.i16(i16 [[X]], i16 -31)
105117; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP1]], i16 -1)
106- ; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
107- ; CHECK-NEXT: [[B:%.*]] = add i8 [[A]], 2
108- ; CHECK-NEXT: ret i8 [[B]]
118+ ; CHECK-NEXT: [[A:%.*]] = sext i16 [[TMP2]] to i32
119+ ; CHECK-NEXT: [[B:%.*]] = lshr i32 [[A]], 2
120+ ; CHECK-NEXT: [[B_TRUNC:%.*]] = trunc i32 [[B]] to i8
121+ ; CHECK-NEXT: ret i8 [[B_TRUNC]]
109122;
110123 %1 = tail call i16 @llvm.smax.i16 (i16 %x , i16 -31 )
111124 %2 = tail call i16 @llvm.smin.i16 (i16 %1 , i16 -1 )
112125 %a = sext i16 %2 to i32
113- %b = add i32 %a , 2
126+ %b = lshr i32 %a , 2
127+ %b.trunc = trunc i32 %b to i8
128+ ret i8 %b.trunc
129+ }
130+
; With zext the optimization occurs.
define i8 @test_2c(i16 %x) {
; CHECK-LABEL: define i8 @test_2c(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 -1)
; CHECK-NEXT:    [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 -31)
; CHECK-NEXT:    [[B:%.*]] = lshr i16 [[TMP2]], 2
; CHECK-NEXT:    [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
; CHECK-NEXT:    ret i8 [[B_TRUNC]]
;
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 -1)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 -31)
  %a = zext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}
117148
define i8 @test_2d(i16 %x) {
; CHECK-LABEL: define i8 @test_2d(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.smax.i16(i16 [[X]], i16 -31)
; CHECK-NEXT:    [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP1]], i16 -1)
; CHECK-NEXT:    [[B:%.*]] = lshr i16 [[TMP2]], 2
; CHECK-NEXT:    [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
; CHECK-NEXT:    ret i8 [[B_TRUNC]]
;
  %1 = tail call i16 @llvm.smax.i16(i16 %x, i16 -31)
  %2 = tail call i16 @llvm.smin.i16(i16 %1, i16 -1)
  %a = zext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}
165+
; Tests for clamping with mixed-signed min and max.
; With zext the optimization occurs.
define i8 @test_3a(i16 %x) {
; CHECK-LABEL: define i8 @test_3a(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 31)
; CHECK-NEXT:    [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 -31)
; CHECK-NEXT:    [[B:%.*]] = lshr i16 [[TMP2]], 2
; CHECK-NEXT:    [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
; CHECK-NEXT:    ret i8 [[B_TRUNC]]
;
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 31)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 -31)
  %a = zext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}
@@ -137,31 +187,32 @@ define i8 @test_3b(i16 %x) {
137187; CHECK-SAME: i16 [[X:%.*]]) {
138188; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smax.i16(i16 [[X]], i16 -31)
139189; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smin.i16(i16 [[TMP1]], i16 31)
140- ; CHECK-NEXT: [[A :%.*]] = trunc i16 [[TMP2]] to i8
141- ; CHECK-NEXT: [[B :%.*]] = add i8 [[A]], 2
142- ; CHECK-NEXT: ret i8 [[B ]]
190+ ; CHECK-NEXT: [[B :%.*]] = lshr i16 [[TMP2]], 2
191+ ; CHECK-NEXT: [[B_TRUNC :%.*]] = trunc i16 [[B]] to i8
192+ ; CHECK-NEXT: ret i8 [[B_TRUNC ]]
143193;
144194 %1 = tail call i16 @llvm.smax.i16 (i16 %x , i16 -31 )
145195 %2 = tail call i16 @llvm.smin.i16 (i16 %1 , i16 31 )
146- %a = sext i16 %2 to i32
147- %b = add i32 %a , 2
196+ %a = zext i16 %2 to i32
197+ %b = lshr i32 %a , 2
148198 %b.trunc = trunc i32 %b to i8
149199 ret i8 %b.trunc
150200}
151201
; Optimizations with vector types.
define <16 x i8> @test_vec_1a(<16 x i16> %x) {
; CHECK-LABEL: define <16 x i8> @test_vec_1a(
; CHECK-SAME: <16 x i16> [[X:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> [[X]], <16 x i16> splat (i16 127))
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> [[TMP1]], <16 x i16> zeroinitializer)
; CHECK-NEXT:    [[A:%.*]] = trunc <16 x i16> [[TMP2]] to <16 x i8>
; CHECK-NEXT:    [[B:%.*]] = lshr <16 x i8> [[A]], splat (i8 2)
; CHECK-NEXT:    ret <16 x i8> [[B]]
;
  %1 = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> %x, <16 x i16> splat (i16 127))
  %2 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %1, <16 x i16> zeroinitializer)
  %a = sext <16 x i16> %2 to <16 x i32>
  %b = lshr <16 x i32> %a, splat (i32 2)
  %b.trunc = trunc <16 x i32> %b to <16 x i8>
  ret <16 x i8> %b.trunc
}
@@ -172,13 +223,13 @@ define <16 x i8> @test_vec_1b(<16 x i16> %x) {
172223; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> [[X]], <16 x i16> zeroinitializer)
173224; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> [[TMP1]], <16 x i16> splat (i16 127))
174225; CHECK-NEXT: [[A:%.*]] = trunc <16 x i16> [[TMP2]] to <16 x i8>
175- ; CHECK-NEXT: [[B:%.*]] = add <16 x i8> [[A]], splat (i8 2)
226+ ; CHECK-NEXT: [[B:%.*]] = lshr <16 x i8> [[A]], splat (i8 2)
176227; CHECK-NEXT: ret <16 x i8> [[B]]
177228;
178229 %1 = tail call <16 x i16 > @llvm.smax.v16i16 (<16 x i16 > %x , <16 x i16 > zeroinitializer )
179230 %2 = tail call <16 x i16 > @llvm.smin.v16i16 (<16 x i16 > %1 , <16 x i16 > splat (i16 127 ))
180231 %a = sext <16 x i16 > %2 to <16 x i32 >
181- %b = add <16 x i32 > %a , splat (i32 2 )
232+ %b = lshr <16 x i32 > %a , splat (i32 2 )
182233 %b.trunc = trunc <16 x i32 > %b to <16 x i8 >
183234 ret <16 x i8 > %b.trunc
184235}
@@ -217,14 +268,14 @@ define i8 @test_bounds_1(i16 %x) {
217268; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 127)
218269; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 0)
219270; CHECK-NEXT: [[A:%.*]] = trunc i16 [[TMP2]] to i8
220- ; CHECK-NEXT: [[SHR :%.*]] = ashr i8 [[A]], 7
221- ; CHECK-NEXT: ret i8 [[SHR ]]
271+ ; CHECK-NEXT: [[B :%.*]] = lshr i8 [[A]], 7
272+ ; CHECK-NEXT: ret i8 [[B ]]
222273;
223274 %1 = tail call i16 @llvm.smin.i16 (i16 %x , i16 127 )
224275 %2 = tail call i16 @llvm.smax.i16 (i16 %1 , i16 0 )
225276 %a = sext i16 %2 to i32
226- %shr = ashr i32 %a , 7
227- %b.trunc = trunc i32 %shr to i8
277+ %b = lshr i32 %a , 7
278+ %b.trunc = trunc i32 %b to i8
228279 ret i8 %b.trunc
229280}
230281
@@ -234,15 +285,15 @@ define i8 @test_bounds_2(i16 %x) {
234285; CHECK-SAME: i16 [[X:%.*]]) {
235286; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 128)
236287; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 0)
237- ; CHECK-NEXT: [[SHR :%.*]] = ashr i16 [[TMP2]], 7
238- ; CHECK-NEXT: [[B_TRUNC :%.*]] = trunc i16 [[SHR]] to i8
239- ; CHECK-NEXT: ret i8 [[B_TRUNC ]]
288+ ; CHECK-NEXT: [[A :%.*]] = trunc i16 [[TMP2]] to i8
289+ ; CHECK-NEXT: [[B :%.*]] = lshr i8 [[A]], 7
290+ ; CHECK-NEXT: ret i8 [[B ]]
240291;
241292 %1 = tail call i16 @llvm.smin.i16 (i16 %x , i16 128 )
242293 %2 = tail call i16 @llvm.smax.i16 (i16 %1 , i16 0 )
243294 %a = sext i16 %2 to i32
244- %shr = ashr i32 %a , 7
245- %b.trunc = trunc i32 %shr to i8
295+ %b = lshr i32 %a , 7
296+ %b.trunc = trunc i32 %b to i8
246297 ret i8 %b.trunc
247298}
248299
@@ -253,14 +304,85 @@ define i8 @test_bounds_3(i16 %x) {
253304; CHECK-SAME: i16 [[X:%.*]]) {
254305; CHECK-NEXT: [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 32767)
255306; CHECK-NEXT: [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 32752)
256- ; CHECK-NEXT: [[A :%.*]] = trunc i16 [[TMP2]] to i8
257- ; CHECK-NEXT: [[AND :%.*]] = and i8 [[A]], -1
258- ; CHECK-NEXT: ret i8 [[AND ]]
307+ ; CHECK-NEXT: [[B :%.*]] = lshr i16 [[TMP2]], 2
308+ ; CHECK-NEXT: [[B_TRUNC :%.*]] = trunc i16 [[B]] to i8
309+ ; CHECK-NEXT: ret i8 [[B_TRUNC ]]
259310;
260311 %1 = tail call i16 @llvm.smin.i16 (i16 %x , i16 32767 )
261312 %2 = tail call i16 @llvm.smax.i16 (i16 %1 , i16 32752 )
262313 %a = sext i16 %2 to i32
263- %and = and i32 %a , 255
264- %b.trunc = trunc i32 %and to i8
314+ %b = lshr i32 %a , 2
315+ %b.trunc = trunc i32 %b to i8
316+ ret i8 %b.trunc
317+ }
318+
; Here min = 128 is greater than max = 0.
define i8 @test_bounds_4(i16 %x) {
; CHECK-LABEL: define i8 @test_bounds_4(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 0)
; CHECK-NEXT:    [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 128)
; CHECK-NEXT:    [[B:%.*]] = lshr i16 [[TMP2]], 2
; CHECK-NEXT:    [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
; CHECK-NEXT:    ret i8 [[B_TRUNC]]
;
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 0)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 128)
  %a = sext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}
336+
; The following 3 tests check the situation where min and max are minimal and
; maximal signed values. No transformations should occur here.
define i8 @test_bounds_5(i16 %x) {
; CHECK-LABEL: define i8 @test_bounds_5(
; CHECK-SAME: i16 [[X:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[X]], i16 32767)
; CHECK-NEXT:    [[TMP2:%.*]] = tail call i16 @llvm.smax.i16(i16 [[TMP1]], i16 -32768)
; CHECK-NEXT:    [[B:%.*]] = lshr i16 [[TMP2]], 2
; CHECK-NEXT:    [[B_TRUNC:%.*]] = trunc i16 [[B]] to i8
; CHECK-NEXT:    ret i8 [[B_TRUNC]]
;
  %1 = tail call i16 @llvm.smin.i16(i16 %x, i16 32767)
  %2 = tail call i16 @llvm.smax.i16(i16 %1, i16 -32768)
  %a = zext i16 %2 to i32
  %b = lshr i32 %a, 2
  %b.trunc = trunc i32 %b to i8
  ret i8 %b.trunc
}
355+
define i8 @test_bounds_6(i32 %x) {
; CHECK-LABEL: define i8 @test_bounds_6(
; CHECK-SAME: i32 [[X:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.smin.i32(i32 [[X]], i32 2147483647)
; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.smax.i32(i32 [[TMP1]], i32 -2147483648)
; CHECK-NEXT:    [[B:%.*]] = lshr i32 [[TMP2]], 2
; CHECK-NEXT:    [[B_TRUNC:%.*]] = trunc i32 [[B]] to i8
; CHECK-NEXT:    ret i8 [[B_TRUNC]]
;
  %1 = tail call i32 @llvm.smin.i32(i32 %x, i32 2147483647)
  %2 = tail call i32 @llvm.smax.i32(i32 %1, i32 -2147483648)
  %a = zext i32 %2 to i64
  %b = lshr i64 %a, 2
  %b.trunc = trunc i64 %b to i8
  ret i8 %b.trunc
}
372+
define i8 @test_bounds_7(i64 %x) {
; CHECK-LABEL: define i8 @test_bounds_7(
; CHECK-SAME: i64 [[X:%.*]]) {
; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.smin.i64(i64 [[X]], i64 9223372036854775807)
; CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP1]], i64 -9223372036854775808)
; CHECK-NEXT:    [[B:%.*]] = lshr i64 [[TMP2]], 2
; CHECK-NEXT:    [[B_TRUNC:%.*]] = trunc i64 [[B]] to i8
; CHECK-NEXT:    ret i8 [[B_TRUNC]]
;
  %1 = tail call i64 @llvm.smin.i64(i64 %x, i64 9223372036854775807)
  %2 = tail call i64 @llvm.smax.i64(i64 %1, i64 -9223372036854775808)
  %a = zext i64 %2 to i128
  %b = lshr i128 %a, 2
  %b.trunc = trunc i128 %b to i8
  ret i8 %b.trunc
}
0 commit comments