1+ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 5
12; RUN: opt -S -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s
23
34target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
45
56; PR39417
67; Check that the need for an overflow check prevents vectorizing a loop with a
78; tiny trip count (which implies optimizing for size).
8- ; CHECK-LABEL: @func_34
9- ; CHECK-NOT: vector.scevcheck
10- ; CHECK-NOT: vector.body:
11- ; CHECK-LABEL: bb67:
129define void @func_34 () {
13- bb1:
14- br label %bb67
10+ ; CHECK-LABEL: define void @func_34() {
11+ ; CHECK-NEXT: [[ENTRY:.*]]:
12+ ; CHECK-NEXT: br label %[[LOOP:.*]]
13+ ; CHECK: [[LOOP]]:
14+ ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
15+ ; CHECK-NEXT: [[SEXT:%.*]] = shl i32 [[IV]], 16
16+ ; CHECK-NEXT: [[STEP:%.*]] = ashr exact i32 [[SEXT]], 16
17+ ; CHECK-NEXT: [[IV_NEXT]] = add nsw i32 [[STEP]], 1
18+ ; CHECK-NEXT: [[IV_NEXT_TRUNC:%.*]] = trunc i32 [[IV_NEXT]] to i16
19+ ; CHECK-NEXT: [[EC:%.*]] = icmp slt i16 [[IV_NEXT_TRUNC]], 3
20+ ; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]]
21+ ; CHECK: [[EXIT]]:
22+ ; CHECK-NEXT: ret void
23+ ;
24+ entry:
25+ br label %loop
1526
16- bb67 :
17- %storemerge2 = phi i32 [ 0 , %bb1 ], [ %_tmp2300 , %bb67 ]
18- %sext = shl i32 %storemerge2 , 16
19- %_tmp2299 = ashr exact i32 %sext , 16
20- %_tmp2300 = add nsw i32 %_tmp2299 , 1
21- %_tmp2310 = trunc i32 %_tmp2300 to i16
22- %_tmp2312 = icmp slt i16 %_tmp2310 , 3
23- br i1 %_tmp2312 , label %bb67 , label %bb68
27+ loop :
28+ %iv = phi i32 [ 0 , %entry ], [ %iv.next , %loop ]
29+ %sext = shl i32 %iv , 16
30+ %step = ashr exact i32 %sext , 16
31+ %iv.next = add nsw i32 %step , 1
32+ %iv.next.trunc = trunc i32 %iv.next to i16
33+ %ec = icmp slt i16 %iv.next.trunc , 3
34+ br i1 %ec , label %loop , label %exit
2435
25- bb68 :
36+ exit :
2637 ret void
2738}
2839
2940; Check that a loop under opt-for-size is vectorized without checking for
3041; stride==1.
3142; NOTE: Some assertions have been autogenerated by utils/update_test_checks.py
3243define void @scev4stride1 (ptr noalias nocapture %a , ptr noalias nocapture readonly %b , i32 %k ) #0 {
33- ; CHECK-LABEL: @scev4stride1(
34- ; CHECK-NEXT: for.body.preheader:
35- ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
36- ; CHECK: vector.ph:
37- ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[K:%.*]], i64 0
44+ ; CHECK-LABEL: define void @scev4stride1(
45+ ; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i32 [[K:%.*]]) #[[ATTR0:[0-9]+]] {
46+ ; CHECK-NEXT: [[ENTRY:.*:]]
47+ ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
48+ ; CHECK: [[VECTOR_PH]]:
49+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[K]], i64 0
3850; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
39- ; CHECK-NEXT: br label [[VECTOR_BODY:% .*]]
40- ; CHECK: vector.body :
41- ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
42- ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
51+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
52+ ; CHECK: [[VECTOR_BODY]]:
53+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
54+ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
4355; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
4456; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
45- ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B:%.* ]], i32 [[TMP5]]
57+ ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP5]]
4658; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
4759; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP7]]
4860; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
@@ -57,34 +69,31 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon
5769; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP14]], i32 1
5870; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP15]], i32 2
5971; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP16]], i32 3
60- ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[A:%.* ]], i32 [[INDEX]]
72+ ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]]
6173; CHECK-NEXT: store <4 x i32> [[TMP20]], ptr [[TMP21]], align 4
6274; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
6375; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
6476; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
65- ; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
66- ; CHECK: middle.block:
67- ; CHECK-NEXT: br label [[FOR_END_LOOPEXIT:%.*]]
68- ; CHECK: scalar.ph:
69- ; CHECK: for.body:
70- ; CHECK: for.end.loopexit:
71- ; CHECK-NEXT: ret void
77+ ; CHECK-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
78+ ; CHECK: [[MIDDLE_BLOCK]]:
79+ ; CHECK-NEXT: br [[EXIT:label %.*]]
80+ ; CHECK: [[SCALAR_PH]]:
7281;
73- for.body.preheader :
74- br label %for.body
82+ entry :
83+ br label %loop
7584
76- for.body :
77- %i.07 = phi i32 [ %inc , %for.body ], [ 0 , %for.body.preheader ]
78- %mul = mul nsw i32 %i.07 , %k
79- %arrayidx = getelementptr inbounds i32 , ptr %b , i32 %mul
80- %0 = load i32 , ptr %arrayidx , align 4
81- %arrayidx1 = getelementptr inbounds i32 , ptr %a , i32 %i.07
82- store i32 %0 , ptr %arrayidx1 , align 4
83- %inc = add nuw nsw i32 %i.07 , 1
84- %exitcond = icmp eq i32 %inc , 1024
85- br i1 %exitcond , label %for.end.loopexit , label %for.body
85+ loop :
86+ %iv = phi i32 [ 0 , %entry ], [ %iv.next , %loop ]
87+ %mul = mul nsw i32 %iv , %k
88+ %gep.b = getelementptr inbounds i32 , ptr %b , i32 %mul
89+ %0 = load i32 , ptr %gep.b , align 4
90+ %gep.a = getelementptr inbounds i32 , ptr %a , i32 %iv
91+ store i32 %0 , ptr %gep.a , align 4
92+ %iv.next = add nuw nsw i32 %iv , 1
93+ %ec = icmp eq i32 %iv.next , 1024
94+ br i1 %ec , label %exit , label %loop
8695
87- for.end.loopexit :
96+ exit :
8897 ret void
8998}
9099
0 commit comments