|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
2 | | -; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -pass-remarks='loop-vectorize' -disable-output -S 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS |
3 | | -; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -S | FileCheck %s |
| 2 | +; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -pass-remarks='loop-vectorize' -disable-output -S 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS |
| 3 | +; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -S | FileCheck %s |
4 | 4 |
|
5 | 5 | ; These tests check that the fold-tail procedure produces correct scalar code when |
6 | 6 | ; the loop vectorizer is only unrolling but not vectorizing. |
@@ -141,5 +141,61 @@ for.body: |
141 | 141 | %cond = icmp eq ptr %ptr, %ptr2 |
142 | 142 | br i1 %cond, label %for.cond.cleanup, label %for.body |
143 | 143 | } |
| 144 | + |
| 145 | +define i64 @live_out_scalar_vf(i64 %n) { |
| 146 | +; CHECK-LABEL: @live_out_scalar_vf( |
| 147 | +; CHECK-NEXT: entry: |
| 148 | +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1 |
| 149 | +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16 |
| 150 | +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] |
| 151 | +; CHECK: vector.ph: |
| 152 | +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16 |
| 153 | +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] |
| 154 | +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] |
| 155 | +; CHECK: vector.body: |
| 156 | +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] |
| 157 | +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] |
| 158 | +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) |
| 159 | +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) |
| 160 | +; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) |
| 161 | +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 |
| 162 | +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4) |
| 163 | +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] |
| 164 | +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] |
| 165 | +; CHECK: middle.block: |
| 166 | +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[STEP_ADD_3]], i32 3 |
| 167 | +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[STEP_ADD_3]], i32 2 |
| 168 | +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] |
| 169 | +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] |
| 170 | +; CHECK: scalar.ph: |
| 171 | +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] |
| 172 | +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] |
| 173 | +; CHECK-NEXT: br label [[LOOP:%.*]] |
| 174 | +; CHECK: loop: |
| 175 | +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] |
| 176 | +; CHECK-NEXT: [[EXITVAL:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV]], [[LOOP]] ] |
| 177 | +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 |
| 178 | +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] |
| 179 | +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] |
| 180 | +; CHECK: exit: |
| 181 | +; CHECK-NEXT: [[TMP19:%.*]] = phi i64 [ [[EXITVAL]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] |
| 182 | +; CHECK-NEXT: ret i64 [[TMP19]] |
| 183 | +; |
| 184 | +entry: |
| 185 | + br label %loop |
| 186 | + |
| 187 | +loop: |
| 188 | + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| 189 | + ; The live-out %exitval must be a phi (it carries the previous iteration's %iv); otherwise the header mask will use a |
| 190 | + ; VPWidenCanonicalIVRecipe instead of a VPScalarIVStepsRecipe. |
| 191 | + %exitval = phi i64 [ 0, %entry ], [ %iv, %loop ] |
| 192 | + %iv.next = add i64 %iv, 1 |
| 193 | + %ec = icmp eq i64 %iv, %n |
| 194 | + br i1 %ec, label %exit, label %loop |
| 195 | + |
| 196 | +exit: |
| 197 | + ret i64 %exitval |
| 198 | +} |
| 199 | + |
144 | 200 | ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
145 | 201 | ; CHECK-REMARKS: {{.*}} |
0 commit comments