|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 |
1 | 2 | ; RUN: opt -passes=loop-vectorize -S < %s 2>&1 | FileCheck %s |
2 | 3 |
|
3 | 4 | target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" |
4 | 5 | target triple = "x86_64-unknown-linux-gnu" |
5 | 6 |
|
6 | 7 | ; Make sure that we can compile the test without crashing. |
; NOTE(review): every row of @barney carries the `-` diff marker and has no
; new-side line number, i.e. this revision removes the function from the test.
; @barney(ptr %dst, i1 %arg) returns void. Its FileCheck lines only require
; that a `middle.block:` label appears after the `@barney(` label; no
; instruction-level output is checked. Several operands are `undef`.
7 | | -define void @barney(ptr %dst, i1 %arg) { |
8 | | - |
9 | | -; CHECK-LABEL: @barney( |
10 | | -; CHECK: middle.block: |
11 | | - |
12 | | -bb: |
13 | | - br label %bb2 |
14 | | - |
; bb2 branches back to itself while %tmp4 is true; %tmp4 compares undef with 0.
15 | | -bb2: ; preds = %bb2, %bb |
16 | | - %tmp4 = icmp slt i32 undef, 0 |
17 | | - br i1 %tmp4, label %bb2, label %bb5 |
18 | | - |
19 | | -bb5: ; preds = %bb2 |
20 | | - br label %bb19 |
21 | | - |
22 | | -bb18: ; preds = %bb33 |
23 | | - ret void |
24 | | - |
; bb19 is re-entered from bb36. %tmp21 (2 from bb5, undef from bb36) is the
; bound compared against in bb62; %tmp22 seeds the i32 phi %tmp52 in bb50.
25 | | -bb19: ; preds = %bb36, %bb5 |
26 | | - %tmp21 = phi i64 [ undef, %bb36 ], [ 2, %bb5 ] |
27 | | - %tmp22 = phi i32 [ %tmp65, %bb36 ], [ undef, %bb5 ] |
28 | | - br label %bb50 |
29 | | - |
; bb33 uses %arg to choose between returning (bb18) and going back to bb19
; through bb36.
30 | | -bb33: ; preds = %bb62 |
31 | | - br i1 %arg, label %bb18, label %bb36 |
32 | | - |
33 | | -bb36: ; preds = %bb33 |
34 | | - br label %bb19 |
35 | | - |
36 | | -bb46: ; preds = %bb50 |
37 | | - br i1 %arg, label %bb48, label %bb59 |
38 | | - |
; %tmp49 has no use in this function; bb48 returns right after computing it.
39 | | -bb48: ; preds = %bb46 |
40 | | - %tmp49 = add i32 %tmp52, 14 |
41 | | - ret void |
42 | | - |
; bb50 is a single-block loop: it stores i8 1 at %dst + %tmp53, steps %tmp52
; by 13 and %tmp53 by 1, and keeps looping while %tmp53 is ult undef.
; %tmp54 has no use in this function.
43 | | -bb50: ; preds = %bb50, %bb19 |
44 | | - %tmp52 = phi i32 [ %tmp55, %bb50 ], [ %tmp22, %bb19 ] |
45 | | - %tmp53 = phi i64 [ %tmp56, %bb50 ], [ 1, %bb19 ] |
46 | | - %gep = getelementptr inbounds i8, ptr %dst, i64 %tmp53 |
47 | | - store i8 1, ptr %gep |
48 | | - %tmp54 = add i32 %tmp52, 12 |
49 | | - %tmp55 = add i32 %tmp52, 13 |
50 | | - %tmp56 = add nuw nsw i64 %tmp53, 1 |
51 | | - %tmp58 = icmp ult i64 %tmp53, undef |
52 | | - br i1 %tmp58, label %bb50, label %bb46 |
53 | | - |
54 | | -bb59: ; preds = %bb46 |
55 | | - br label %bb62 |
56 | | - |
; bb62 (back edge via bb68) continues from bb50's final %tmp55/%tmp56 with the
; same steps of 13 and 1, looping while %tmp66 is ult %tmp21.
57 | | -bb62: ; preds = %bb68, %bb59 |
58 | | - %tmp63 = phi i32 [ %tmp65, %bb68 ], [ %tmp55, %bb59 ] |
59 | | - %tmp64 = phi i64 [ %tmp66, %bb68 ], [ %tmp56, %bb59 ] |
60 | | - %tmp65 = add i32 %tmp63, 13 |
61 | | - %tmp66 = add nuw nsw i64 %tmp64, 1 |
62 | | - %tmp67 = icmp ult i64 %tmp66, %tmp21 |
63 | | - br i1 %tmp67, label %bb68, label %bb33 |
64 | | - |
65 | | -bb68: ; preds = %bb62 |
66 | | - br label %bb62 |
67 | | -} |
68 | 8 |
|
69 | 9 | define i32 @foo(ptr addrspace(1) %p) { |
70 | | - |
71 | | -; CHECK-LABEL: foo |
72 | | -; CHECK: middle.block: |
| 10 | +; CHECK-LABEL: define i32 @foo( |
| 11 | +; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) { |
| 12 | +; CHECK-NEXT: [[ENTRY:.*]]: |
| 13 | +; CHECK-NEXT: br label %[[OUTER:.*]] |
| 14 | +; CHECK: [[OUTER]]: |
| 15 | +; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], %[[OUTER_LATCH:.*]] ], [ 0, %[[ENTRY]] ] |
| 16 | +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 2, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[OUTER_LATCH]] ] |
| 17 | +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDVAR]], 1 |
| 18 | +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8 |
| 19 | +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| 20 | +; CHECK: [[VECTOR_PH]]: |
| 21 | +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 8 |
| 22 | +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] |
| 23 | +; CHECK-NEXT: [[TMP1:%.*]] = add i32 1, [[N_VEC]] |
| 24 | +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[N_VEC]], 2 |
| 25 | +; CHECK-NEXT: [[TMP3:%.*]] = add i32 6, [[TMP2]] |
| 26 | +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| 27 | +; CHECK: [[VECTOR_BODY]]: |
| 28 | +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| 29 | +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] |
| 30 | +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] |
| 31 | +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 6, i32 8, i32 10, i32 12>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| 32 | +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 8) |
| 33 | +; CHECK-NEXT: [[TMP4]] = or <4 x i32> [[VEC_PHI]], [[VEC_IND]] |
| 34 | +; CHECK-NEXT: [[TMP5]] = or <4 x i32> [[VEC_PHI1]], [[STEP_ADD]] |
| 35 | +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 |
| 36 | +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 8) |
| 37 | +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] |
| 38 | +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
| 39 | +; CHECK: [[MIDDLE_BLOCK]]: |
| 40 | +; CHECK-NEXT: [[BIN_RDX:%.*]] = or <4 x i32> [[TMP5]], [[TMP4]] |
| 41 | +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[BIN_RDX]]) |
| 42 | +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] |
| 43 | +; CHECK-NEXT: br i1 [[CMP_N]], label %[[OUTER_LATCH]], label %[[SCALAR_PH]] |
| 44 | +; CHECK: [[SCALAR_PH]]: |
| 45 | +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER]] ] |
| 46 | +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ 1, %[[OUTER]] ] |
| 47 | +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ 6, %[[OUTER]] ] |
| 48 | +; CHECK-NEXT: br label %[[INNER:.*]] |
| 49 | +; CHECK: [[INNER]]: |
| 50 | +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP10:%.*]], %[[INNER]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ] |
| 51 | +; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[TMP11:%.*]], %[[INNER]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] |
| 52 | +; CHECK-NEXT: [[B:%.*]] = phi i32 [ [[TMP9:%.*]], %[[INNER]] ], [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ] |
| 53 | +; CHECK-NEXT: [[TMP9]] = add i32 [[B]], 2 |
| 54 | +; CHECK-NEXT: [[TMP10]] = or i32 [[TMP8]], [[B]] |
| 55 | +; CHECK-NEXT: [[TMP11]] = add nuw nsw i32 [[A]], 1 |
| 56 | +; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 |
| 57 | +; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[IV]], [[TMP12]] |
| 58 | +; CHECK-NEXT: br i1 [[TMP13]], label %[[INNER]], label %[[OUTER_LATCH]], !llvm.loop [[LOOP3:![0-9]+]] |
| 59 | +; CHECK: [[OUTER_LATCH]]: |
| 60 | +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP10]], %[[INNER]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] |
| 61 | +; CHECK-NEXT: store atomic i32 [[DOTLCSSA]], ptr addrspace(1) [[P]] unordered, align 4 |
| 62 | +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 |
| 63 | +; CHECK-NEXT: [[TMP14:%.*]] = icmp ugt i64 [[IV]], 63 |
| 64 | +; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 |
| 65 | +; CHECK-NEXT: br i1 [[TMP14]], label %[[EXIT:.*]], label %[[OUTER]] |
| 66 | +; CHECK: [[EXIT]]: |
| 67 | +; CHECK-NEXT: ret i32 0 |
| 68 | +; |
73 | 69 |
|
74 | 70 | entry: |
75 | 71 | br label %outer |
|
0 commit comments