@@ -1,94 +1,124 @@
-; RUN: opt -passes=loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-interleave=1 < %s -S | FileCheck %s
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 6
+; RUN: opt -passes=loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s
 
 ; Check that we version this loop with speculating the value 1 for symbolic
 ; strides. This also checks that the symbolic stride information is correctly
 ; propagated to the memcheck generation. Without this the loop wouldn't
 ; vectorize because we couldn't determine the array bounds for the required
 ; memchecks.
 
-; CHECK-LABEL: test
-define void @test(ptr %A, i64 %AStride,
-                  ptr %B, i32 %BStride,
-                  ptr %C, i64 %CStride, i32 %N) {
+define void @test(ptr noalias %A, i64 %AStride, ptr noalias %B, i32 %BStride, ptr noalias %C, i64 %CStride) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr noalias [[A:%.*]], i64 [[ASTRIDE:%.*]], ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]], ptr noalias [[C:%.*]], i64 [[CSTRIDE:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK:       [[VECTOR_SCEVCHECK]]:
+; CHECK-NEXT:    [[IDENT_CHECK1:%.*]] = icmp ne i32 [[BSTRIDE]], 1
+; CHECK-NEXT:    [[IDENT_CHECK2:%.*]] = icmp ne i64 [[CSTRIDE]], 1
+; CHECK-NEXT:    [[DIFF_CHECK6:%.*]] = icmp ne i64 [[ASTRIDE]], 1
+; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = or i1 [[IDENT_CHECK1]], [[IDENT_CHECK2]]
+; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK6]]
+; CHECK-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[INDEX]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP5]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD7:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = mul nsw <2 x i32> [[WIDE_LOAD7]], [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    store <2 x i32> [[TMP8]], ptr [[TMP9]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br [[EXIT:label %.*]]
+; CHECK:       [[SCALAR_PH]]:
+;
 entry:
-  %cmp13 = icmp eq i32 %N, 0
-  br i1 %cmp13, label %for.end, label %for.body.preheader
-
-; CHECK-DAG: icmp ne i64 %AStride, 1
-; CHECK-DAG: icmp ne i32 %BStride, 1
-; CHECK-DAG: icmp ne i64 %CStride, 1
-; CHECK: or
-; CHECK: or
-; CHECK: br
-
-; CHECK: vector.body
-; CHECK: load <2 x i32>
+  br label %loop
 
-for.body.preheader:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
-  %iv.trunc = trunc i64 %indvars.iv to i32
+loop:
+  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+  %iv.trunc = trunc i64 %iv to i32
   %mul = mul i32 %iv.trunc, %BStride
   %mul64 = zext i32 %mul to i64
-  %arrayidx = getelementptr inbounds i32, ptr %B, i64 %mul64
-  %0 = load i32, ptr %arrayidx, align 4
-  %mul2 = mul nsw i64 %indvars.iv, %CStride
-  %arrayidx3 = getelementptr inbounds i32, ptr %C, i64 %mul2
-  %1 = load i32, ptr %arrayidx3, align 4
+  %gep.x = getelementptr inbounds i32, ptr %B, i64 %mul64
+  %0 = load i32, ptr %gep.x, align 4
+  %mul2 = mul nsw i64 %iv, %CStride
+  %gep.c = getelementptr inbounds i32, ptr %C, i64 %mul2
+  %1 = load i32, ptr %gep.c, align 4
   %mul4 = mul nsw i32 %1, %0
-  %mul3 = mul nsw i64 %indvars.iv, %AStride
-  %arrayidx7 = getelementptr inbounds i32, ptr %A, i64 %mul3
-  store i32 %mul4, ptr %arrayidx7, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, %N
-  br i1 %exitcond, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
-  br label %for.end
-
-for.end:
+  %mul3 = mul nsw i64 %iv, %AStride
+  %gep.a = getelementptr inbounds i32, ptr %A, i64 %mul3
+  store i32 %mul4, ptr %gep.a, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %ec = icmp eq i64 %iv.next, 100
+  br i1 %ec, label %exit, label %loop
+
+exit:
   ret void
 }
 
 ; We used to crash on this function because we removed the fptosi cast when
 ; replacing the symbolic stride '%conv'.
 ; PR18480
 
-; CHECK-LABEL: fn1
-; CHECK: load <2 x double>
-
 define void @fn1(ptr noalias %x, ptr noalias %c, double %a) {
+; CHECK-LABEL: define void @fn1(
+; CHECK-SAME: ptr noalias [[X:%.*]], ptr noalias [[C:%.*]], double [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[A]] to i32
+; CHECK-NEXT:    [[CONV2:%.*]] = add i32 [[CONV]], 4
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[CONV2]], 0
+; CHECK-NEXT:    br i1 [[CMP8]], label %[[LOOP_PREHEADER:.*]], [[EXIT:label %.*]]
+; CHECK:       [[LOOP_PREHEADER]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[CONV2]] to i64
+; CHECK-NEXT:    br label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK:       [[VECTOR_SCEVCHECK]]:
+; CHECK-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i32 [[CONV]], 1
+; CHECK-NEXT:    br i1 [[IDENT_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDEX]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP2]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds double, ptr [[C]], i64 [[INDEX]]
+; CHECK-NEXT:    store <2 x double> [[WIDE_LOAD]], ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+;
 entry:
   %conv = fptosi double %a to i32
   %conv2 = add i32 %conv, 4
   %cmp8 = icmp sgt i32 %conv2, 0
-  br i1 %cmp8, label %for.body.preheader, label %for.end
-
-for.body.preheader:
-  br label %for.body
+  br i1 %cmp8, label %loop, label %exit
 
-for.body:
-  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
-  %0 = trunc i64 %indvars.iv to i32
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %0 = trunc i64 %iv to i32
   %mul = mul nsw i32 %0, %conv
-  %idxprom = sext i32 %mul to i64
-  %arrayidx = getelementptr inbounds double, ptr %x, i64 %idxprom
-  %1 = load double, ptr %arrayidx, align 8
-  %arrayidx3 = getelementptr inbounds double, ptr %c, i64 %indvars.iv
-  store double %1, ptr %arrayidx3, align 8
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, %conv2
-  br i1 %exitcond, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
-  br label %for.end
-
-for.end:
+  %mul.ext = sext i32 %mul to i64
+  %gep.x = getelementptr inbounds double, ptr %x, i64 %mul.ext
+  %1 = load double, ptr %gep.x, align 8
+  %gep.c = getelementptr inbounds double, ptr %c, i64 %iv
+  store double %1, ptr %gep.c, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %iv.trunc = trunc i64 %iv.next to i32
+  %ec = icmp eq i32 %iv.trunc, %conv2
+  br i1 %ec, label %exit, label %loop
+
+exit:
   ret void
 }
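
A note on the new "; NOTE:" header: it records that the CHECK lines above were autogenerated by LLVM's utils/update_test_checks.py with the UTC_ARGS shown, so after any further edit to the IR the assertions are regenerated mechanically rather than maintained by hand. A minimal sketch of that regeneration command, assuming a built opt under build/bin; the test's filename is not part of this diff, so the path below is hypothetical:

    # Rerun UpdateTestChecks on the edited test. The UTC_ARGS recorded in the
    # file's NOTE line (--check-globals none, --filter-out-after, --version 6)
    # are picked up from the file itself and reapplied automatically.
    llvm/utils/update_test_checks.py --opt-binary build/bin/opt \
        llvm/test/Transforms/LoopVectorize/version-mem-access.ll  # hypothetical path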