|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 |
| 2 | +; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-interleaved-mem-accesses -S | FileCheck %s |
| 3 | + |
| | +; Input: a loop that runs 64 times (%iv steps by 4 and the loop exits when |
| | +; %iv.next == 256) while %dst advances 4 bytes per iteration. Each iteration |
| | +; rewrites the 4 bytes at %dst+0 .. %dst+3 in place as |
| | +; new = %a + trunc((zext(old) * %b) / 255). |
| | +; Expected output (per the assertions below): a single <16 x i8> wide load, |
| | +; four de-interleaving shuffles, the widened arithmetic for each of the four |
| | +; byte lanes, and a single interleaved <16 x i8> store back to the same |
| | +; address; the middle block branches straight to the exit. |
| | +; NOTE(review): the function name suggests this was reduced from issue/PR |
| | +; 128062 - inferred from the name only, confirm. |
| 4 | +define void @pr128062(ptr %dst.start, i8 %a, i16 %b) { |
| 5 | +; CHECK-LABEL: define void @pr128062( |
| 6 | +; CHECK-SAME: ptr [[DST_START:%.*]], i8 [[A:%.*]], i16 [[B:%.*]]) { |
| 7 | +; CHECK-NEXT: [[ENTRY:.*:]] |
| 8 | +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] |
| 9 | +; CHECK: [[VECTOR_PH]]: |
| 10 | +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0 |
| 11 | +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer |
| 12 | +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0 |
| 13 | +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer |
| 14 | +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| 15 | +; CHECK: [[VECTOR_BODY]]: |
| 16 | +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| 17 | +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 |
| 18 | +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[OFFSET_IDX]] |
| 19 | +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 |
| 20 | +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12> |
| 21 | +; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13> |
| 22 | +; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14> |
| 23 | +; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15> |
| 24 | +; CHECK-NEXT: [[TMP0:%.*]] = zext <4 x i8> [[STRIDED_VEC]] to <4 x i16> |
| 25 | +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw <4 x i16> [[TMP0]], [[BROADCAST_SPLAT]] |
| 26 | +; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i16> [[TMP1]], splat (i16 255) |
| 27 | +; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw <4 x i16> [[TMP2]] to <4 x i8> |
| 28 | +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP3]] |
| 29 | +; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[STRIDED_VEC3]] to <4 x i16> |
| 30 | +; CHECK-NEXT: [[TMP6:%.*]] = mul nuw <4 x i16> [[TMP5]], [[BROADCAST_SPLAT]] |
| 31 | +; CHECK-NEXT: [[TMP7:%.*]] = udiv <4 x i16> [[TMP6]], splat (i16 255) |
| 32 | +; CHECK-NEXT: [[TMP8:%.*]] = trunc nuw <4 x i16> [[TMP7]] to <4 x i8> |
| 33 | +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP8]] |
| 34 | +; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[STRIDED_VEC4]] to <4 x i16> |
| 35 | +; CHECK-NEXT: [[TMP11:%.*]] = mul nuw <4 x i16> [[TMP10]], [[BROADCAST_SPLAT]] |
| 36 | +; CHECK-NEXT: [[TMP12:%.*]] = udiv <4 x i16> [[TMP11]], splat (i16 255) |
| 37 | +; CHECK-NEXT: [[TMP13:%.*]] = trunc nuw <4 x i16> [[TMP12]] to <4 x i8> |
| 38 | +; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP13]] |
| 39 | +; CHECK-NEXT: [[TMP15:%.*]] = zext <4 x i8> [[STRIDED_VEC5]] to <4 x i16> |
| 40 | +; CHECK-NEXT: [[TMP16:%.*]] = mul nuw <4 x i16> [[TMP15]], [[BROADCAST_SPLAT]] |
| 41 | +; CHECK-NEXT: [[TMP17:%.*]] = udiv <4 x i16> [[TMP16]], splat (i16 255) |
| 42 | +; CHECK-NEXT: [[TMP18:%.*]] = trunc nuw <4 x i16> [[TMP17]] to <4 x i8> |
| 43 | +; CHECK-NEXT: [[TMP19:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP18]] |
| 44 | +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| 45 | +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP14]], <4 x i8> [[TMP19]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| 46 | +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x i8> [[TMP20]], <8 x i8> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| 47 | +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15> |
| 48 | +; CHECK-NEXT: store <16 x i8> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 1 |
| 49 | +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| 50 | +; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 |
| 51 | +; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
| 52 | +; CHECK: [[MIDDLE_BLOCK]]: |
| 53 | +; CHECK-NEXT: br label %[[EXIT:.*]] |
| 54 | +; CHECK: [[EXIT]]: |
| 55 | +; CHECK-NEXT: ret void |
| 56 | +; |
| 57 | +entry: |
| 58 | + br label %loop |
| 59 | + |
| 60 | +loop: |
| 61 | + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| 62 | + %dst = phi ptr [ %dst.start, %entry ], [ %dst.next, %loop ] |
| 63 | + %dst.next = getelementptr inbounds nuw i8, ptr %dst, i64 4 |
| | + ; Byte at offset 0: load, scale by %b / 255, add %a, store back. |
| 64 | + %load.dst = load i8, ptr %dst, align 1 |
| 65 | + %dst.ext = zext i8 %load.dst to i16 |
| 66 | + %mul.dst.0 = mul nuw i16 %dst.ext, %b |
| 67 | + %udiv.0 = udiv i16 %mul.dst.0, 255 |
| 68 | + %trunc.0 = trunc nuw i16 %udiv.0 to i8 |
| 69 | + %val.0 = add i8 %a, %trunc.0 |
| 70 | + store i8 %val.0, ptr %dst, align 1 |
| | + ; Byte at offset 1: same computation. |
| 71 | + %gep.dst.1 = getelementptr inbounds nuw i8, ptr %dst, i64 1 |
| 72 | + %load.dst.1 = load i8, ptr %gep.dst.1, align 1 |
| 73 | + %dst.1.ext = zext i8 %load.dst.1 to i16 |
| 74 | + %mul.dst.1 = mul nuw i16 %dst.1.ext, %b |
| 75 | + %udiv.1 = udiv i16 %mul.dst.1, 255 |
| 76 | + %trunc.1 = trunc nuw i16 %udiv.1 to i8 |
| 77 | + %val.1 = add i8 %a, %trunc.1 |
| 78 | + store i8 %val.1, ptr %gep.dst.1, align 1 |
| | + ; Byte at offset 2: same computation. |
| 79 | + %gep.dst.2 = getelementptr inbounds nuw i8, ptr %dst, i64 2 |
| 80 | + %load.dst.2 = load i8, ptr %gep.dst.2, align 1 |
| 81 | + %dst.2.ext = zext i8 %load.dst.2 to i16 |
| 82 | + %mul.dst.2 = mul nuw i16 %dst.2.ext, %b |
| 83 | + %udiv.2 = udiv i16 %mul.dst.2, 255 |
| 84 | + %trunc.2 = trunc nuw i16 %udiv.2 to i8 |
| 85 | + %val.2 = add i8 %a, %trunc.2 |
| 86 | + store i8 %val.2, ptr %gep.dst.2, align 1 |
| | + ; Byte at offset 3: same computation. |
| 87 | + %gep.dst.3 = getelementptr inbounds nuw i8, ptr %dst, i64 3 |
| 88 | + %load.dst.3 = load i8, ptr %gep.dst.3, align 1 |
| 89 | + %dst.3.ext = zext i8 %load.dst.3 to i16 |
| 90 | + %mul.dst.3 = mul nuw i16 %dst.3.ext, %b |
| 91 | + %udiv.3 = udiv i16 %mul.dst.3, 255 |
| 92 | + %trunc.3 = trunc nuw i16 %udiv.3 to i8 |
| 93 | + %val.3 = add i8 %a, %trunc.3 |
| 94 | + store i8 %val.3, ptr %gep.dst.3, align 1 |
| | + ; %iv steps by 4 and the loop exits once it reaches 256 (64 iterations). |
| 95 | + %iv.next = add i64 %iv, 4 |
| 96 | + %exit.cond = icmp eq i64 %iv.next, 256 |
| 97 | + br i1 %exit.cond, label %exit, label %loop |
| 98 | + |
| 99 | +exit: |
| 100 | + ret void |
| 101 | +} |
| 102 | + |
| 103 | +; Same as @pr128062, except that the byte at offset 1 is widened with sext instead of zext (%dst.1.ext), so the four per-byte instruction chains no longer use identical opcodes. |
| 104 | +define void @opcode_mismatch(ptr %dst.start, i8 %a, i16 %b) { |
| 105 | +; CHECK-LABEL: define void @opcode_mismatch( |
| 106 | +; CHECK-SAME: ptr [[DST_START:%.*]], i8 [[A:%.*]], i16 [[B:%.*]]) { |
| 107 | +; CHECK-NEXT: [[ENTRY:.*:]] |
| 108 | +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] |
| 109 | +; CHECK: [[VECTOR_PH]]: |
| 110 | +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0 |
| 111 | +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer |
| 112 | +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0 |
| 113 | +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer |
| 114 | +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| 115 | +; CHECK: [[VECTOR_BODY]]: |
| 116 | +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| 117 | +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 |
| 118 | +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[OFFSET_IDX]] |
| 119 | +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 |
| 120 | +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12> |
| 121 | +; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13> |
| 122 | +; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14> |
| 123 | +; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15> |
| 124 | +; CHECK-NEXT: [[TMP0:%.*]] = zext <4 x i8> [[STRIDED_VEC]] to <4 x i16> |
| 125 | +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw <4 x i16> [[TMP0]], [[BROADCAST_SPLAT]] |
| 126 | +; CHECK-NEXT: [[TMP2:%.*]] = udiv <4 x i16> [[TMP1]], splat (i16 255) |
| 127 | +; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw <4 x i16> [[TMP2]] to <4 x i8> |
| 128 | +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP3]] |
| 129 | +; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i8> [[STRIDED_VEC3]] to <4 x i16> |
| 130 | +; CHECK-NEXT: [[TMP6:%.*]] = mul nuw <4 x i16> [[TMP5]], [[BROADCAST_SPLAT]] |
| 131 | +; CHECK-NEXT: [[TMP7:%.*]] = udiv <4 x i16> [[TMP6]], splat (i16 255) |
| 132 | +; CHECK-NEXT: [[TMP8:%.*]] = trunc nuw <4 x i16> [[TMP7]] to <4 x i8> |
| 133 | +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP8]] |
| 134 | +; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[STRIDED_VEC4]] to <4 x i16> |
| 135 | +; CHECK-NEXT: [[TMP11:%.*]] = mul nuw <4 x i16> [[TMP10]], [[BROADCAST_SPLAT]] |
| 136 | +; CHECK-NEXT: [[TMP12:%.*]] = udiv <4 x i16> [[TMP11]], splat (i16 255) |
| 137 | +; CHECK-NEXT: [[TMP13:%.*]] = trunc nuw <4 x i16> [[TMP12]] to <4 x i8> |
| 138 | +; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP13]] |
| 139 | +; CHECK-NEXT: [[TMP15:%.*]] = zext <4 x i8> [[STRIDED_VEC5]] to <4 x i16> |
| 140 | +; CHECK-NEXT: [[TMP16:%.*]] = mul nuw <4 x i16> [[TMP15]], [[BROADCAST_SPLAT]] |
| 141 | +; CHECK-NEXT: [[TMP17:%.*]] = udiv <4 x i16> [[TMP16]], splat (i16 255) |
| 142 | +; CHECK-NEXT: [[TMP18:%.*]] = trunc nuw <4 x i16> [[TMP17]] to <4 x i8> |
| 143 | +; CHECK-NEXT: [[TMP19:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP18]] |
| 144 | +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| 145 | +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP14]], <4 x i8> [[TMP19]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| 146 | +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x i8> [[TMP20]], <8 x i8> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |
| 147 | +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15> |
| 148 | +; CHECK-NEXT: store <16 x i8> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 1 |
| 149 | +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| 150 | +; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 |
| 151 | +; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] |
| 152 | +; CHECK: [[MIDDLE_BLOCK]]: |
| 153 | +; CHECK-NEXT: br label %[[EXIT:.*]] |
| 154 | +; CHECK: [[EXIT]]: |
| 155 | +; CHECK-NEXT: ret void |
| 156 | +; |
| 157 | +entry: |
| 158 | + br label %loop |
| 159 | + |
| 160 | +loop: |
| 161 | + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| 162 | + %dst = phi ptr [ %dst.start, %entry ], [ %dst.next, %loop ] |
| 163 | + %dst.next = getelementptr inbounds nuw i8, ptr %dst, i64 4 |
| | + ; Byte at offset 0: widened with zext. |
| 164 | + %load.dst = load i8, ptr %dst, align 1 |
| 165 | + %dst.ext = zext i8 %load.dst to i16 |
| 166 | + %mul.dst.0 = mul nuw i16 %dst.ext, %b |
| 167 | + %udiv.0 = udiv i16 %mul.dst.0, 255 |
| 168 | + %trunc.0 = trunc nuw i16 %udiv.0 to i8 |
| 169 | + %val.0 = add i8 %a, %trunc.0 |
| 170 | + store i8 %val.0, ptr %dst, align 1 |
| | + ; Byte at offset 1: the only one widened with sext rather than zext. |
| 171 | + %gep.dst.1 = getelementptr inbounds nuw i8, ptr %dst, i64 1 |
| 172 | + %load.dst.1 = load i8, ptr %gep.dst.1, align 1 |
| 173 | + %dst.1.ext = sext i8 %load.dst.1 to i16 |
| 174 | + %mul.dst.1 = mul nuw i16 %dst.1.ext, %b |
| 175 | + %udiv.1 = udiv i16 %mul.dst.1, 255 |
| 176 | + %trunc.1 = trunc nuw i16 %udiv.1 to i8 |
| 177 | + %val.1 = add i8 %a, %trunc.1 |
| 178 | + store i8 %val.1, ptr %gep.dst.1, align 1 |
| | + ; Byte at offset 2: widened with zext. |
| 179 | + %gep.dst.2 = getelementptr inbounds nuw i8, ptr %dst, i64 2 |
| 180 | + %load.dst.2 = load i8, ptr %gep.dst.2, align 1 |
| 181 | + %dst.2.ext = zext i8 %load.dst.2 to i16 |
| 182 | + %mul.dst.2 = mul nuw i16 %dst.2.ext, %b |
| 183 | + %udiv.2 = udiv i16 %mul.dst.2, 255 |
| 184 | + %trunc.2 = trunc nuw i16 %udiv.2 to i8 |
| 185 | + %val.2 = add i8 %a, %trunc.2 |
| 186 | + store i8 %val.2, ptr %gep.dst.2, align 1 |
| | + ; Byte at offset 3: widened with zext. |
| 187 | + %gep.dst.3 = getelementptr inbounds nuw i8, ptr %dst, i64 3 |
| 188 | + %load.dst.3 = load i8, ptr %gep.dst.3, align 1 |
| 189 | + %dst.3.ext = zext i8 %load.dst.3 to i16 |
| 190 | + %mul.dst.3 = mul nuw i16 %dst.3.ext, %b |
| 191 | + %udiv.3 = udiv i16 %mul.dst.3, 255 |
| 192 | + %trunc.3 = trunc nuw i16 %udiv.3 to i8 |
| 193 | + %val.3 = add i8 %a, %trunc.3 |
| 194 | + store i8 %val.3, ptr %gep.dst.3, align 1 |
| | + ; %iv steps by 4 and the loop exits once it reaches 256 (64 iterations). |
| 195 | + %iv.next = add i64 %iv, 4 |
| 196 | + %exit.cond = icmp eq i64 %iv.next, 256 |
| 197 | + br i1 %exit.cond, label %exit, label %loop |
| 198 | + |
| 199 | +exit: |
| 200 | + ret void |
| 201 | +} |
0 commit comments