|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 |
| 2 | +; RUN: opt -S -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true < %s | FileCheck %s |
| 3 | + |
| 4 | +; When merging two stores with interleaved access vectorization, make sure we |
| 5 | +; propagate the alias information from all scalar stores to form the most |
| 6 | +; generic alias info. |
| 7 | + |
| 8 | +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" ; "e" = little-endian, "m:o" = Mach-O name mangling |
| 9 | + |
| 10 | +%struct.Vec4r = type { double, double, double, double } ; element type indexed through %p in @merge_tbaa_interleave_group; only fields 0 and 1 are loaded there |
| 11 | +%struct.Vec2r = type { double, double } ; element type of the [20 x %struct.Vec2r] destination indexed through %cp |
| 12 | + |
| 13 | +; The new store for the interleave group should alias any double rather than one of the fields of Vec2r. |
| 14 | +define void @merge_tbaa_interleave_group(ptr nocapture readonly %p, ptr noalias %cp, i32 %i) |
| 15 | +; CHECK-LABEL: define void @merge_tbaa_interleave_group( |
| 16 | +; CHECK-SAME: ptr readonly captures(none) [[P:%.*]], ptr noalias [[CP:%.*]], i32 [[I:%.*]]) { |
| 17 | +; CHECK-NEXT: [[ENTRY:.*]]: |
| 18 | +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| 19 | +; CHECK: [[VECTOR_PH]]: |
| 20 | +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| 21 | +; CHECK: [[VECTOR_BODY]]: |
| 22 | +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| 23 | +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 |
| 24 | +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 |
| 25 | +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_VEC4R:%.*]], ptr [[P]], i64 [[TMP0]], i32 0 |
| 26 | +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[TMP1]], i32 0 |
| 27 | +; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP2]], align 8, !tbaa [[TBAA0:![0-9]+]] |
| 28 | +; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP3]], align 8, !tbaa [[TBAA0]] |
| 29 | +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i32 0 |
| 30 | +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP5]], i32 1 |
| 31 | +; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], splat (double 2.000000e+00) |
| 32 | +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [20 x %struct.Vec2r], ptr [[CP]], i64 0, i64 [[TMP0]], i32 0 |
| 33 | +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[TMP0]], i32 1 |
| 34 | +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[TMP1]], i32 1 |
| 35 | +; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP10]], align 8, !tbaa [[TBAA5:![0-9]+]] |
| 36 | +; CHECK-NEXT: [[TMP13:%.*]] = load double, ptr [[TMP11]], align 8, !tbaa [[TBAA5]] |
| 37 | +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i32 0 |
| 38 | +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x double> [[TMP14]], double [[TMP13]], i32 1 |
| 39 | +; CHECK-NEXT: [[TMP16:%.*]] = fmul <2 x double> [[TMP15]], splat (double 3.000000e+00) |
| 40 | +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP16]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| 41 | +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP17]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| 42 | +; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP9]], align 8, !tbaa [[TBAA6:![0-9]+]] |
| 43 | +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 |
| 44 | +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4 |
| 45 | +; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] |
| 46 | +; CHECK: [[MIDDLE_BLOCK]]: |
| 47 | +; CHECK-NEXT: br label %[[EXIT:.*]] |
| 48 | +; CHECK: [[SCALAR_PH]]: |
| 49 | +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] |
| 50 | +; CHECK-NEXT: br label %[[LOOP:.*]] |
| 51 | +; CHECK: [[LOOP]]: |
| 52 | +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] |
| 53 | +; CHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[IV]], i32 0 |
| 54 | +; CHECK-NEXT: [[TMP19:%.*]] = load double, ptr [[X]], align 8, !tbaa [[TBAA0]] |
| 55 | +; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP19]], 2.000000e+00 |
| 56 | +; CHECK-NEXT: [[X4:%.*]] = getelementptr inbounds [20 x %struct.Vec2r], ptr [[CP]], i64 0, i64 [[IV]], i32 0 |
| 57 | +; CHECK-NEXT: store double [[MUL]], ptr [[X4]], align 8, !tbaa [[TBAA10:![0-9]+]] |
| 58 | +; CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[IV]], i32 1 |
| 59 | +; CHECK-NEXT: [[TMP20:%.*]] = load double, ptr [[Y]], align 8, !tbaa [[TBAA5]] |
| 60 | +; CHECK-NEXT: [[MUL7:%.*]] = fmul double [[TMP20]], 3.000000e+00 |
| 61 | +; CHECK-NEXT: [[Y10:%.*]] = getelementptr inbounds [20 x %struct.Vec2r], ptr [[CP]], i64 0, i64 [[IV]], i32 1 |
| 62 | +; CHECK-NEXT: store double [[MUL7]], ptr [[Y10]], align 8, !tbaa [[TBAA12:![0-9]+]] |
| 63 | +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 |
| 64 | +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 4 |
| 65 | +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] |
| 66 | +; CHECK: [[EXIT]]: |
| 67 | +; CHECK-NEXT: ret void |
| 68 | +; |
| 69 | +{ |
| 70 | +entry: |
| 71 | + br label %loop |
| 72 | + |
| 73 | +loop: ; preds = %loop, %entry |
| 74 | + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| 75 | + %x = getelementptr inbounds %struct.Vec4r, ptr %p, i64 %iv, i32 0 |
| 76 | + %0 = load double, ptr %x, align 8, !tbaa !3 ; !3: Vec4r, double field at offset 0 |
| 77 | + %mul = fmul double %0, 2.000000e+00 |
| 78 | + %x4 = getelementptr inbounds [20 x %struct.Vec2r], ptr %cp, i64 0, i64 %iv, i32 0 |
| 79 | + store double %mul, ptr %x4, align 8, !tbaa !8 ; !8: Vec2r, double field at offset 0 |
| 80 | + %y = getelementptr inbounds %struct.Vec4r, ptr %p, i64 %iv, i32 1 |
| 81 | + %1 = load double, ptr %y, align 8, !tbaa !10 ; !10: Vec4r, double field at offset 8 |
| 82 | + %mul7 = fmul double %1, 3.000000e+00 |
| 83 | + %y10 = getelementptr inbounds [20 x %struct.Vec2r], ptr %cp, i64 0, i64 %iv, i32 1 |
| 84 | + store double %mul7, ptr %y10, align 8, !tbaa !11 ; !11: Vec2r, double field at offset 8; the expected vector body merges this with the store above into one <4 x double> store |
| 85 | + %iv.next = add nuw nsw i64 %iv, 1 |
| 86 | + %ec = icmp eq i64 %iv.next, 4 ; constant trip count of 4 |
| 87 | + br i1 %ec, label %exit, label %loop |
| 88 | + |
| 89 | +exit: |
| 90 | + ret void |
| 91 | +} |
| 92 | + |
| 93 | +; Make sure !tbaa is added to neither the interleave group load nor the store. |
| 94 | +define void @ir_tbaa_different(ptr %base, ptr %end, ptr %src) { |
| 95 | +; CHECK-LABEL: define void @ir_tbaa_different( |
| 96 | +; CHECK-SAME: ptr [[BASE:%.*]], ptr [[END:%.*]], ptr [[SRC:%.*]]) { |
| 97 | +; CHECK-NEXT: [[ENTRY:.*]]: |
| 98 | +; CHECK-NEXT: [[BASE2:%.*]] = ptrtoint ptr [[BASE]] to i64 |
| 99 | +; CHECK-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64 |
| 100 | +; CHECK-NEXT: [[BASE3:%.*]] = ptrtoint ptr [[BASE]] to i64 |
| 101 | +; CHECK-NEXT: [[END2:%.*]] = ptrtoint ptr [[END]] to i64 |
| 102 | +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8 |
| 103 | +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[BASE2]] |
| 104 | +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3 |
| 105 | +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 |
| 106 | +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2 |
| 107 | +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] |
| 108 | +; CHECK: [[VECTOR_MEMCHECK]]: |
| 109 | +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[END2]], -8 |
| 110 | +; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[BASE3]] |
| 111 | +; CHECK-NEXT: [[TMP13:%.*]] = lshr i64 [[TMP12]], 3 |
| 112 | +; CHECK-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 3 |
| 113 | +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], 8 |
| 114 | +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[TMP15]] |
| 115 | +; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SRC]], i64 4 |
| 116 | +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[BASE]], [[SCEVGEP3]] |
| 117 | +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]] |
| 118 | +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] |
| 119 | +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] |
| 120 | +; CHECK: [[VECTOR_PH]]: |
| 121 | +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2 |
| 122 | +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] |
| 123 | +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8 |
| 124 | +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[TMP4]] |
| 125 | +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| 126 | +; CHECK: [[VECTOR_BODY]]: |
| 127 | +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| 128 | +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 |
| 129 | +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[OFFSET_IDX]] |
| 130 | +; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[SRC]], align 4, !alias.scope [[META14:![0-9]+]] |
| 131 | +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0 |
| 132 | +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer |
| 133 | +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x float>, ptr [[NEXT_GEP]], align 4 |
| 134 | +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x float> [[WIDE_VEC]], <4 x float> poison, <2 x i32> <i32 0, i32 2> |
| 135 | +; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <4 x float> [[WIDE_VEC]], <4 x float> poison, <2 x i32> <i32 1, i32 3> |
| 136 | +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x float> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] |
| 137 | +; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[STRIDED_VEC3]], [[BROADCAST_SPLAT]] |
| 138 | +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| 139 | +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| 140 | +; CHECK-NEXT: store <4 x float> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 4 |
| 141 | +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 |
| 142 | +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] |
| 143 | +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] |
| 144 | +; CHECK: [[MIDDLE_BLOCK]]: |
| 145 | +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] |
| 146 | +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] |
| 147 | +; CHECK: [[SCALAR_PH]]: |
| 148 | +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[BASE]], %[[ENTRY]] ], [ [[BASE]], %[[VECTOR_MEMCHECK]] ] |
| 149 | +; CHECK-NEXT: br label %[[LOOP:.*]] |
| 150 | +; CHECK: [[LOOP]]: |
| 151 | +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] |
| 152 | +; CHECK-NEXT: [[L_INVAR:%.*]] = load float, ptr [[SRC]], align 4 |
| 153 | +; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8 |
| 154 | +; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[PTR_IV]], align 4 |
| 155 | +; CHECK-NEXT: [[MUL_0:%.*]] = fmul float [[L_0]], [[L_INVAR]] |
| 156 | +; CHECK-NEXT: store float [[MUL_0]], ptr [[PTR_IV]], align 4, !tbaa [[TBAA10]] |
| 157 | +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 4 |
| 158 | +; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_1]], align 4, !tbaa [[TBAA12]] |
| 159 | +; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[L_1]], [[L_INVAR]] |
| 160 | +; CHECK-NEXT: store float [[MUL_1]], ptr [[GEP_1]], align 4 |
| 161 | +; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]] |
| 162 | +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP18:![0-9]+]] |
| 163 | +; CHECK: [[EXIT]]: |
| 164 | +; CHECK-NEXT: ret void |
| 165 | +; |
| 166 | +entry: |
| 167 | + br label %loop |
| 168 | + |
| 169 | +loop: |
| 170 | + %ptr.iv = phi ptr [ %base, %entry ], [ %ptr.iv.next, %loop ] |
| 171 | + %l.invar = load float, ptr %src ; %src itself never changes in the loop; the expected output adds a runtime overlap test between %src and the %base range |
| 172 | + %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 8 ; step of 8 bytes per iteration |
| 173 | + %l.0 = load float, ptr %ptr.iv, align 4 ; untagged load at offset 0 |
| 174 | + %mul.0 = fmul float %l.0, %l.invar |
| 175 | + store float %mul.0, ptr %ptr.iv, align 4, !tbaa !8 ; store to the same address, tagged with !8 |
| 176 | + %gep.1 = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 4 |
| 177 | + %l.1 = load float, ptr %gep.1, align 4, !tbaa !11 ; load at offset 4, tagged with !11 |
| 178 | + %mul.1 = fmul float %l.1, %l.invar |
| 179 | + store float %mul.1, ptr %gep.1, align 4 ; untagged store to the same address |
| 180 | + %ec = icmp eq ptr %ptr.iv.next, %end |
| 181 | + br i1 %ec, label %exit, label %loop |
| 182 | + |
| 183 | +exit: |
| 184 | + ret void |
| 185 | +} |
| 186 | + |
| 187 | + |
| 188 | +define void @noalias_metadata_from_versioning(ptr %base, ptr %end, ptr %src) { |
| 189 | +; CHECK-LABEL: define void @noalias_metadata_from_versioning( |
| 190 | +; CHECK-SAME: ptr [[BASE:%.*]], ptr [[END:%.*]], ptr [[SRC:%.*]]) { |
| 191 | +; CHECK-NEXT: [[ENTRY:.*]]: |
| 192 | +; CHECK-NEXT: [[BASE2:%.*]] = ptrtoint ptr [[BASE]] to i64 |
| 193 | +; CHECK-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64 |
| 194 | +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8 |
| 195 | +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[BASE2]] |
| 196 | +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3 |
| 197 | +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 |
| 198 | +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2 |
| 199 | +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| 200 | +; CHECK: [[VECTOR_PH]]: |
| 201 | +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2 |
| 202 | +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] |
| 203 | +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8 |
| 204 | +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[TMP4]] |
| 205 | +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| 206 | +; CHECK: [[VECTOR_BODY]]: |
| 207 | +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| 208 | +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 |
| 209 | +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[OFFSET_IDX]] |
| 210 | +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x float>, ptr [[NEXT_GEP]], align 4 |
| 211 | +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x float> [[WIDE_VEC]], <4 x float> poison, <2 x i32> <i32 0, i32 2> |
| 212 | +; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <4 x float> [[WIDE_VEC]], <4 x float> poison, <2 x i32> <i32 1, i32 3> |
| 213 | +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x float> [[STRIDED_VEC]], splat (float 1.000000e+01) |
| 214 | +; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[STRIDED_VEC3]], splat (float 1.000000e+01) |
| 215 | +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| 216 | +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> |
| 217 | +; CHECK-NEXT: store <4 x float> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 4 |
| 218 | +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 |
| 219 | +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] |
| 220 | +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] |
| 221 | +; CHECK: [[MIDDLE_BLOCK]]: |
| 222 | +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] |
| 223 | +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] |
| 224 | +; CHECK: [[SCALAR_PH]]: |
| 225 | +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[BASE]], %[[ENTRY]] ] |
| 226 | +; CHECK-NEXT: br label %[[LOOP:.*]] |
| 227 | +; CHECK: [[LOOP]]: |
| 228 | +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] |
| 229 | +; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8 |
| 230 | +; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[PTR_IV]], align 4 |
| 231 | +; CHECK-NEXT: [[MUL_0:%.*]] = fmul float [[L_0]], 1.000000e+01 |
| 232 | +; CHECK-NEXT: store float [[MUL_0]], ptr [[PTR_IV]], align 4 |
| 233 | +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 4 |
| 234 | +; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_1]], align 4 |
| 235 | +; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[L_1]], 1.000000e+01 |
| 236 | +; CHECK-NEXT: store float [[MUL_1]], ptr [[GEP_1]], align 4 |
| 237 | +; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]] |
| 238 | +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP20:![0-9]+]] |
| 239 | +; CHECK: [[EXIT]]: |
| 240 | +; CHECK-NEXT: ret void |
| 241 | +; |
| 242 | +entry: |
| 243 | + br label %loop |
| 244 | + |
| 245 | +loop: |
| 246 | + %ptr.iv = phi ptr [ %base, %entry ], [ %ptr.iv.next, %loop ] |
| 247 | + %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 8 ; step of 8 bytes per iteration |
| 248 | + %l.0 = load float, ptr %ptr.iv, align 4 ; load at offset 0, no metadata |
| 249 | + %mul.0 = fmul float %l.0, 10.0 ; constant factor; NOTE(review): %src is never used in this function although the name mentions versioning -- confirm intent |
| 250 | + store float %mul.0, ptr %ptr.iv, align 4 |
| 251 | + %gep.1 = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 4 |
| 252 | + %l.1 = load float, ptr %gep.1, align 4 ; load at offset 4, no metadata |
| 253 | + %mul.1 = fmul float %l.1, 10.0 |
| 254 | + store float %mul.1, ptr %gep.1, align 4 |
| 255 | + %ec = icmp eq ptr %ptr.iv.next, %end |
| 256 | + br i1 %ec, label %exit, label %loop |
| 257 | + |
| 258 | +exit: |
| 259 | + ret void |
| 260 | +} |
| 261 | + |
| 262 | +!3 = !{!4, !5, i64 0} ; access tag: base Vec4r, access type double, offset 0 |
| 263 | +!4 = !{!"Vec4r", !5, i64 0, !5, i64 8, !5, i64 16, !5, i64 24} ; struct type node: four double members at offsets 0, 8, 16, 24 |
| 264 | +!5 = !{!"double", !6, i64 0} ; scalar type node for double, parent !6 |
| 265 | +!6 = !{!"omnipotent char", !7, i64 0} ; char type node, parent is the root !7 |
| 266 | +!7 = !{!"Simple C/C++ TBAA"} ; TBAA root node |
| 267 | +!8 = !{!9, !5, i64 0} ; access tag: base Vec2r, access type double, offset 0 |
| 268 | +!9 = !{!"Vec2r", !5, i64 0, !5, i64 8} ; struct type node: two double members at offsets 0 and 8 |
| 269 | +!10 = !{!4, !5, i64 8} ; access tag: base Vec4r, access type double, offset 8 |
| 270 | +!11 = !{!9, !5, i64 8} ; access tag: base Vec2r, access type double, offset 8 |
| 271 | +;. |
| 272 | +; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]], i64 0} |
| 273 | +; CHECK: [[META1]] = !{!"Vec4r", [[META2]], i64 0, [[META2]], i64 8, [[META2]], i64 16, [[META2]], i64 24} |
| 274 | +; CHECK: [[META2]] = !{!"double", [[META3:![0-9]+]], i64 0} |
| 275 | +; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} |
| 276 | +; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} |
| 277 | +; CHECK: [[TBAA5]] = !{[[META1]], [[META2]], i64 8} |
| 278 | +; CHECK: [[TBAA6]] = !{[[META2]], [[META2]], i64 0} |
| 279 | +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]]} |
| 280 | +; CHECK: [[META8]] = !{!"llvm.loop.isvectorized", i32 1} |
| 281 | +; CHECK: [[META9]] = !{!"llvm.loop.unroll.runtime.disable"} |
| 282 | +; CHECK: [[TBAA10]] = !{[[META11:![0-9]+]], [[META2]], i64 0} |
| 283 | +; CHECK: [[META11]] = !{!"Vec2r", [[META2]], i64 0, [[META2]], i64 8} |
| 284 | +; CHECK: [[TBAA12]] = !{[[META11]], [[META2]], i64 8} |
| 285 | +; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META9]], [[META8]]} |
| 286 | +; CHECK: [[META14]] = !{[[META15:![0-9]+]]} |
| 287 | +; CHECK: [[META15]] = distinct !{[[META15]], [[META16:![0-9]+]]} |
| 288 | +; CHECK: [[META16]] = distinct !{[[META16]], !"LVerDomain"} |
| 289 | +; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META8]], [[META9]]} |
| 290 | +; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META8]]} |
| 291 | +; CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META8]], [[META9]]} |
| 292 | +; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META9]], [[META8]]} |
| 293 | +;. |
0 commit comments