@@ -288,38 +288,32 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
288288; CHECK-LABEL: define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
289289; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], ptr noalias [[D:%.*]], ptr noalias [[E:%.*]], ptr noalias [[F:%.*]], ptr noalias [[G:%.*]], ptr noalias [[H:%.*]], ptr noalias [[I:%.*]], ptr noalias [[J:%.*]], ptr noalias [[K:%.*]], ptr [[L:%.*]]) #[[ATTR1:[0-9]+]] {
290290; CHECK-NEXT: [[ENTRY:.*]]:
291- ; CHECK-NEXT: br i1 true , label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
291+ ; CHECK-NEXT: br i1 false , label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
292292; CHECK: [[VECTOR_PH]]:
293293; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
294294; CHECK: [[VECTOR_BODY]]:
295- ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[J]], i64 0
296- ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i64>, ptr [[TMP0]], align 8
297- ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
298- ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[STRIDED_VEC]] to <8 x i16>
299- ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[K]], i64 0
300- ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[K]], i64 2
301- ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[K]], i64 4
302- ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[K]], i64 6
303- ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[K]], i64 8
304- ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[K]], i64 10
305- ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[K]], i64 12
306- ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i16, ptr [[K]], i64 14
307- ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i16> [[TMP1]], i32 0
308- ; CHECK-NEXT: store i16 [[TMP14]], ptr [[TMP6]], align 2
309- ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i16> [[TMP1]], i32 1
310- ; CHECK-NEXT: store i16 [[TMP15]], ptr [[TMP7]], align 2
311- ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[TMP1]], i32 2
312- ; CHECK-NEXT: store i16 [[TMP16]], ptr [[TMP8]], align 2
313- ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i16> [[TMP1]], i32 3
314- ; CHECK-NEXT: store i16 [[TMP17]], ptr [[TMP9]], align 2
315- ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i16> [[TMP1]], i32 4
316- ; CHECK-NEXT: store i16 [[TMP18]], ptr [[TMP10]], align 2
317- ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i16> [[TMP1]], i32 5
318- ; CHECK-NEXT: store i16 [[TMP19]], ptr [[TMP11]], align 2
319- ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i16> [[TMP1]], i32 6
320- ; CHECK-NEXT: store i16 [[TMP20]], ptr [[TMP12]], align 2
321- ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
322- ; CHECK-NEXT: store i16 [[TMP21]], ptr [[TMP13]], align 2
295+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
296+ ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
297+ ; CHECK-NEXT: [[IV:%.*]] = add i64 [[OFFSET_IDX]], 0
298+ ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
299+ ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
300+ ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
301+ ; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr i64, ptr [[J]], i64 [[IV]]
302+ ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[GEP_J]], align 8
303+ ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
304+ ; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i64> [[STRIDED_VEC]] to <4 x i16>
305+ ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[K]], i64 [[IV]]
306+ ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP1]]
307+ ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP2]]
308+ ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP3]]
309+ ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i16> [[TMP5]], i32 0
310+ ; CHECK-NEXT: store i16 [[TMP10]], ptr [[TMP6]], align 2
311+ ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP5]], i32 1
312+ ; CHECK-NEXT: store i16 [[TMP11]], ptr [[TMP7]], align 2
313+ ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2
314+ ; CHECK-NEXT: store i16 [[TMP12]], ptr [[TMP8]], align 2
315+ ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3
316+ ; CHECK-NEXT: store i16 [[TMP13]], ptr [[TMP9]], align 2
323317; CHECK-NEXT: store i64 0, ptr [[A]], align 8
324318; CHECK-NEXT: store i64 0, ptr [[B]], align 8
325319; CHECK-NEXT: store i64 0, ptr [[C]], align 8
@@ -330,18 +324,20 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
330324; CHECK-NEXT: store i64 0, ptr [[H]], align 8
331325; CHECK-NEXT: store i64 0, ptr [[I]], align 8
332326; CHECK-NEXT: store i64 0, ptr [[L]], align 8
333- ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
327+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
328+ ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4
329+ ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
334330; CHECK: [[MIDDLE_BLOCK]]:
335331; CHECK-NEXT: br label %[[SCALAR_PH]]
336332; CHECK: [[SCALAR_PH]]:
337- ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0 , %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
333+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 8 , %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
338334; CHECK-NEXT: br label %[[LOOP:.*]]
339335; CHECK: [[LOOP]]:
340- ; CHECK-NEXT: [[IV :%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
341- ; CHECK-NEXT: [[GEP_J :%.*]] = getelementptr i64, ptr [[J]], i64 [[IV ]]
342- ; CHECK-NEXT: [[L_J:%.*]] = load i64, ptr [[GEP_J ]], align 8
336+ ; CHECK-NEXT: [[IV1 :%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
337+ ; CHECK-NEXT: [[GEP_J1 :%.*]] = getelementptr i64, ptr [[J]], i64 [[IV1 ]]
338+ ; CHECK-NEXT: [[L_J:%.*]] = load i64, ptr [[GEP_J1 ]], align 8
343339; CHECK-NEXT: [[L_TRUNC:%.*]] = trunc i64 [[L_J]] to i16
344- ; CHECK-NEXT: [[GEP_K:%.*]] = getelementptr i16, ptr [[K]], i64 [[IV ]]
340+ ; CHECK-NEXT: [[GEP_K:%.*]] = getelementptr i16, ptr [[K]], i64 [[IV1 ]]
345341; CHECK-NEXT: store i16 [[L_TRUNC]], ptr [[GEP_K]], align 2
346342; CHECK-NEXT: store i64 0, ptr [[A]], align 8
347343; CHECK-NEXT: store i64 0, ptr [[B]], align 8
@@ -353,9 +349,9 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
353349; CHECK-NEXT: store i64 0, ptr [[H]], align 8
354350; CHECK-NEXT: store i64 0, ptr [[I]], align 8
355351; CHECK-NEXT: store i64 0, ptr [[L]], align 8
356- ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV ]], 2
357- ; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV ]], 14
358- ; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP8 :![0-9]+]]
352+ ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV1 ]], 2
353+ ; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV1 ]], 14
354+ ; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP10 :![0-9]+]]
359355; CHECK: [[EXIT]]:
360356; CHECK-NEXT: ret void
361357;
0 commit comments