@@ -288,10 +288,56 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
288288; CHECK-LABEL: define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
289289; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], ptr noalias [[D:%.*]], ptr noalias [[E:%.*]], ptr noalias [[F:%.*]], ptr noalias [[G:%.*]], ptr noalias [[H:%.*]], ptr noalias [[I:%.*]], ptr noalias [[J:%.*]], ptr noalias [[K:%.*]], ptr [[L:%.*]]) #[[ATTR1:[0-9]+]] {
290290; CHECK-NEXT: [[ENTRY:.*]]:
291+ ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
292+ ; CHECK: [[VECTOR_PH]]:
293+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
294+ ; CHECK: [[VECTOR_BODY]]:
295+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
296+ ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
297+ ; CHECK-NEXT: [[IV:%.*]] = add i64 [[OFFSET_IDX]], 0
298+ ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
299+ ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
300+ ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
291301; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr i64, ptr [[J]], i64 [[IV]]
292- ; CHECK-NEXT: [[L_J:%.*]] = load i64, ptr [[GEP_J]], align 8
302+ ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[GEP_J]], align 8
303+ ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
304+ ; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i64> [[STRIDED_VEC]] to <4 x i16>
305+ ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[K]], i64 [[IV]]
306+ ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP1]]
307+ ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP2]]
308+ ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP3]]
309+ ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i16> [[TMP5]], i32 0
310+ ; CHECK-NEXT: store i16 [[TMP10]], ptr [[TMP6]], align 2
311+ ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP5]], i32 1
312+ ; CHECK-NEXT: store i16 [[TMP11]], ptr [[TMP7]], align 2
313+ ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2
314+ ; CHECK-NEXT: store i16 [[TMP12]], ptr [[TMP8]], align 2
315+ ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3
316+ ; CHECK-NEXT: store i16 [[TMP13]], ptr [[TMP9]], align 2
317+ ; CHECK-NEXT: store i64 0, ptr [[A]], align 8
318+ ; CHECK-NEXT: store i64 0, ptr [[B]], align 8
319+ ; CHECK-NEXT: store i64 0, ptr [[C]], align 8
320+ ; CHECK-NEXT: store i64 0, ptr [[D]], align 8
321+ ; CHECK-NEXT: store i64 0, ptr [[E]], align 8
322+ ; CHECK-NEXT: store i64 0, ptr [[F]], align 8
323+ ; CHECK-NEXT: store i64 0, ptr [[G]], align 8
324+ ; CHECK-NEXT: store i64 0, ptr [[H]], align 8
325+ ; CHECK-NEXT: store i64 0, ptr [[I]], align 8
326+ ; CHECK-NEXT: store i64 0, ptr [[L]], align 8
327+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
328+ ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4
329+ ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
330+ ; CHECK: [[MIDDLE_BLOCK]]:
331+ ; CHECK-NEXT: br label %[[SCALAR_PH]]
332+ ; CHECK: [[SCALAR_PH]]:
333+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 8, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
334+ ; CHECK-NEXT: br label %[[LOOP:.*]]
335+ ; CHECK: [[LOOP]]:
336+ ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
337+ ; CHECK-NEXT: [[GEP_J1:%.*]] = getelementptr i64, ptr [[J]], i64 [[IV1]]
338+ ; CHECK-NEXT: [[L_J:%.*]] = load i64, ptr [[GEP_J1]], align 8
293339; CHECK-NEXT: [[L_TRUNC:%.*]] = trunc i64 [[L_J]] to i16
294- ; CHECK-NEXT: [[GEP_K:%.*]] = getelementptr i16, ptr [[K]], i64 [[IV]]
340+ ; CHECK-NEXT: [[GEP_K:%.*]] = getelementptr i16, ptr [[K]], i64 [[IV1]]
295341; CHECK-NEXT: store i16 [[L_TRUNC]], ptr [[GEP_K]], align 2
296342; CHECK-NEXT: store i64 0, ptr [[A]], align 8
297343; CHECK-NEXT: store i64 0, ptr [[B]], align 8
@@ -303,8 +349,9 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
303349; CHECK-NEXT: store i64 0, ptr [[H]], align 8
304350; CHECK-NEXT: store i64 0, ptr [[I]], align 8
305351; CHECK-NEXT: store i64 0, ptr [[L]], align 8
306- ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 2
307- ; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV]], 14
352+ ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV1]], 2
353+ ; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV1]], 14
354+ ; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP10:![0-9]+]]
308355; CHECK: [[EXIT]]:
309356; CHECK-NEXT: ret void
310357;
0 commit comments