@@ -16,7 +16,7 @@ define void @test_pr25490(i32 %n, ptr noalias nocapture %a, ptr noalias nocaptur
1616; CHECK-NEXT: br i1 [[CMP_28]], label [[FOR_COND_CLEANUP:%.*]], label [[ITER_CHECK:%.*]]
1717; CHECK: iter.check:
1818; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
19- ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8
19+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
2020; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
2121; CHECK: vector.main.loop.iter.check:
2222; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[N]], 16
@@ -50,33 +50,33 @@ define void @test_pr25490(i32 %n, ptr noalias nocapture %a, ptr noalias nocaptur
5050; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
5151; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
5252; CHECK: vec.epilog.iter.check:
53- ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[TMP0]], 8
54- ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK_NOT_NOT :%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
55- ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK_NOT_NOT ]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
53+ ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[TMP0]], 12
54+ ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK :%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
55+ ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK ]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
5656; CHECK: vec.epilog.ph:
5757; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
58- ; CHECK-NEXT: [[N_VEC5:%.*]] = and i64 [[TMP0]], 4294967288
58+ ; CHECK-NEXT: [[N_VEC5:%.*]] = and i64 [[TMP0]], 4294967292
5959; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
6060; CHECK: vec.epilog.vector.body:
6161; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
6262; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[INDEX7]]
63- ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i8>, ptr [[TMP14]], align 1
63+ ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i8>, ptr [[TMP14]], align 1
6464; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX7]]
65- ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <8 x i8>, ptr [[TMP15]], align 1
66- ; CHECK-NEXT: [[TMP16:%.*]] = zext <8 x i8> [[WIDE_LOAD9]] to <8 x i16>
67- ; CHECK-NEXT: [[TMP17:%.*]] = zext <8 x i8> [[WIDE_LOAD8]] to <8 x i16>
68- ; CHECK-NEXT: [[TMP18:%.*]] = mul nuw <8 x i16> [[TMP16]], [[TMP17]]
69- ; CHECK-NEXT: [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
70- ; CHECK-NEXT: [[TMP20:%.*]] = trunc nuw <8 x i16> [[TMP19]] to <8 x i8>
71- ; CHECK-NEXT: store <8 x i8> [[TMP20]], ptr [[TMP15]], align 1
65+ ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i8>, ptr [[TMP15]], align 1
66+ ; CHECK-NEXT: [[TMP16:%.*]] = zext <4 x i8> [[WIDE_LOAD9]] to <4 x i16>
67+ ; CHECK-NEXT: [[TMP17:%.*]] = zext <4 x i8> [[WIDE_LOAD8]] to <4 x i16>
68+ ; CHECK-NEXT: [[TMP18:%.*]] = mul nuw <4 x i16> [[TMP16]], [[TMP17]]
69+ ; CHECK-NEXT: [[TMP19:%.*]] = lshr <4 x i16> [[TMP18]], <i16 8, i16 8, i16 8, i16 8>
70+ ; CHECK-NEXT: [[TMP20:%.*]] = trunc nuw <4 x i16> [[TMP19]] to <4 x i8>
71+ ; CHECK-NEXT: store <4 x i8> [[TMP20]], ptr [[TMP15]], align 1
7272; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX7]]
73- ; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <8 x i8>, ptr [[TMP21]], align 1
74- ; CHECK-NEXT: [[TMP22:%.*]] = zext <8 x i8> [[WIDE_LOAD10]] to <8 x i16>
75- ; CHECK-NEXT: [[TMP23:%.*]] = mul nuw <8 x i16> [[TMP22]], [[TMP17]]
76- ; CHECK-NEXT: [[TMP24:%.*]] = lshr <8 x i16> [[TMP23]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
77- ; CHECK-NEXT: [[TMP25:%.*]] = trunc nuw <8 x i16> [[TMP24]] to <8 x i8>
78- ; CHECK-NEXT: store <8 x i8> [[TMP25]], ptr [[TMP21]], align 1
79- ; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[INDEX7]], 8
73+ ; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i8>, ptr [[TMP21]], align 1
74+ ; CHECK-NEXT: [[TMP22:%.*]] = zext <4 x i8> [[WIDE_LOAD10]] to <4 x i16>
75+ ; CHECK-NEXT: [[TMP23:%.*]] = mul nuw <4 x i16> [[TMP22]], [[TMP17]]
76+ ; CHECK-NEXT: [[TMP24:%.*]] = lshr <4 x i16> [[TMP23]], <i16 8, i16 8, i16 8, i16 8>
77+ ; CHECK-NEXT: [[TMP25:%.*]] = trunc nuw <4 x i16> [[TMP24]] to <4 x i8>
78+ ; CHECK-NEXT: store <4 x i8> [[TMP25]], ptr [[TMP21]], align 1
79+ ; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[INDEX7]], 4
8080; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC5]]
8181; CHECK-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
8282; CHECK: vec.epilog.middle.block:
0 commit comments