@@ -487,15 +487,15 @@ exit: ; preds = %vector.body
487487
488488; On Cortex-A55 we should runtime unroll the scalar epilogue loop, but not the
489489; vector loop.
490- define void @scalar_epilogue (i64 %N , ptr %p , i8 %val ) {
490+ define void @scalar_epilogue (ptr %p , i8 %splat.scalar , i64 %n ) {
491491; APPLE-LABEL: define void @scalar_epilogue(
492- ; APPLE-SAME: i64 [[N :%.*]], ptr [[P :%.*]], i8 [[VAL :%.*]]) #[[ATTR0]] {
492+ ; APPLE-SAME: ptr [[P :%.*]], i8 [[SPLAT_SCALAR :%.*]], i64 [[N :%.*]]) #[[ATTR0]] {
493493; APPLE-NEXT: [[ENTRY:.*]]:
494494; APPLE-NEXT: [[MIN_ITERS_CHECK7:%.*]] = icmp ult i64 [[N]], 32
495- ; APPLE-NEXT: br i1 [[MIN_ITERS_CHECK7]], label %[[FOR_BODY_PREHEADER :.*]], label %[[VECTOR_PH:.*]]
495+ ; APPLE-NEXT: br i1 [[MIN_ITERS_CHECK7]], label %[[SCALAR_REMAINDER_PREHEADER :.*]], label %[[VECTOR_PH:.*]]
496496; APPLE: [[VECTOR_PH]]:
497497; APPLE-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -32
498- ; APPLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[VAL ]], i64 0
498+ ; APPLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[SPLAT_SCALAR ]], i64 0
499499; APPLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
500500; APPLE-NEXT: br label %[[VECTOR_BODY:.*]]
501501; APPLE: [[VECTOR_BODY]]:
@@ -513,32 +513,32 @@ define void @scalar_epilogue(i64 %N, ptr %p, i8 %val) {
513513; APPLE-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
514514; APPLE: [[MIDDLE_BLOCK]]:
515515; APPLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
516- ; APPLE-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[FOR_BODY_PREHEADER ]]
517- ; APPLE: [[FOR_BODY_PREHEADER ]]:
518- ; APPLE-NEXT: [[I_06_PH :%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
519- ; APPLE-NEXT: br label %[[FOR_BODY :.*]]
520- ; APPLE: [[FOR_BODY ]]:
521- ; APPLE-NEXT: [[I_06:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY ]] ], [ [[I_06_PH ]], %[[FOR_BODY_PREHEADER ]] ]
516+ ; APPLE-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_REMAINDER_PREHEADER ]]
517+ ; APPLE: [[SCALAR_REMAINDER_PREHEADER ]]:
518+ ; APPLE-NEXT: [[IV_SCALAR_LOOP_PH :%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
519+ ; APPLE-NEXT: br label %[[SCALAR_REMAINDER :.*]]
520+ ; APPLE: [[SCALAR_REMAINDER ]]:
521+ ; APPLE-NEXT: [[I_06:%.*]] = phi i64 [ [[INC:%.*]], %[[SCALAR_REMAINDER ]] ], [ [[IV_SCALAR_LOOP_PH ]], %[[SCALAR_REMAINDER_PREHEADER ]] ]
522522; APPLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[I_06]]
523523; APPLE-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
524- ; APPLE-NEXT: [[ADD:%.*]] = add i8 [[TMP8]], [[VAL ]]
524+ ; APPLE-NEXT: [[ADD:%.*]] = add i8 [[TMP8]], [[SPLAT_SCALAR ]]
525525; APPLE-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX]], align 1
526526; APPLE-NEXT: [[INC]] = add nuw i64 [[I_06]], 1
527527; APPLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
528- ; APPLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY ]], !llvm.loop [[LOOP5:![0-9]+]]
528+ ; APPLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_REMAINDER ]], !llvm.loop [[LOOP5:![0-9]+]]
529529; APPLE: [[EXIT_LOOPEXIT]]:
530530; APPLE-NEXT: br label %[[EXIT]]
531531; APPLE: [[EXIT]]:
532532; APPLE-NEXT: ret void
533533;
534534; CORTEXA55-LABEL: define void @scalar_epilogue(
535- ; CORTEXA55-SAME: i64 [[N :%.*]], ptr [[P :%.*]], i8 [[VAL :%.*]]) #[[ATTR0]] {
535+ ; CORTEXA55-SAME: ptr [[P :%.*]], i8 [[SPLAT_SCALAR :%.*]], i64 [[N :%.*]]) #[[ATTR0]] {
536536; CORTEXA55-NEXT: [[ENTRY:.*]]:
537537; CORTEXA55-NEXT: [[MIN_ITERS_CHECK7:%.*]] = icmp ult i64 [[N]], 32
538- ; CORTEXA55-NEXT: br i1 [[MIN_ITERS_CHECK7]], label %[[FOR_BODY_PREHEADER :.*]], label %[[VECTOR_PH:.*]]
538+ ; CORTEXA55-NEXT: br i1 [[MIN_ITERS_CHECK7]], label %[[SCALAR_REMAINDER_PREHEADER :.*]], label %[[VECTOR_PH:.*]]
539539; CORTEXA55: [[VECTOR_PH]]:
540540; CORTEXA55-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -32
541- ; CORTEXA55-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[VAL ]], i64 0
541+ ; CORTEXA55-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[SPLAT_SCALAR ]], i64 0
542542; CORTEXA55-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
543543; CORTEXA55-NEXT: br label %[[VECTOR_BODY:.*]]
544544; CORTEXA55: [[VECTOR_BODY]]:
@@ -556,73 +556,73 @@ define void @scalar_epilogue(i64 %N, ptr %p, i8 %val) {
556556; CORTEXA55-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
557557; CORTEXA55: [[MIDDLE_BLOCK]]:
558558; CORTEXA55-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
559- ; CORTEXA55-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[FOR_BODY_PREHEADER ]]
560- ; CORTEXA55: [[FOR_BODY_PREHEADER ]]:
559+ ; CORTEXA55-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_REMAINDER_PREHEADER ]]
560+ ; CORTEXA55: [[SCALAR_REMAINDER_PREHEADER ]]:
561561; CORTEXA55-NEXT: [[I_06_PH:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
562562; CORTEXA55-NEXT: [[TMP8:%.*]] = sub i64 [[N]], [[I_06_PH]]
563563; CORTEXA55-NEXT: [[TMP9:%.*]] = add i64 [[N]], -1
564564; CORTEXA55-NEXT: [[TMP10:%.*]] = sub i64 [[TMP9]], [[I_06_PH]]
565565; CORTEXA55-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP8]], 3
566566; CORTEXA55-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
567- ; CORTEXA55-NEXT: br i1 [[LCMP_MOD]], label %[[FOR_BODY_PROL_PREHEADER :.*]], label %[[FOR_BODY_PROL_LOOPEXIT :.*]]
568- ; CORTEXA55: [[FOR_BODY_PROL_PREHEADER ]]:
569- ; CORTEXA55-NEXT: br label %[[FOR_BODY_PROL :.*]]
570- ; CORTEXA55: [[FOR_BODY_PROL ]]:
567+ ; CORTEXA55-NEXT: br i1 [[LCMP_MOD]], label %[[SCALAR_REMAINDER_PROL_PREHEADER :.*]], label %[[SCALAR_REMAINDER_PROL_LOOPEXIT :.*]]
568+ ; CORTEXA55: [[SCALAR_REMAINDER_PROL_PREHEADER ]]:
569+ ; CORTEXA55-NEXT: br label %[[SCALAR_REMAINDER_PROL :.*]]
570+ ; CORTEXA55: [[SCALAR_REMAINDER_PROL ]]:
571571; CORTEXA55-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[I_06_PH]]
572572; CORTEXA55-NEXT: [[TMP11:%.*]] = load i8, ptr [[ARRAYIDX_PROL]], align 1
573- ; CORTEXA55-NEXT: [[ADD_PROL:%.*]] = add i8 [[TMP11]], [[VAL ]]
573+ ; CORTEXA55-NEXT: [[ADD_PROL:%.*]] = add i8 [[TMP11]], [[SPLAT_SCALAR ]]
574574; CORTEXA55-NEXT: store i8 [[ADD_PROL]], ptr [[ARRAYIDX_PROL]], align 1
575575; CORTEXA55-NEXT: [[INC_PROL:%.*]] = add nuw i64 [[I_06_PH]], 1
576576; CORTEXA55-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 1, [[XTRAITER]]
577- ; CORTEXA55-NEXT: br i1 [[PROL_ITER_CMP]], label %[[FOR_BODY_PROL_1 :.*]], label %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA :.*]]
578- ; CORTEXA55: [[FOR_BODY_PROL_1 ]]:
577+ ; CORTEXA55-NEXT: br i1 [[PROL_ITER_CMP]], label %[[SCALAR_REMAINDER_PROL_1 :.*]], label %[[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA :.*]]
578+ ; CORTEXA55: [[SCALAR_REMAINDER_PROL_1 ]]:
579579; CORTEXA55-NEXT: [[ARRAYIDX_PROL_1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_PROL]]
580580; CORTEXA55-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX_PROL_1]], align 1
581- ; CORTEXA55-NEXT: [[ADD_PROL_1:%.*]] = add i8 [[TMP12]], [[VAL ]]
581+ ; CORTEXA55-NEXT: [[ADD_PROL_1:%.*]] = add i8 [[TMP12]], [[SPLAT_SCALAR ]]
582582; CORTEXA55-NEXT: store i8 [[ADD_PROL_1]], ptr [[ARRAYIDX_PROL_1]], align 1
583583; CORTEXA55-NEXT: [[INC_PROL_1:%.*]] = add nuw i64 [[I_06_PH]], 2
584584; CORTEXA55-NEXT: [[PROL_ITER_CMP_1:%.*]] = icmp ne i64 2, [[XTRAITER]]
585- ; CORTEXA55-NEXT: br i1 [[PROL_ITER_CMP_1]], label %[[FOR_BODY_PROL_2 :.*]], label %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA ]]
586- ; CORTEXA55: [[FOR_BODY_PROL_2 ]]:
585+ ; CORTEXA55-NEXT: br i1 [[PROL_ITER_CMP_1]], label %[[SCALAR_REMAINDER_PROL_2 :.*]], label %[[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA ]]
586+ ; CORTEXA55: [[SCALAR_REMAINDER_PROL_2 ]]:
587587; CORTEXA55-NEXT: [[ARRAYIDX_PROL_2:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_PROL_1]]
588588; CORTEXA55-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX_PROL_2]], align 1
589- ; CORTEXA55-NEXT: [[ADD_PROL_2:%.*]] = add i8 [[TMP13]], [[VAL ]]
589+ ; CORTEXA55-NEXT: [[ADD_PROL_2:%.*]] = add i8 [[TMP13]], [[SPLAT_SCALAR ]]
590590; CORTEXA55-NEXT: store i8 [[ADD_PROL_2]], ptr [[ARRAYIDX_PROL_2]], align 1
591591; CORTEXA55-NEXT: [[INC_PROL_2:%.*]] = add nuw i64 [[I_06_PH]], 3
592- ; CORTEXA55-NEXT: br label %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA ]]
593- ; CORTEXA55: [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA ]]:
594- ; CORTEXA55-NEXT: [[I_06_UNR_PH :%.*]] = phi i64 [ [[INC_PROL]], %[[FOR_BODY_PROL ]] ], [ [[INC_PROL_1]], %[[FOR_BODY_PROL_1 ]] ], [ [[INC_PROL_2]], %[[FOR_BODY_PROL_2 ]] ]
595- ; CORTEXA55-NEXT: br label %[[FOR_BODY_PROL_LOOPEXIT ]]
596- ; CORTEXA55: [[FOR_BODY_PROL_LOOPEXIT ]]:
597- ; CORTEXA55-NEXT: [[I_06_UNR :%.*]] = phi i64 [ [[I_06_PH]], %[[FOR_BODY_PREHEADER ]] ], [ [[I_06_UNR_PH ]], %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA ]] ]
592+ ; CORTEXA55-NEXT: br label %[[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA ]]
593+ ; CORTEXA55: [[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA ]]:
594+ ; CORTEXA55-NEXT: [[IV_SCALAR_LOOP_UNR_PH :%.*]] = phi i64 [ [[INC_PROL]], %[[SCALAR_REMAINDER_PROL ]] ], [ [[INC_PROL_1]], %[[SCALAR_REMAINDER_PROL_1 ]] ], [ [[INC_PROL_2]], %[[SCALAR_REMAINDER_PROL_2 ]] ]
595+ ; CORTEXA55-NEXT: br label %[[SCALAR_REMAINDER_PROL_LOOPEXIT ]]
596+ ; CORTEXA55: [[SCALAR_REMAINDER_PROL_LOOPEXIT ]]:
597+ ; CORTEXA55-NEXT: [[IV_SCALAR_LOOP_UNR :%.*]] = phi i64 [ [[I_06_PH]], %[[SCALAR_REMAINDER_PREHEADER ]] ], [ [[IV_SCALAR_LOOP_UNR_PH ]], %[[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA ]] ]
598598; CORTEXA55-NEXT: [[TMP14:%.*]] = icmp ult i64 [[TMP10]], 3
599- ; CORTEXA55-NEXT: br i1 [[TMP14]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY_PREHEADER_NEW :.*]]
600- ; CORTEXA55: [[FOR_BODY_PREHEADER_NEW ]]:
601- ; CORTEXA55-NEXT: br label %[[FOR_BODY :.*]]
602- ; CORTEXA55: [[FOR_BODY ]]:
603- ; CORTEXA55-NEXT: [[I_06:%.*]] = phi i64 [ [[I_06_UNR ]], %[[FOR_BODY_PREHEADER_NEW ]] ], [ [[INC_3:%.*]], %[[FOR_BODY ]] ]
599+ ; CORTEXA55-NEXT: br i1 [[TMP14]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_REMAINDER_PREHEADER_NEW :.*]]
600+ ; CORTEXA55: [[SCALAR_REMAINDER_PREHEADER_NEW ]]:
601+ ; CORTEXA55-NEXT: br label %[[SCALAR_REMAINDER :.*]]
602+ ; CORTEXA55: [[SCALAR_REMAINDER ]]:
603+ ; CORTEXA55-NEXT: [[I_06:%.*]] = phi i64 [ [[IV_SCALAR_LOOP_UNR ]], %[[SCALAR_REMAINDER_PREHEADER_NEW ]] ], [ [[INC_3:%.*]], %[[SCALAR_REMAINDER ]] ]
604604; CORTEXA55-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[I_06]]
605605; CORTEXA55-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
606- ; CORTEXA55-NEXT: [[ADD:%.*]] = add i8 [[TMP15]], [[VAL ]]
606+ ; CORTEXA55-NEXT: [[ADD:%.*]] = add i8 [[TMP15]], [[SPLAT_SCALAR ]]
607607; CORTEXA55-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX]], align 1
608608; CORTEXA55-NEXT: [[INC:%.*]] = add nuw i64 [[I_06]], 1
609609; CORTEXA55-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC]]
610610; CORTEXA55-NEXT: [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX_1]], align 1
611- ; CORTEXA55-NEXT: [[ADD_1:%.*]] = add i8 [[TMP16]], [[VAL ]]
611+ ; CORTEXA55-NEXT: [[ADD_1:%.*]] = add i8 [[TMP16]], [[SPLAT_SCALAR ]]
612612; CORTEXA55-NEXT: store i8 [[ADD_1]], ptr [[ARRAYIDX_1]], align 1
613613; CORTEXA55-NEXT: [[INC_1:%.*]] = add nuw i64 [[I_06]], 2
614614; CORTEXA55-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_1]]
615615; CORTEXA55-NEXT: [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX_2]], align 1
616- ; CORTEXA55-NEXT: [[ADD_2:%.*]] = add i8 [[TMP17]], [[VAL ]]
616+ ; CORTEXA55-NEXT: [[ADD_2:%.*]] = add i8 [[TMP17]], [[SPLAT_SCALAR ]]
617617; CORTEXA55-NEXT: store i8 [[ADD_2]], ptr [[ARRAYIDX_2]], align 1
618618; CORTEXA55-NEXT: [[INC_2:%.*]] = add nuw i64 [[I_06]], 3
619619; CORTEXA55-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_2]]
620620; CORTEXA55-NEXT: [[TMP18:%.*]] = load i8, ptr [[ARRAYIDX_3]], align 1
621- ; CORTEXA55-NEXT: [[ADD_3:%.*]] = add i8 [[TMP18]], [[VAL ]]
621+ ; CORTEXA55-NEXT: [[ADD_3:%.*]] = add i8 [[TMP18]], [[SPLAT_SCALAR ]]
622622; CORTEXA55-NEXT: store i8 [[ADD_3]], ptr [[ARRAYIDX_3]], align 1
623623; CORTEXA55-NEXT: [[INC_3]] = add nuw i64 [[I_06]], 4
624624; CORTEXA55-NEXT: [[EXITCOND_NOT_3:%.*]] = icmp eq i64 [[INC_3]], [[N]]
625- ; CORTEXA55-NEXT: br i1 [[EXITCOND_NOT_3]], label %[[EXIT_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_BODY ]], !llvm.loop [[LOOP3:![0-9]+]]
625+ ; CORTEXA55-NEXT: br i1 [[EXITCOND_NOT_3]], label %[[EXIT_LOOPEXIT_UNR_LCSSA:.*]], label %[[SCALAR_REMAINDER ]], !llvm.loop [[LOOP3:![0-9]+]]
626626; CORTEXA55: [[EXIT_LOOPEXIT_UNR_LCSSA]]:
627627; CORTEXA55-NEXT: br label %[[EXIT_LOOPEXIT]]
628628; CORTEXA55: [[EXIT_LOOPEXIT]]:
@@ -631,42 +631,42 @@ define void @scalar_epilogue(i64 %N, ptr %p, i8 %val) {
631631; CORTEXA55-NEXT: ret void
632632;
633633entry:
634- %min.iters.check = icmp ult i64 %N , 32
635- br i1 %min.iters.check , label %for.body , label %vector.ph
634+ %min.iters.check = icmp ult i64 %n , 32
635+ br i1 %min.iters.check , label %scalar.remainder , label %vector.ph
636636
637637vector.ph:
638- %n.vec = and i64 %N , -32
639- %broadcast.splatinsert = insertelement <16 x i8 > poison, i8 %val , i64 0
638+ %n.vec = and i64 %n , -32
639+ %broadcast.splatinsert = insertelement <16 x i8 > poison, i8 %splat.scalar , i64 0
640640 %broadcast.splat = shufflevector <16 x i8 > %broadcast.splatinsert , <16 x i8 > poison, <16 x i32 > zeroinitializer
641641 br label %vector.body
642642
643643vector.body:
644- %index = phi i64 [ 0 , %vector.ph ], [ %index .next , %vector.body ]
645- %0 = getelementptr inbounds nuw i8 , ptr %p , i64 %index
646- %1 = getelementptr inbounds nuw i8 , ptr %0 , i64 16
647- %wide.load = load <16 x i8 >, ptr %0 , align 1
648- %wide.load8 = load <16 x i8 >, ptr %1 , align 1
649- %2 = add <16 x i8 > %wide.load , %broadcast.splat
650- %3 = add <16 x i8 > %wide.load8 , %broadcast.splat
651- store <16 x i8 > %2 , ptr %0 , align 1
652- store <16 x i8 > %3 , ptr %1 , align 1
653- %index .next = add nuw i64 %index , 32
654- %4 = icmp eq i64 %index .next , %n.vec
655- br i1 %4 , label %middle.block , label %vector.body , !llvm.loop !2
644+ %iv = phi i64 [ 0 , %vector.ph ], [ %iv .next , %vector.body ]
645+ %gep.p.iv = getelementptr inbounds nuw i8 , ptr %p , i64 %iv
646+ %gep.p.iv.16 = getelementptr inbounds nuw i8 , ptr %gep.p.iv , i64 16
647+ %wide.load = load <16 x i8 >, ptr %gep.p.iv , align 1
648+ %wide.load.2 = load <16 x i8 >, ptr %gep.p.iv.16 , align 1
649+ %add.broadcast = add <16 x i8 > %wide.load , %broadcast.splat
650+ %add.broadcast.2 = add <16 x i8 > %wide.load.2 , %broadcast.splat
651+ store <16 x i8 > %add.broadcast , ptr %gep.p.iv , align 1
652+ store <16 x i8 > %add.broadcast.2 , ptr %gep.p.iv.16 , align 1
653+ %iv .next = add nuw i64 %iv , 32
654+ %exit.cond = icmp eq i64 %iv .next , %n.vec
655+ br i1 %exit.cond , label %middle.block , label %vector.body , !llvm.loop !2
656656
657657middle.block:
658- %cmp.n = icmp eq i64 %N , %n.vec
659- br i1 %cmp.n , label %exit , label %for.body
658+ %cmp.n = icmp eq i64 %n , %n.vec
659+ br i1 %cmp.n , label %exit , label %scalar.remainder
660660
661- for.body :
662- %i.06 = phi i64 [ %inc , %for.body ], [ %n.vec , %middle.block ], [ 0 , %entry ]
663- %arrayidx = getelementptr inbounds nuw i8 , ptr %p , i64 %i.06
664- %8 = load i8 , ptr %arrayidx , align 1
665- %add = add i8 %8 , %val
661+ scalar.remainder :
662+ %iv.scalar.loop = phi i64 [ %inc , %scalar.remainder ], [ %n.vec , %middle.block ], [ 0 , %entry ]
663+ %arrayidx = getelementptr inbounds nuw i8 , ptr %p , i64 %iv.scalar.loop
664+ %scalar.load = load i8 , ptr %arrayidx , align 1
665+ %add = add i8 %scalar.load , %splat.scalar
666666 store i8 %add , ptr %arrayidx , align 1
667- %inc = add nuw i64 %i.06 , 1
668- %exitcond.not = icmp eq i64 %inc , %N
669- br i1 %exitcond.not , label %exit , label %for.body , !llvm.loop !3
667+ %inc = add nuw i64 %iv.scalar.loop , 1
668+ %exitcond.not = icmp eq i64 %inc , %n
669+ br i1 %exitcond.not , label %exit , label %scalar.remainder , !llvm.loop !3
670670
671671exit:
672672 ret void
0 commit comments