Skip to content

Commit 1433c1c

Browse files
Update test based on review comments
1 parent 7d22ee5 commit 1433c1c

File tree

1 file changed

+70
-70
lines changed
  • llvm/test/Transforms/LoopUnroll/AArch64

1 file changed

+70
-70
lines changed

llvm/test/Transforms/LoopUnroll/AArch64/vector.ll

Lines changed: 70 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -487,15 +487,15 @@ exit: ; preds = %vector.body
487487

488488
; On Cortex-A55 we should runtime unroll the scalar epilogue loop, but not the
489489
; vector loop.
490-
define void @scalar_epilogue(i64 %N, ptr %p, i8 %val) {
490+
define void @scalar_epilogue(ptr %p, i8 %splat.scalar, i64 %n) {
491491
; APPLE-LABEL: define void @scalar_epilogue(
492-
; APPLE-SAME: i64 [[N:%.*]], ptr [[P:%.*]], i8 [[VAL:%.*]]) #[[ATTR0]] {
492+
; APPLE-SAME: ptr [[P:%.*]], i8 [[SPLAT_SCALAR:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
493493
; APPLE-NEXT: [[ENTRY:.*]]:
494494
; APPLE-NEXT: [[MIN_ITERS_CHECK7:%.*]] = icmp ult i64 [[N]], 32
495-
; APPLE-NEXT: br i1 [[MIN_ITERS_CHECK7]], label %[[FOR_BODY_PREHEADER:.*]], label %[[VECTOR_PH:.*]]
495+
; APPLE-NEXT: br i1 [[MIN_ITERS_CHECK7]], label %[[SCALAR_REMAINDER_PREHEADER:.*]], label %[[VECTOR_PH:.*]]
496496
; APPLE: [[VECTOR_PH]]:
497497
; APPLE-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -32
498-
; APPLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[VAL]], i64 0
498+
; APPLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[SPLAT_SCALAR]], i64 0
499499
; APPLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
500500
; APPLE-NEXT: br label %[[VECTOR_BODY:.*]]
501501
; APPLE: [[VECTOR_BODY]]:
@@ -513,32 +513,32 @@ define void @scalar_epilogue(i64 %N, ptr %p, i8 %val) {
513513
; APPLE-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
514514
; APPLE: [[MIDDLE_BLOCK]]:
515515
; APPLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
516-
; APPLE-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[FOR_BODY_PREHEADER]]
517-
; APPLE: [[FOR_BODY_PREHEADER]]:
518-
; APPLE-NEXT: [[I_06_PH:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
519-
; APPLE-NEXT: br label %[[FOR_BODY:.*]]
520-
; APPLE: [[FOR_BODY]]:
521-
; APPLE-NEXT: [[I_06:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[I_06_PH]], %[[FOR_BODY_PREHEADER]] ]
516+
; APPLE-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_REMAINDER_PREHEADER]]
517+
; APPLE: [[SCALAR_REMAINDER_PREHEADER]]:
518+
; APPLE-NEXT: [[IV_SCALAR_LOOP_PH:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
519+
; APPLE-NEXT: br label %[[SCALAR_REMAINDER:.*]]
520+
; APPLE: [[SCALAR_REMAINDER]]:
521+
; APPLE-NEXT: [[I_06:%.*]] = phi i64 [ [[INC:%.*]], %[[SCALAR_REMAINDER]] ], [ [[IV_SCALAR_LOOP_PH]], %[[SCALAR_REMAINDER_PREHEADER]] ]
522522
; APPLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[I_06]]
523523
; APPLE-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
524-
; APPLE-NEXT: [[ADD:%.*]] = add i8 [[TMP8]], [[VAL]]
524+
; APPLE-NEXT: [[ADD:%.*]] = add i8 [[TMP8]], [[SPLAT_SCALAR]]
525525
; APPLE-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX]], align 1
526526
; APPLE-NEXT: [[INC]] = add nuw i64 [[I_06]], 1
527527
; APPLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
528-
; APPLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
528+
; APPLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_REMAINDER]], !llvm.loop [[LOOP5:![0-9]+]]
529529
; APPLE: [[EXIT_LOOPEXIT]]:
530530
; APPLE-NEXT: br label %[[EXIT]]
531531
; APPLE: [[EXIT]]:
532532
; APPLE-NEXT: ret void
533533
;
534534
; CORTEXA55-LABEL: define void @scalar_epilogue(
535-
; CORTEXA55-SAME: i64 [[N:%.*]], ptr [[P:%.*]], i8 [[VAL:%.*]]) #[[ATTR0]] {
535+
; CORTEXA55-SAME: ptr [[P:%.*]], i8 [[SPLAT_SCALAR:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
536536
; CORTEXA55-NEXT: [[ENTRY:.*]]:
537537
; CORTEXA55-NEXT: [[MIN_ITERS_CHECK7:%.*]] = icmp ult i64 [[N]], 32
538-
; CORTEXA55-NEXT: br i1 [[MIN_ITERS_CHECK7]], label %[[FOR_BODY_PREHEADER:.*]], label %[[VECTOR_PH:.*]]
538+
; CORTEXA55-NEXT: br i1 [[MIN_ITERS_CHECK7]], label %[[SCALAR_REMAINDER_PREHEADER:.*]], label %[[VECTOR_PH:.*]]
539539
; CORTEXA55: [[VECTOR_PH]]:
540540
; CORTEXA55-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -32
541-
; CORTEXA55-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[VAL]], i64 0
541+
; CORTEXA55-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[SPLAT_SCALAR]], i64 0
542542
; CORTEXA55-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
543543
; CORTEXA55-NEXT: br label %[[VECTOR_BODY:.*]]
544544
; CORTEXA55: [[VECTOR_BODY]]:
@@ -556,73 +556,73 @@ define void @scalar_epilogue(i64 %N, ptr %p, i8 %val) {
556556
; CORTEXA55-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
557557
; CORTEXA55: [[MIDDLE_BLOCK]]:
558558
; CORTEXA55-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
559-
; CORTEXA55-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[FOR_BODY_PREHEADER]]
560-
; CORTEXA55: [[FOR_BODY_PREHEADER]]:
559+
; CORTEXA55-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_REMAINDER_PREHEADER]]
560+
; CORTEXA55: [[SCALAR_REMAINDER_PREHEADER]]:
561561
; CORTEXA55-NEXT: [[I_06_PH:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
562562
; CORTEXA55-NEXT: [[TMP8:%.*]] = sub i64 [[N]], [[I_06_PH]]
563563
; CORTEXA55-NEXT: [[TMP9:%.*]] = add i64 [[N]], -1
564564
; CORTEXA55-NEXT: [[TMP10:%.*]] = sub i64 [[TMP9]], [[I_06_PH]]
565565
; CORTEXA55-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP8]], 3
566566
; CORTEXA55-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
567-
; CORTEXA55-NEXT: br i1 [[LCMP_MOD]], label %[[FOR_BODY_PROL_PREHEADER:.*]], label %[[FOR_BODY_PROL_LOOPEXIT:.*]]
568-
; CORTEXA55: [[FOR_BODY_PROL_PREHEADER]]:
569-
; CORTEXA55-NEXT: br label %[[FOR_BODY_PROL:.*]]
570-
; CORTEXA55: [[FOR_BODY_PROL]]:
567+
; CORTEXA55-NEXT: br i1 [[LCMP_MOD]], label %[[SCALAR_REMAINDER_PROL_PREHEADER:.*]], label %[[SCALAR_REMAINDER_PROL_LOOPEXIT:.*]]
568+
; CORTEXA55: [[SCALAR_REMAINDER_PROL_PREHEADER]]:
569+
; CORTEXA55-NEXT: br label %[[SCALAR_REMAINDER_PROL:.*]]
570+
; CORTEXA55: [[SCALAR_REMAINDER_PROL]]:
571571
; CORTEXA55-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[I_06_PH]]
572572
; CORTEXA55-NEXT: [[TMP11:%.*]] = load i8, ptr [[ARRAYIDX_PROL]], align 1
573-
; CORTEXA55-NEXT: [[ADD_PROL:%.*]] = add i8 [[TMP11]], [[VAL]]
573+
; CORTEXA55-NEXT: [[ADD_PROL:%.*]] = add i8 [[TMP11]], [[SPLAT_SCALAR]]
574574
; CORTEXA55-NEXT: store i8 [[ADD_PROL]], ptr [[ARRAYIDX_PROL]], align 1
575575
; CORTEXA55-NEXT: [[INC_PROL:%.*]] = add nuw i64 [[I_06_PH]], 1
576576
; CORTEXA55-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 1, [[XTRAITER]]
577-
; CORTEXA55-NEXT: br i1 [[PROL_ITER_CMP]], label %[[FOR_BODY_PROL_1:.*]], label %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:.*]]
578-
; CORTEXA55: [[FOR_BODY_PROL_1]]:
577+
; CORTEXA55-NEXT: br i1 [[PROL_ITER_CMP]], label %[[SCALAR_REMAINDER_PROL_1:.*]], label %[[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA:.*]]
578+
; CORTEXA55: [[SCALAR_REMAINDER_PROL_1]]:
579579
; CORTEXA55-NEXT: [[ARRAYIDX_PROL_1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_PROL]]
580580
; CORTEXA55-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX_PROL_1]], align 1
581-
; CORTEXA55-NEXT: [[ADD_PROL_1:%.*]] = add i8 [[TMP12]], [[VAL]]
581+
; CORTEXA55-NEXT: [[ADD_PROL_1:%.*]] = add i8 [[TMP12]], [[SPLAT_SCALAR]]
582582
; CORTEXA55-NEXT: store i8 [[ADD_PROL_1]], ptr [[ARRAYIDX_PROL_1]], align 1
583583
; CORTEXA55-NEXT: [[INC_PROL_1:%.*]] = add nuw i64 [[I_06_PH]], 2
584584
; CORTEXA55-NEXT: [[PROL_ITER_CMP_1:%.*]] = icmp ne i64 2, [[XTRAITER]]
585-
; CORTEXA55-NEXT: br i1 [[PROL_ITER_CMP_1]], label %[[FOR_BODY_PROL_2:.*]], label %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]]
586-
; CORTEXA55: [[FOR_BODY_PROL_2]]:
585+
; CORTEXA55-NEXT: br i1 [[PROL_ITER_CMP_1]], label %[[SCALAR_REMAINDER_PROL_2:.*]], label %[[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA]]
586+
; CORTEXA55: [[SCALAR_REMAINDER_PROL_2]]:
587587
; CORTEXA55-NEXT: [[ARRAYIDX_PROL_2:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_PROL_1]]
588588
; CORTEXA55-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX_PROL_2]], align 1
589-
; CORTEXA55-NEXT: [[ADD_PROL_2:%.*]] = add i8 [[TMP13]], [[VAL]]
589+
; CORTEXA55-NEXT: [[ADD_PROL_2:%.*]] = add i8 [[TMP13]], [[SPLAT_SCALAR]]
590590
; CORTEXA55-NEXT: store i8 [[ADD_PROL_2]], ptr [[ARRAYIDX_PROL_2]], align 1
591591
; CORTEXA55-NEXT: [[INC_PROL_2:%.*]] = add nuw i64 [[I_06_PH]], 3
592-
; CORTEXA55-NEXT: br label %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]]
593-
; CORTEXA55: [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]]:
594-
; CORTEXA55-NEXT: [[I_06_UNR_PH:%.*]] = phi i64 [ [[INC_PROL]], %[[FOR_BODY_PROL]] ], [ [[INC_PROL_1]], %[[FOR_BODY_PROL_1]] ], [ [[INC_PROL_2]], %[[FOR_BODY_PROL_2]] ]
595-
; CORTEXA55-NEXT: br label %[[FOR_BODY_PROL_LOOPEXIT]]
596-
; CORTEXA55: [[FOR_BODY_PROL_LOOPEXIT]]:
597-
; CORTEXA55-NEXT: [[I_06_UNR:%.*]] = phi i64 [ [[I_06_PH]], %[[FOR_BODY_PREHEADER]] ], [ [[I_06_UNR_PH]], %[[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
592+
; CORTEXA55-NEXT: br label %[[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA]]
593+
; CORTEXA55: [[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA]]:
594+
; CORTEXA55-NEXT: [[IV_SCALAR_LOOP_UNR_PH:%.*]] = phi i64 [ [[INC_PROL]], %[[SCALAR_REMAINDER_PROL]] ], [ [[INC_PROL_1]], %[[SCALAR_REMAINDER_PROL_1]] ], [ [[INC_PROL_2]], %[[SCALAR_REMAINDER_PROL_2]] ]
595+
; CORTEXA55-NEXT: br label %[[SCALAR_REMAINDER_PROL_LOOPEXIT]]
596+
; CORTEXA55: [[SCALAR_REMAINDER_PROL_LOOPEXIT]]:
597+
; CORTEXA55-NEXT: [[IV_SCALAR_LOOP_UNR:%.*]] = phi i64 [ [[I_06_PH]], %[[SCALAR_REMAINDER_PREHEADER]] ], [ [[IV_SCALAR_LOOP_UNR_PH]], %[[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA]] ]
598598
; CORTEXA55-NEXT: [[TMP14:%.*]] = icmp ult i64 [[TMP10]], 3
599-
; CORTEXA55-NEXT: br i1 [[TMP14]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY_PREHEADER_NEW:.*]]
600-
; CORTEXA55: [[FOR_BODY_PREHEADER_NEW]]:
601-
; CORTEXA55-NEXT: br label %[[FOR_BODY:.*]]
602-
; CORTEXA55: [[FOR_BODY]]:
603-
; CORTEXA55-NEXT: [[I_06:%.*]] = phi i64 [ [[I_06_UNR]], %[[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_3:%.*]], %[[FOR_BODY]] ]
599+
; CORTEXA55-NEXT: br i1 [[TMP14]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_REMAINDER_PREHEADER_NEW:.*]]
600+
; CORTEXA55: [[SCALAR_REMAINDER_PREHEADER_NEW]]:
601+
; CORTEXA55-NEXT: br label %[[SCALAR_REMAINDER:.*]]
602+
; CORTEXA55: [[SCALAR_REMAINDER]]:
603+
; CORTEXA55-NEXT: [[I_06:%.*]] = phi i64 [ [[IV_SCALAR_LOOP_UNR]], %[[SCALAR_REMAINDER_PREHEADER_NEW]] ], [ [[INC_3:%.*]], %[[SCALAR_REMAINDER]] ]
604604
; CORTEXA55-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[I_06]]
605605
; CORTEXA55-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
606-
; CORTEXA55-NEXT: [[ADD:%.*]] = add i8 [[TMP15]], [[VAL]]
606+
; CORTEXA55-NEXT: [[ADD:%.*]] = add i8 [[TMP15]], [[SPLAT_SCALAR]]
607607
; CORTEXA55-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX]], align 1
608608
; CORTEXA55-NEXT: [[INC:%.*]] = add nuw i64 [[I_06]], 1
609609
; CORTEXA55-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC]]
610610
; CORTEXA55-NEXT: [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX_1]], align 1
611-
; CORTEXA55-NEXT: [[ADD_1:%.*]] = add i8 [[TMP16]], [[VAL]]
611+
; CORTEXA55-NEXT: [[ADD_1:%.*]] = add i8 [[TMP16]], [[SPLAT_SCALAR]]
612612
; CORTEXA55-NEXT: store i8 [[ADD_1]], ptr [[ARRAYIDX_1]], align 1
613613
; CORTEXA55-NEXT: [[INC_1:%.*]] = add nuw i64 [[I_06]], 2
614614
; CORTEXA55-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_1]]
615615
; CORTEXA55-NEXT: [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX_2]], align 1
616-
; CORTEXA55-NEXT: [[ADD_2:%.*]] = add i8 [[TMP17]], [[VAL]]
616+
; CORTEXA55-NEXT: [[ADD_2:%.*]] = add i8 [[TMP17]], [[SPLAT_SCALAR]]
617617
; CORTEXA55-NEXT: store i8 [[ADD_2]], ptr [[ARRAYIDX_2]], align 1
618618
; CORTEXA55-NEXT: [[INC_2:%.*]] = add nuw i64 [[I_06]], 3
619619
; CORTEXA55-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_2]]
620620
; CORTEXA55-NEXT: [[TMP18:%.*]] = load i8, ptr [[ARRAYIDX_3]], align 1
621-
; CORTEXA55-NEXT: [[ADD_3:%.*]] = add i8 [[TMP18]], [[VAL]]
621+
; CORTEXA55-NEXT: [[ADD_3:%.*]] = add i8 [[TMP18]], [[SPLAT_SCALAR]]
622622
; CORTEXA55-NEXT: store i8 [[ADD_3]], ptr [[ARRAYIDX_3]], align 1
623623
; CORTEXA55-NEXT: [[INC_3]] = add nuw i64 [[I_06]], 4
624624
; CORTEXA55-NEXT: [[EXITCOND_NOT_3:%.*]] = icmp eq i64 [[INC_3]], [[N]]
625-
; CORTEXA55-NEXT: br i1 [[EXITCOND_NOT_3]], label %[[EXIT_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
625+
; CORTEXA55-NEXT: br i1 [[EXITCOND_NOT_3]], label %[[EXIT_LOOPEXIT_UNR_LCSSA:.*]], label %[[SCALAR_REMAINDER]], !llvm.loop [[LOOP3:![0-9]+]]
626626
; CORTEXA55: [[EXIT_LOOPEXIT_UNR_LCSSA]]:
627627
; CORTEXA55-NEXT: br label %[[EXIT_LOOPEXIT]]
628628
; CORTEXA55: [[EXIT_LOOPEXIT]]:
@@ -631,42 +631,42 @@ define void @scalar_epilogue(i64 %N, ptr %p, i8 %val) {
631631
; CORTEXA55-NEXT: ret void
632632
;
633633
entry:
634-
%min.iters.check = icmp ult i64 %N, 32
635-
br i1 %min.iters.check, label %for.body, label %vector.ph
634+
%min.iters.check = icmp ult i64 %n, 32
635+
br i1 %min.iters.check, label %scalar.remainder, label %vector.ph
636636

637637
vector.ph:
638-
%n.vec = and i64 %N, -32
639-
%broadcast.splatinsert = insertelement <16 x i8> poison, i8 %val, i64 0
638+
%n.vec = and i64 %n, -32
639+
%broadcast.splatinsert = insertelement <16 x i8> poison, i8 %splat.scalar, i64 0
640640
%broadcast.splat = shufflevector <16 x i8> %broadcast.splatinsert, <16 x i8> poison, <16 x i32> zeroinitializer
641641
br label %vector.body
642642

643643
vector.body:
644-
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
645-
%0 = getelementptr inbounds nuw i8, ptr %p, i64 %index
646-
%1 = getelementptr inbounds nuw i8, ptr %0, i64 16
647-
%wide.load = load <16 x i8>, ptr %0, align 1
648-
%wide.load8 = load <16 x i8>, ptr %1, align 1
649-
%2 = add <16 x i8> %wide.load, %broadcast.splat
650-
%3 = add <16 x i8> %wide.load8, %broadcast.splat
651-
store <16 x i8> %2, ptr %0, align 1
652-
store <16 x i8> %3, ptr %1, align 1
653-
%index.next = add nuw i64 %index, 32
654-
%4 = icmp eq i64 %index.next, %n.vec
655-
br i1 %4, label %middle.block, label %vector.body, !llvm.loop !2
644+
%iv = phi i64 [ 0, %vector.ph ], [ %iv.next, %vector.body ]
645+
%gep.p.iv = getelementptr inbounds nuw i8, ptr %p, i64 %iv
646+
%gep.p.iv.16 = getelementptr inbounds nuw i8, ptr %gep.p.iv, i64 16
647+
%wide.load = load <16 x i8>, ptr %gep.p.iv, align 1
648+
%wide.load.2 = load <16 x i8>, ptr %gep.p.iv.16, align 1
649+
%add.broadcast = add <16 x i8> %wide.load, %broadcast.splat
650+
%add.broadcast.2 = add <16 x i8> %wide.load.2, %broadcast.splat
651+
store <16 x i8> %add.broadcast, ptr %gep.p.iv, align 1
652+
store <16 x i8> %add.broadcast.2, ptr %gep.p.iv.16, align 1
653+
%iv.next = add nuw i64 %iv, 32
654+
%exit.cond = icmp eq i64 %iv.next, %n.vec
655+
br i1 %exit.cond, label %middle.block, label %vector.body, !llvm.loop !2
656656

657657
middle.block:
658-
%cmp.n = icmp eq i64 %N, %n.vec
659-
br i1 %cmp.n, label %exit, label %for.body
658+
%cmp.n = icmp eq i64 %n, %n.vec
659+
br i1 %cmp.n, label %exit, label %scalar.remainder
660660

661-
for.body:
662-
%i.06 = phi i64 [ %inc, %for.body ], [ %n.vec, %middle.block ], [ 0, %entry ]
663-
%arrayidx = getelementptr inbounds nuw i8, ptr %p, i64 %i.06
664-
%8 = load i8, ptr %arrayidx, align 1
665-
%add = add i8 %8, %val
661+
scalar.remainder:
662+
%iv.scalar.loop = phi i64 [ %inc, %scalar.remainder ], [ %n.vec, %middle.block ], [ 0, %entry ]
663+
%arrayidx = getelementptr inbounds nuw i8, ptr %p, i64 %iv.scalar.loop
664+
%scalar.load = load i8, ptr %arrayidx, align 1
665+
%add = add i8 %scalar.load, %splat.scalar
666666
store i8 %add, ptr %arrayidx, align 1
667-
%inc = add nuw i64 %i.06, 1
668-
%exitcond.not = icmp eq i64 %inc, %N
669-
br i1 %exitcond.not, label %exit, label %for.body, !llvm.loop !3
667+
%inc = add nuw i64 %iv.scalar.loop, 1
668+
%exitcond.not = icmp eq i64 %inc, %n
669+
br i1 %exitcond.not, label %exit, label %scalar.remainder, !llvm.loop !3
670670

671671
exit:
672672
ret void

0 commit comments

Comments
 (0)