Skip to content

Commit 9a9b8b7

Browse files
[AArch64] Allow unrolling of scalar epilogue loops (#151164)
#147420 changed the unrolling preferences to permit unrolling of non-auto-vectorized loops by checking for the "isvectorized" attribute. However, when a loop is vectorized, this attribute is placed on both the vector loop and the scalar epilogue, so that change prevented the scalar epilogue from being unrolled. Restore the previous behaviour of unrolling the scalar epilogue by checking both for the "isvectorized" attribute and for the presence of vector instructions in the loop.
1 parent 4ef9246 commit 9a9b8b7

File tree

2 files changed

+202
-5
lines changed

2 files changed

+202
-5
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4905,14 +4905,17 @@ void AArch64TTIImpl::getUnrollingPreferences(
49054905
// Disable partial & runtime unrolling on -Os.
49064906
UP.PartialOptSizeThreshold = 0;
49074907

4908-
// No need to unroll auto-vectorized loops
4909-
if (findStringMetadataForLoop(L, "llvm.loop.isvectorized"))
4910-
return;
4911-
49124908
// Scan the loop: don't unroll loops with calls as this could prevent
4913-
// inlining.
4909+
// inlining. Don't unroll auto-vectorized loops either, though do allow
4910+
// unrolling of the scalar remainder.
4911+
bool IsVectorized = getBooleanLoopAttribute(L, "llvm.loop.isvectorized");
49144912
for (auto *BB : L->getBlocks()) {
49154913
for (auto &I : *BB) {
4914+
// Both auto-vectorized loops and the scalar remainder have the
4915+
// isvectorized attribute, so differentiate between them by the presence
4916+
// of vector instructions.
4917+
if (IsVectorized && I.getType()->isVectorTy())
4918+
return;
49164919
if (isa<CallBase>(I)) {
49174920
if (isa<CallInst>(I) || isa<InvokeInst>(I))
49184921
if (const Function *F = cast<CallBase>(I).getCalledFunction())

llvm/test/Transforms/LoopUnroll/AArch64/vector.ll

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,12 +485,206 @@ exit: ; preds = %vector.body
485485
!0 = !{!0, !1}
486486
!1 = !{!"llvm.loop.isvectorized", i32 1}
487487

488+
; On Cortex-A55 we should runtime unroll the scalar epilogue loop, but not the
489+
; vector loop.
490+
define void @scalar_epilogue(ptr %p, i8 %splat.scalar, i64 %n) {
491+
; APPLE-LABEL: define void @scalar_epilogue(
492+
; APPLE-SAME: ptr [[P:%.*]], i8 [[SPLAT_SCALAR:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
493+
; APPLE-NEXT: [[ENTRY:.*]]:
494+
; APPLE-NEXT: [[MIN_ITERS_CHECK7:%.*]] = icmp ult i64 [[N]], 32
495+
; APPLE-NEXT: br i1 [[MIN_ITERS_CHECK7]], label %[[SCALAR_REMAINDER_PREHEADER:.*]], label %[[VECTOR_PH:.*]]
496+
; APPLE: [[VECTOR_PH]]:
497+
; APPLE-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -32
498+
; APPLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[SPLAT_SCALAR]], i64 0
499+
; APPLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
500+
; APPLE-NEXT: br label %[[VECTOR_BODY:.*]]
501+
; APPLE: [[VECTOR_BODY]]:
502+
; APPLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
503+
; APPLE-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INDEX]]
504+
; APPLE-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
505+
; APPLE-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
506+
; APPLE-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
507+
; APPLE-NEXT: [[TMP2:%.*]] = add <16 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
508+
; APPLE-NEXT: [[TMP3:%.*]] = add <16 x i8> [[WIDE_LOAD8]], [[BROADCAST_SPLAT]]
509+
; APPLE-NEXT: store <16 x i8> [[TMP2]], ptr [[TMP0]], align 1
510+
; APPLE-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP1]], align 1
511+
; APPLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
512+
; APPLE-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
513+
; APPLE-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
514+
; APPLE: [[MIDDLE_BLOCK]]:
515+
; APPLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
516+
; APPLE-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_REMAINDER_PREHEADER]]
517+
; APPLE: [[SCALAR_REMAINDER_PREHEADER]]:
518+
; APPLE-NEXT: [[IV_SCALAR_LOOP_PH:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
519+
; APPLE-NEXT: br label %[[SCALAR_REMAINDER:.*]]
520+
; APPLE: [[SCALAR_REMAINDER]]:
521+
; APPLE-NEXT: [[I_06:%.*]] = phi i64 [ [[INC:%.*]], %[[SCALAR_REMAINDER]] ], [ [[IV_SCALAR_LOOP_PH]], %[[SCALAR_REMAINDER_PREHEADER]] ]
522+
; APPLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[I_06]]
523+
; APPLE-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
524+
; APPLE-NEXT: [[ADD:%.*]] = add i8 [[TMP8]], [[SPLAT_SCALAR]]
525+
; APPLE-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX]], align 1
526+
; APPLE-NEXT: [[INC]] = add nuw i64 [[I_06]], 1
527+
; APPLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
528+
; APPLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_REMAINDER]], !llvm.loop [[LOOP5:![0-9]+]]
529+
; APPLE: [[EXIT_LOOPEXIT]]:
530+
; APPLE-NEXT: br label %[[EXIT]]
531+
; APPLE: [[EXIT]]:
532+
; APPLE-NEXT: ret void
533+
;
534+
; CORTEXA55-LABEL: define void @scalar_epilogue(
535+
; CORTEXA55-SAME: ptr [[P:%.*]], i8 [[SPLAT_SCALAR:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
536+
; CORTEXA55-NEXT: [[ENTRY:.*]]:
537+
; CORTEXA55-NEXT: [[MIN_ITERS_CHECK7:%.*]] = icmp ult i64 [[N]], 32
538+
; CORTEXA55-NEXT: br i1 [[MIN_ITERS_CHECK7]], label %[[SCALAR_REMAINDER_PREHEADER:.*]], label %[[VECTOR_PH:.*]]
539+
; CORTEXA55: [[VECTOR_PH]]:
540+
; CORTEXA55-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -32
541+
; CORTEXA55-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[SPLAT_SCALAR]], i64 0
542+
; CORTEXA55-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
543+
; CORTEXA55-NEXT: br label %[[VECTOR_BODY:.*]]
544+
; CORTEXA55: [[VECTOR_BODY]]:
545+
; CORTEXA55-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
546+
; CORTEXA55-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INDEX]]
547+
; CORTEXA55-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
548+
; CORTEXA55-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
549+
; CORTEXA55-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
550+
; CORTEXA55-NEXT: [[TMP2:%.*]] = add <16 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
551+
; CORTEXA55-NEXT: [[TMP3:%.*]] = add <16 x i8> [[WIDE_LOAD8]], [[BROADCAST_SPLAT]]
552+
; CORTEXA55-NEXT: store <16 x i8> [[TMP2]], ptr [[TMP0]], align 1
553+
; CORTEXA55-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP1]], align 1
554+
; CORTEXA55-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
555+
; CORTEXA55-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
556+
; CORTEXA55-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
557+
; CORTEXA55: [[MIDDLE_BLOCK]]:
558+
; CORTEXA55-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
559+
; CORTEXA55-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_REMAINDER_PREHEADER]]
560+
; CORTEXA55: [[SCALAR_REMAINDER_PREHEADER]]:
561+
; CORTEXA55-NEXT: [[I_06_PH:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
562+
; CORTEXA55-NEXT: [[TMP8:%.*]] = sub i64 [[N]], [[I_06_PH]]
563+
; CORTEXA55-NEXT: [[TMP9:%.*]] = add i64 [[N]], -1
564+
; CORTEXA55-NEXT: [[TMP10:%.*]] = sub i64 [[TMP9]], [[I_06_PH]]
565+
; CORTEXA55-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP8]], 3
566+
; CORTEXA55-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
567+
; CORTEXA55-NEXT: br i1 [[LCMP_MOD]], label %[[SCALAR_REMAINDER_PROL_PREHEADER:.*]], label %[[SCALAR_REMAINDER_PROL_LOOPEXIT:.*]]
568+
; CORTEXA55: [[SCALAR_REMAINDER_PROL_PREHEADER]]:
569+
; CORTEXA55-NEXT: br label %[[SCALAR_REMAINDER_PROL:.*]]
570+
; CORTEXA55: [[SCALAR_REMAINDER_PROL]]:
571+
; CORTEXA55-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[I_06_PH]]
572+
; CORTEXA55-NEXT: [[TMP11:%.*]] = load i8, ptr [[ARRAYIDX_PROL]], align 1
573+
; CORTEXA55-NEXT: [[ADD_PROL:%.*]] = add i8 [[TMP11]], [[SPLAT_SCALAR]]
574+
; CORTEXA55-NEXT: store i8 [[ADD_PROL]], ptr [[ARRAYIDX_PROL]], align 1
575+
; CORTEXA55-NEXT: [[INC_PROL:%.*]] = add nuw i64 [[I_06_PH]], 1
576+
; CORTEXA55-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 1, [[XTRAITER]]
577+
; CORTEXA55-NEXT: br i1 [[PROL_ITER_CMP]], label %[[SCALAR_REMAINDER_PROL_1:.*]], label %[[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA:.*]]
578+
; CORTEXA55: [[SCALAR_REMAINDER_PROL_1]]:
579+
; CORTEXA55-NEXT: [[ARRAYIDX_PROL_1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_PROL]]
580+
; CORTEXA55-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX_PROL_1]], align 1
581+
; CORTEXA55-NEXT: [[ADD_PROL_1:%.*]] = add i8 [[TMP12]], [[SPLAT_SCALAR]]
582+
; CORTEXA55-NEXT: store i8 [[ADD_PROL_1]], ptr [[ARRAYIDX_PROL_1]], align 1
583+
; CORTEXA55-NEXT: [[INC_PROL_1:%.*]] = add nuw i64 [[I_06_PH]], 2
584+
; CORTEXA55-NEXT: [[PROL_ITER_CMP_1:%.*]] = icmp ne i64 2, [[XTRAITER]]
585+
; CORTEXA55-NEXT: br i1 [[PROL_ITER_CMP_1]], label %[[SCALAR_REMAINDER_PROL_2:.*]], label %[[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA]]
586+
; CORTEXA55: [[SCALAR_REMAINDER_PROL_2]]:
587+
; CORTEXA55-NEXT: [[ARRAYIDX_PROL_2:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_PROL_1]]
588+
; CORTEXA55-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX_PROL_2]], align 1
589+
; CORTEXA55-NEXT: [[ADD_PROL_2:%.*]] = add i8 [[TMP13]], [[SPLAT_SCALAR]]
590+
; CORTEXA55-NEXT: store i8 [[ADD_PROL_2]], ptr [[ARRAYIDX_PROL_2]], align 1
591+
; CORTEXA55-NEXT: [[INC_PROL_2:%.*]] = add nuw i64 [[I_06_PH]], 3
592+
; CORTEXA55-NEXT: br label %[[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA]]
593+
; CORTEXA55: [[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA]]:
594+
; CORTEXA55-NEXT: [[IV_SCALAR_LOOP_UNR_PH:%.*]] = phi i64 [ [[INC_PROL]], %[[SCALAR_REMAINDER_PROL]] ], [ [[INC_PROL_1]], %[[SCALAR_REMAINDER_PROL_1]] ], [ [[INC_PROL_2]], %[[SCALAR_REMAINDER_PROL_2]] ]
595+
; CORTEXA55-NEXT: br label %[[SCALAR_REMAINDER_PROL_LOOPEXIT]]
596+
; CORTEXA55: [[SCALAR_REMAINDER_PROL_LOOPEXIT]]:
597+
; CORTEXA55-NEXT: [[IV_SCALAR_LOOP_UNR:%.*]] = phi i64 [ [[I_06_PH]], %[[SCALAR_REMAINDER_PREHEADER]] ], [ [[IV_SCALAR_LOOP_UNR_PH]], %[[SCALAR_REMAINDER_PROL_LOOPEXIT_UNR_LCSSA]] ]
598+
; CORTEXA55-NEXT: [[TMP14:%.*]] = icmp ult i64 [[TMP10]], 3
599+
; CORTEXA55-NEXT: br i1 [[TMP14]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_REMAINDER_PREHEADER_NEW:.*]]
600+
; CORTEXA55: [[SCALAR_REMAINDER_PREHEADER_NEW]]:
601+
; CORTEXA55-NEXT: br label %[[SCALAR_REMAINDER:.*]]
602+
; CORTEXA55: [[SCALAR_REMAINDER]]:
603+
; CORTEXA55-NEXT: [[I_06:%.*]] = phi i64 [ [[IV_SCALAR_LOOP_UNR]], %[[SCALAR_REMAINDER_PREHEADER_NEW]] ], [ [[INC_3:%.*]], %[[SCALAR_REMAINDER]] ]
604+
; CORTEXA55-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[I_06]]
605+
; CORTEXA55-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
606+
; CORTEXA55-NEXT: [[ADD:%.*]] = add i8 [[TMP15]], [[SPLAT_SCALAR]]
607+
; CORTEXA55-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX]], align 1
608+
; CORTEXA55-NEXT: [[INC:%.*]] = add nuw i64 [[I_06]], 1
609+
; CORTEXA55-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC]]
610+
; CORTEXA55-NEXT: [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX_1]], align 1
611+
; CORTEXA55-NEXT: [[ADD_1:%.*]] = add i8 [[TMP16]], [[SPLAT_SCALAR]]
612+
; CORTEXA55-NEXT: store i8 [[ADD_1]], ptr [[ARRAYIDX_1]], align 1
613+
; CORTEXA55-NEXT: [[INC_1:%.*]] = add nuw i64 [[I_06]], 2
614+
; CORTEXA55-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_1]]
615+
; CORTEXA55-NEXT: [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX_2]], align 1
616+
; CORTEXA55-NEXT: [[ADD_2:%.*]] = add i8 [[TMP17]], [[SPLAT_SCALAR]]
617+
; CORTEXA55-NEXT: store i8 [[ADD_2]], ptr [[ARRAYIDX_2]], align 1
618+
; CORTEXA55-NEXT: [[INC_2:%.*]] = add nuw i64 [[I_06]], 3
619+
; CORTEXA55-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[INC_2]]
620+
; CORTEXA55-NEXT: [[TMP18:%.*]] = load i8, ptr [[ARRAYIDX_3]], align 1
621+
; CORTEXA55-NEXT: [[ADD_3:%.*]] = add i8 [[TMP18]], [[SPLAT_SCALAR]]
622+
; CORTEXA55-NEXT: store i8 [[ADD_3]], ptr [[ARRAYIDX_3]], align 1
623+
; CORTEXA55-NEXT: [[INC_3]] = add nuw i64 [[I_06]], 4
624+
; CORTEXA55-NEXT: [[EXITCOND_NOT_3:%.*]] = icmp eq i64 [[INC_3]], [[N]]
625+
; CORTEXA55-NEXT: br i1 [[EXITCOND_NOT_3]], label %[[EXIT_LOOPEXIT_UNR_LCSSA:.*]], label %[[SCALAR_REMAINDER]], !llvm.loop [[LOOP3:![0-9]+]]
626+
; CORTEXA55: [[EXIT_LOOPEXIT_UNR_LCSSA]]:
627+
; CORTEXA55-NEXT: br label %[[EXIT_LOOPEXIT]]
628+
; CORTEXA55: [[EXIT_LOOPEXIT]]:
629+
; CORTEXA55-NEXT: br label %[[EXIT]]
630+
; CORTEXA55: [[EXIT]]:
631+
; CORTEXA55-NEXT: ret void
632+
;
633+
entry:
634+
%min.iters.check = icmp ult i64 %n, 32
635+
br i1 %min.iters.check, label %scalar.remainder, label %vector.ph
636+
637+
vector.ph:
638+
%n.vec = and i64 %n, -32
639+
%broadcast.splatinsert = insertelement <16 x i8> poison, i8 %splat.scalar, i64 0
640+
%broadcast.splat = shufflevector <16 x i8> %broadcast.splatinsert, <16 x i8> poison, <16 x i32> zeroinitializer
641+
br label %vector.body
642+
643+
vector.body:
644+
%iv = phi i64 [ 0, %vector.ph ], [ %iv.next, %vector.body ]
645+
%gep.p.iv = getelementptr inbounds nuw i8, ptr %p, i64 %iv
646+
%gep.p.iv.16 = getelementptr inbounds nuw i8, ptr %gep.p.iv, i64 16
647+
%wide.load = load <16 x i8>, ptr %gep.p.iv, align 1
648+
%wide.load.2 = load <16 x i8>, ptr %gep.p.iv.16, align 1
649+
%add.broadcast = add <16 x i8> %wide.load, %broadcast.splat
650+
%add.broadcast.2 = add <16 x i8> %wide.load.2, %broadcast.splat
651+
store <16 x i8> %add.broadcast, ptr %gep.p.iv, align 1
652+
store <16 x i8> %add.broadcast.2, ptr %gep.p.iv.16, align 1
653+
%iv.next = add nuw i64 %iv, 32
654+
%exit.cond = icmp eq i64 %iv.next, %n.vec
655+
br i1 %exit.cond, label %middle.block, label %vector.body, !llvm.loop !2
656+
657+
middle.block:
658+
%cmp.n = icmp eq i64 %n, %n.vec
659+
br i1 %cmp.n, label %exit, label %scalar.remainder
660+
661+
scalar.remainder:
662+
%iv.scalar.loop = phi i64 [ %inc, %scalar.remainder ], [ %n.vec, %middle.block ], [ 0, %entry ]
663+
%arrayidx = getelementptr inbounds nuw i8, ptr %p, i64 %iv.scalar.loop
664+
%scalar.load = load i8, ptr %arrayidx, align 1
665+
%add = add i8 %scalar.load, %splat.scalar
666+
store i8 %add, ptr %arrayidx, align 1
667+
%inc = add nuw i64 %iv.scalar.loop, 1
668+
%exitcond.not = icmp eq i64 %inc, %n
669+
br i1 %exitcond.not, label %exit, label %scalar.remainder, !llvm.loop !3
670+
671+
exit:
672+
ret void
673+
}
674+
675+
!2 = distinct !{!2, !1}
676+
!3 = distinct !{!3, !1}
677+
488678
;.
489679
; APPLE: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
490680
; APPLE: [[META1]] = !{!"llvm.loop.unroll.disable"}
491681
; APPLE: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
492682
; APPLE: [[META3]] = !{!"llvm.loop.isvectorized", i32 1}
683+
; APPLE: [[LOOP4]] = distinct !{[[LOOP4]], [[META3]]}
684+
; APPLE: [[LOOP5]] = distinct !{[[LOOP5]], [[META3]]}
493685
;.
494686
; CORTEXA55: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
495687
; CORTEXA55: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
688+
; CORTEXA55: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
689+
; CORTEXA55: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
496690
;.

0 commit comments

Comments
 (0)