@@ -592,6 +592,151 @@ for.end:
   ret i32 %for1
 }
 
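+; Test a first-order recurrence whose previous value is computed from the
+; induction variable (%x = %indvars + 42, carried into %for1), both with EVL
+; tail folding (IF-EVL) and without (NO-VP).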
+define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) {
+; IF-EVL-LABEL: define void @first_order_recurrence_indvar(
+; IF-EVL-SAME: ptr noalias [[A:%.*]], i64 [[TC:%.*]]) #[[ATTR0]] {
+; IF-EVL-NEXT: [[ENTRY:.*]]:
+; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IF-EVL: [[VECTOR_PH]]:
+; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
+; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
+; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[TC]], [[TMP2]]
+; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
+; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; IF-EVL-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP18]], 2
+; IF-EVL-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
+; IF-EVL-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
+; IF-EVL-NEXT: [[TMP12:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
+; IF-EVL-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP12]]
+; IF-EVL-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
+; IF-EVL-NEXT: [[TMP19:%.*]] = mul nuw i32 [[TMP13]], 2
+; IF-EVL-NEXT: [[TMP10:%.*]] = sub i32 [[TMP19]], 1
+; IF-EVL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 33, i32 [[TMP10]]
+; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
+; IF-EVL: [[VECTOR_BODY]]:
+; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 2 x i64> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP5]], %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP11]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; IF-EVL-NEXT: [[TMP7:%.*]] = zext i32 [[TMP11]] to i64
+; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP7]]
+; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
+; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; IF-EVL-NEXT: [[TMP20]] = add <vscale x 2 x i64> [[VEC_IND]], splat (i64 42)
+; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> [[VECTOR_RECUR]], <vscale x 2 x i64> [[TMP20]], i32 -1)
+; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i64, ptr [[TMP9]], i32 0
+; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP15]], ptr align 8 [[TMP17]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
+; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP11]] to i64
+; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
+; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; IF-EVL: [[MIDDLE_BLOCK]]:
+; IF-EVL-NEXT: br label %[[FOR_END:.*]]
+; IF-EVL: [[SCALAR_PH]]:
+; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; IF-EVL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 33, %[[ENTRY]] ]
+; IF-EVL-NEXT: br label %[[FOR_BODY:.*]]
+; IF-EVL: [[FOR_BODY]]:
+; IF-EVL-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV1_NEXT:%.*]], %[[FOR_BODY]] ]
+; IF-EVL-NEXT: [[FOR1:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TMP14:%.*]], %[[FOR_BODY]] ]
+; IF-EVL-NEXT: [[TMP14]] = add i64 [[IV1]], 42
+; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[IV1]]
+; IF-EVL-NEXT: store i64 [[FOR1]], ptr [[ARRAYIDX]], align 8
+; IF-EVL-NEXT: [[IV1_NEXT]] = add nuw nsw i64 [[IV1]], 1
+; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV1_NEXT]], [[TC]]
+; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; IF-EVL: [[FOR_END]]:
+; IF-EVL-NEXT: ret void
+;
+; NO-VP-LABEL: define void @first_order_recurrence_indvar(
+; NO-VP-SAME: ptr noalias [[A:%.*]], i64 [[TC:%.*]]) #[[ATTR0]] {
+; NO-VP-NEXT: [[ENTRY:.*]]:
+; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
+; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TC]], [[TMP1]]
+; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; NO-VP: [[VECTOR_PH]]:
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
+; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]]
+; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]]
+; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP4]], 2
+; NO-VP-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
+; NO-VP-NEXT: [[TMP7:%.*]] = mul <vscale x 2 x i64> [[TMP6]], splat (i64 1)
+; NO-VP-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP7]]
+; NO-VP-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP9]]
+; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP10]], i64 0
+; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; NO-VP-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
+; NO-VP-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP14]], 2
+; NO-VP-NEXT: [[TMP20:%.*]] = sub i32 [[TMP16]], 1
+; NO-VP-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 33, i32 [[TMP20]]
+; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
+; NO-VP: [[VECTOR_BODY]]:
+; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; NO-VP-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; NO-VP-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 2 x i64> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
+; NO-VP-NEXT: [[TMP12]] = add <vscale x 2 x i64> [[VEC_IND]], splat (i64 42)
+; NO-VP-NEXT: [[TMP13:%.*]] = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> [[VECTOR_RECUR]], <vscale x 2 x i64> [[TMP12]], i32 -1)
+; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[INDEX]]
+; NO-VP-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i64, ptr [[TMP11]], i32 0
+; NO-VP-NEXT: store <vscale x 2 x i64> [[TMP13]], ptr [[TMP15]], align 8
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
+; NO-VP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; NO-VP-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; NO-VP-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; NO-VP: [[MIDDLE_BLOCK]]:
+; NO-VP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
+; NO-VP-NEXT: [[TMP21:%.*]] = mul nuw i32 [[TMP17]], 2
+; NO-VP-NEXT: [[TMP19:%.*]] = sub i32 [[TMP21]], 1
+; NO-VP-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 2 x i64> [[TMP12]], i32 [[TMP19]]
+; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TC]], [[N_VEC]]
+; NO-VP-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; NO-VP: [[SCALAR_PH]]:
+; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; NO-VP-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 33, %[[ENTRY]] ]
+; NO-VP-NEXT: br label %[[FOR_BODY:.*]]
+; NO-VP: [[FOR_BODY]]:
+; NO-VP-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV1_NEXT:%.*]], %[[FOR_BODY]] ]
+; NO-VP-NEXT: [[FOR1:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TMP18:%.*]], %[[FOR_BODY]] ]
+; NO-VP-NEXT: [[TMP18]] = add i64 [[IV1]], 42
+; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[IV1]]
+; NO-VP-NEXT: store i64 [[FOR1]], ptr [[ARRAYIDX]], align 8
+; NO-VP-NEXT: [[IV1_NEXT]] = add nuw nsw i64 [[IV1]], 1
+; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV1_NEXT]], [[TC]]
+; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; NO-VP: [[FOR_END]]:
+; NO-VP-NEXT: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars = phi i64 [ 0, %entry ], [ %indvars.next, %for.body ]
+  %for1 = phi i64 [ 33, %entry ], [ %x, %for.body ]
+
+  %x = add i64 %indvars, 42
+
+  %arrayidx = getelementptr inbounds nuw i64, ptr %A, i64 %indvars
+  store i64 %for1, ptr %arrayidx
+
+  %indvars.next = add nuw nsw i64 %indvars, 1
+  %exitcond.not = icmp eq i64 %indvars.next, %TC
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
 !0 = distinct !{!0, !1}
 !1 = !{!"llvm.loop.vectorize.enable", i1 true}
 ;.
@@ -606,6 +751,8 @@ for.end:
 ; IF-EVL: [[LOOP8]] = distinct !{[[LOOP8]], [[META3]], [[META1]]}
 ; IF-EVL: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]]}
 ; IF-EVL: [[META10]] = !{!"llvm.loop.vectorize.enable", i1 true}
+; IF-EVL: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]], [[META3]]}
+; IF-EVL: [[LOOP12]] = distinct !{[[LOOP12]], [[META3]], [[META1]]}
 ;.
 ; NO-VP: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
 ; NO-VP: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -617,4 +764,6 @@ for.end:
 ; NO-VP: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
 ; NO-VP: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
 ; NO-VP: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
+; NO-VP: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
+; NO-VP: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
 ;.