
[RISCV] Enable tail folding by default #151681


Merged
merged 8 commits into from
Aug 8, 2025
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -114,6 +114,9 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
bool enableScalableVectorization() const override {
return ST->hasVInstructions();
}
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override {
return ST->hasVInstructions();
}
TailFoldingStyle
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override {
return ST->hasVInstructions() ? TailFoldingStyle::DataWithEVL
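For readers skimming the test churn below: with `DataWithEVL` tail folding there is no scalar remainder loop. Each vector iteration asks `@llvm.experimental.get.vector.length` how many of the remaining `AVL` elements to handle and drives the VP load/store intrinsics with that explicit vector length. The following is a hand-written C++ sketch of that control flow for the `fadd` test, not output of this PR; `VLMAX` is an arbitrary stand-in for `vscale x 8` lanes, `get.vector.length` is simplified to `std::min`, and `bfloat` is modeled as `float` to keep the sketch self-contained.

```cpp
#include <algorithm>
#include <cstdint>

// Hand-written sketch of EVL-style tail folding for the fadd loop in the
// bf16.ll test below; bfloat is modeled as float (an assumption of this
// sketch, not part of the PR).
void fadd_tail_folded(float *a, const float *b, int64_t n) {
  const int64_t VLMAX = 8;                    // stand-in for vscale x 8 lanes
  int64_t i = 0;                              // vector induction (INDEX_EVL)
  for (int64_t avl = n; avl > 0;) {           // AVL: elements left to process
    int64_t vl = std::min(avl, VLMAX);        // models get.vector.length(AVL)
    for (int64_t lane = 0; lane < vl; ++lane) // one vp.load / fadd / vp.store step
      a[i + lane] += b[i + lane];
    i += vl;                                  // INDEX_EVL_NEXT
    avl -= vl;                                // AVL_NEXT
  }                                           // no scalar epilogue is needed
}
```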
78 changes: 41 additions & 37 deletions llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll
@@ -26,37 +26,39 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) {
; ZVFBFMIN-LABEL: define void @fadd(
; ZVFBFMIN-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; ZVFBFMIN-NEXT: [[ENTRY:.*]]:
; ZVFBFMIN-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; ZVFBFMIN-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 8
; ZVFBFMIN-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP8]]
; ZVFBFMIN-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; ZVFBFMIN-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; ZVFBFMIN: [[VECTOR_PH]]:
; ZVFBFMIN-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; ZVFBFMIN-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 8
; ZVFBFMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP10]]
; ZVFBFMIN-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; ZVFBFMIN-NEXT: [[TMP3:%.*]] = sub i64 [[TMP10]], 1
; ZVFBFMIN-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP3]]
; ZVFBFMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP10]]
; ZVFBFMIN-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; ZVFBFMIN-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; ZVFBFMIN-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP12]], 8
; ZVFBFMIN-NEXT: br label %[[VECTOR_BODY:.*]]
; ZVFBFMIN: [[VECTOR_BODY]]:
; ZVFBFMIN-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; ZVFBFMIN-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; ZVFBFMIN-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; ZVFBFMIN-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
; ZVFBFMIN-NEXT: [[TMP1:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[TMP0]]
; ZVFBFMIN-NEXT: [[TMP2:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[TMP0]]
; ZVFBFMIN-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x bfloat>, ptr [[TMP1]], align 2
; ZVFBFMIN-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x bfloat>, ptr [[TMP2]], align 2
; ZVFBFMIN-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 8 x bfloat> @llvm.vp.load.nxv8bf16.p0(ptr align 2 [[TMP1]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
; ZVFBFMIN-NEXT: [[WIDE_LOAD1:%.*]] = call <vscale x 8 x bfloat> @llvm.vp.load.nxv8bf16.p0(ptr align 2 [[TMP2]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
; ZVFBFMIN-NEXT: [[TMP11:%.*]] = fadd <vscale x 8 x bfloat> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; ZVFBFMIN-NEXT: store <vscale x 8 x bfloat> [[TMP11]], ptr [[TMP1]], align 2
; ZVFBFMIN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], [[TMP5]]
; ZVFBFMIN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; ZVFBFMIN-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; ZVFBFMIN-NEXT: call void @llvm.vp.store.nxv8bf16.p0(<vscale x 8 x bfloat> [[TMP11]], ptr align 2 [[TMP1]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
; ZVFBFMIN-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64
; ZVFBFMIN-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[TMP0]]
; ZVFBFMIN-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]]
; ZVFBFMIN-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; ZVFBFMIN-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; ZVFBFMIN: [[MIDDLE_BLOCK]]:
; ZVFBFMIN-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; ZVFBFMIN-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; ZVFBFMIN-NEXT: br label %[[EXIT:.*]]
; ZVFBFMIN: [[SCALAR_PH]]:
; ZVFBFMIN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; ZVFBFMIN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
; ZVFBFMIN-NEXT: br label %[[LOOP:.*]]
; ZVFBFMIN: [[LOOP]]:
; ZVFBFMIN-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
; ZVFBFMIN-NEXT: [[I:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
; ZVFBFMIN-NEXT: [[A_GEP:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[I]]
; ZVFBFMIN-NEXT: [[B_GEP:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I]]
; ZVFBFMIN-NEXT: [[X:%.*]] = load bfloat, ptr [[A_GEP]], align 2
@@ -65,7 +67,7 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) {
; ZVFBFMIN-NEXT: store bfloat [[Z]], ptr [[A_GEP]], align 2
; ZVFBFMIN-NEXT: [[I_NEXT]] = add i64 [[I]], 1
; ZVFBFMIN-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
; ZVFBFMIN-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; ZVFBFMIN-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
; ZVFBFMIN: [[EXIT]]:
; ZVFBFMIN-NEXT: ret void
;
@@ -138,41 +140,43 @@ define void @vfwmaccbf16.vv(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64
; ZVFBFMIN-LABEL: define void @vfwmaccbf16.vv(
; ZVFBFMIN-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; ZVFBFMIN-NEXT: [[ENTRY:.*]]:
; ZVFBFMIN-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; ZVFBFMIN-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
; ZVFBFMIN-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; ZVFBFMIN-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; ZVFBFMIN-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; ZVFBFMIN: [[VECTOR_PH]]:
; ZVFBFMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; ZVFBFMIN-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; ZVFBFMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; ZVFBFMIN-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; ZVFBFMIN-NEXT: [[TMP10:%.*]] = sub i64 [[TMP3]], 1
; ZVFBFMIN-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP10]]
; ZVFBFMIN-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
; ZVFBFMIN-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; ZVFBFMIN-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; ZVFBFMIN-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; ZVFBFMIN-NEXT: br label %[[VECTOR_BODY:.*]]
; ZVFBFMIN: [[VECTOR_BODY]]:
; ZVFBFMIN-NEXT: [[TMP6:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; ZVFBFMIN-NEXT: [[TMP6:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; ZVFBFMIN-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; ZVFBFMIN-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; ZVFBFMIN-NEXT: [[TMP7:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[TMP6]]
; ZVFBFMIN-NEXT: [[TMP8:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[TMP6]]
; ZVFBFMIN-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[C]], i64 [[TMP6]]
; ZVFBFMIN-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x bfloat>, ptr [[TMP7]], align 2
; ZVFBFMIN-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x bfloat>, ptr [[TMP8]], align 2
; ZVFBFMIN-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
; ZVFBFMIN-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 4 x bfloat> @llvm.vp.load.nxv4bf16.p0(ptr align 2 [[TMP7]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
; ZVFBFMIN-NEXT: [[WIDE_LOAD1:%.*]] = call <vscale x 4 x bfloat> @llvm.vp.load.nxv4bf16.p0(ptr align 2 [[TMP8]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
; ZVFBFMIN-NEXT: [[WIDE_LOAD2:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP9]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
; ZVFBFMIN-NEXT: [[TMP13:%.*]] = fpext <vscale x 4 x bfloat> [[WIDE_LOAD]] to <vscale x 4 x float>
; ZVFBFMIN-NEXT: [[TMP14:%.*]] = fpext <vscale x 4 x bfloat> [[WIDE_LOAD1]] to <vscale x 4 x float>
; ZVFBFMIN-NEXT: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> [[TMP13]], <vscale x 4 x float> [[TMP14]], <vscale x 4 x float> [[WIDE_LOAD2]])
; ZVFBFMIN-NEXT: store <vscale x 4 x float> [[TMP15]], ptr [[TMP9]], align 4
; ZVFBFMIN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP6]], [[TMP5]]
; ZVFBFMIN-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; ZVFBFMIN-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; ZVFBFMIN-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[TMP15]], ptr align 4 [[TMP9]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
; ZVFBFMIN-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64
; ZVFBFMIN-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP12]], [[TMP6]]
; ZVFBFMIN-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]]
; ZVFBFMIN-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
; ZVFBFMIN-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; ZVFBFMIN: [[MIDDLE_BLOCK]]:
; ZVFBFMIN-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; ZVFBFMIN-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; ZVFBFMIN-NEXT: br label %[[EXIT:.*]]
; ZVFBFMIN: [[SCALAR_PH]]:
; ZVFBFMIN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; ZVFBFMIN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
; ZVFBFMIN-NEXT: br label %[[LOOP:.*]]
; ZVFBFMIN: [[LOOP]]:
; ZVFBFMIN-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
; ZVFBFMIN-NEXT: [[I:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
; ZVFBFMIN-NEXT: [[A_GEP:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[I]]
; ZVFBFMIN-NEXT: [[B_GEP:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I]]
; ZVFBFMIN-NEXT: [[C_GEP:%.*]] = getelementptr float, ptr [[C]], i64 [[I]]
@@ -185,7 +189,7 @@ define void @vfwmaccbf16.vv(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64
; ZVFBFMIN-NEXT: store float [[FMULADD]], ptr [[C_GEP]], align 4
; ZVFBFMIN-NEXT: [[I_NEXT]] = add i64 [[I]], 1
; ZVFBFMIN-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
; ZVFBFMIN-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; ZVFBFMIN-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
; ZVFBFMIN: [[EXIT]]:
; ZVFBFMIN-NEXT: ret void
;
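The widening test follows the same pattern. Below is another hand-written sketch (again not compiler output) of what the EVL-folded `vfwmaccbf16.vv` loop computes: `bf16_to_float` is a hypothetical helper that widens a bfloat16 bit pattern to `float`, mirroring the `fpext`, and `std::fma` stands in for `llvm.fmuladd`.

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstring>

// Hypothetical helper (not in the PR): widen a bfloat16 bit pattern to float.
static float bf16_to_float(uint16_t bits) {
  uint32_t w = static_cast<uint32_t>(bits) << 16;
  float f;
  std::memcpy(&f, &w, sizeof f);
  return f;
}

// Sketch of the EVL-folded vfwmaccbf16.vv loop: bf16 operands are widened to
// float and combined with a fused multiply-add, vl elements at a time, with
// no scalar remainder loop.
void vfwmaccbf16_tail_folded(const uint16_t *a, const uint16_t *b, float *c,
                             int64_t n) {
  const int64_t VLMAX = 4;                     // stand-in for vscale x 4 lanes
  int64_t i = 0;
  for (int64_t avl = n; avl > 0;) {
    int64_t vl = std::min(avl, VLMAX);         // models get.vector.length(AVL)
    for (int64_t lane = 0; lane < vl; ++lane)
      c[i + lane] = std::fma(bf16_to_float(a[i + lane]),
                             bf16_to_float(b[i + lane]), c[i + lane]);
    i += vl;                                   // INDEX_EVL_NEXT
    avl -= vl;                                 // AVL_NEXT
  }
}
```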
@@ -62,50 +62,10 @@ define i32 @any_of_reduction_used_in_blend_with_multiple_phis(ptr %src, i64 %N,
; CHECK-LABEL: define i32 @any_of_reduction_used_in_blend_with_multiple_phis(
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i1 [[C_0:%.*]], i1 [[C_1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i1> poison, i1 [[C_1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i1> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i1> poison, i1 [[C_0]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i1> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = xor <vscale x 2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = xor <vscale x 2 x i1> [[BROADCAST_SPLAT2]], splat (i1 true)
; CHECK-NEXT: [[TMP8:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i1> [[TMP7]], <vscale x 2 x i1> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[SRC]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT4]], i32 8, <vscale x 2 x i1> [[TMP8]], <vscale x 2 x ptr> poison)
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <vscale x 2 x ptr> [[WIDE_MASKED_GATHER]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = or <vscale x 2 x i1> [[VEC_PHI]], [[TMP9]]
; CHECK-NEXT: [[PREDPHI]] = select <vscale x 2 x i1> [[TMP8]], <vscale x 2 x i1> [[TMP10]], <vscale x 2 x i1> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
Member: Cost model caused?

Contributor Author: Yeah, I've included a note about this in the PR description

; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1> [[PREDPHI]])
; CHECK-NEXT: [[TMP13:%.*]] = freeze i1 [[TMP12]]
; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 0, i32 0
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[ANY_OF_RED:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[ANY_OF_RED_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH]] ]
; CHECK-NEXT: [[ANY_OF_RED:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ANY_OF_RED_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH]] ]
; CHECK-NEXT: br i1 [[C_0]], label %[[X_1:.*]], label %[[ELSE_1:.*]]
; CHECK: [[ELSE_1]]:
; CHECK-NEXT: br i1 [[C_1]], label %[[X_1]], label %[[ELSE_2:.*]]
@@ -121,9 +81,9 @@ define i32 @any_of_reduction_used_in_blend_with_multiple_phis(ptr %src, i64 %N,
; CHECK-NEXT: [[ANY_OF_RED_NEXT]] = phi i32 [ [[P]], %[[X_1]] ], [ [[SEL]], %[[ELSE_2]] ]
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[ANY_OF_RED_NEXT]], %[[LOOP_LATCH]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[ANY_OF_RED_NEXT]], %[[LOOP_LATCH]] ]
; CHECK-NEXT: ret i32 [[RES]]
;
entry:
@@ -159,9 +119,3 @@ exit:
}

attributes #0 = { "target-cpu"="sifive-p670" }
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
;.