Commit aaa4623

[RISCV] Cost bf16/f16 vector loads and stores as legal without zvfhmin/zvfbfmin
When vectorizing with predication, some loops that were previously vectorized without zvfhmin/zvfbfmin will no longer be vectorized, because the cost of the masked load/store or gather/scatter comes back as invalid. This is due to a discrepancy: for those costs we check isLegalElementTypeForRVV, but for regular memory accesses we don't. Since bf16 and f16 vectors don't actually need the extension support for loads and stores, this adds a new function, isLegalLoadStoreElementTypeForRVV, which takes that into account.

For regular memory accesses we should probably also return an invalid cost for e.g. i64 elements on zve32x, but it doesn't look like we have tests for this yet.

We also probably shouldn't be vectorizing these bf16/f16 loops in the first place when zvfhmin/zvfbfmin and zfhmin/zfbfmin are unavailable; I think this is due to the scalar costs being too cheap. I've added tests for this in a100f63 to fix in another patch.
1 parent c598013 commit aaa4623
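For reference, the kind of loop this affects is the fadd test in llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll, reconstructed below from the CHECK lines in the diff (the test's RUN lines and attribute groups are omitted). With tail folding, the NO-ZVFBFMIN-PREDICATED configuration now turns this into <16 x bfloat> masked loads and stores even though zvfbfmin is not enabled:

define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) {
entry:
  br label %loop
loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %a.gep = getelementptr bfloat, ptr %a, i64 %i
  %b.gep = getelementptr bfloat, ptr %b, i64 %i
  %x = load bfloat, ptr %a.gep, align 2
  %y = load bfloat, ptr %b.gep, align 2
  %z = fadd bfloat %x, %y
  store bfloat %z, ptr %a.gep, align 2
  %i.next = add i64 %i, 1
  %done = icmp eq i64 %i.next, %n
  br i1 %done, label %exit, label %loop
exit:
  ret void
}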

File tree: 7 files changed (143 additions, 21 deletions)

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 22 additions & 1 deletion
@@ -2739,6 +2739,27 @@ bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
   }
 }
 
+bool RISCVTargetLowering::isLegalLoadStoreElementTypeForRVV(
+    EVT ScalarTy) const {
+  if (!ScalarTy.isSimple())
+    return false;
+  switch (ScalarTy.getSimpleVT().SimpleTy) {
+  case MVT::iPTR:
+    return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
+  case MVT::i8:
+  case MVT::i16:
+  case MVT::i32:
+  case MVT::f16:
+  case MVT::bf16:
+  case MVT::f32:
+    return true;
+  case MVT::i64:
+  case MVT::f64:
+    return Subtarget.hasVInstructionsI64();
+  default:
+    return false;
+  }
+}
 
 unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
   return NumRepeatedDivisors;
@@ -24239,7 +24260,7 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
     return false;
 
   EVT ScalarType = DataType.getScalarType();
-  if (!isLegalElementTypeForRVV(ScalarType))
+  if (!isLegalLoadStoreElementTypeForRVV(ScalarType))
     return false;
 
   if (!Subtarget.enableUnalignedVectorMem() &&
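isLegalStridedLoadStore above backs the cost-model legality check for strided vector memory accesses, so strided bf16/f16 accesses are likewise treated as legal without zvfhmin/zvfbfmin. A minimal sketch of such an access is below; the intrinsic spelling follows the LangRef rather than anything in this commit, so treat the exact mangling as an assumption:

define <4 x bfloat> @strided_load_bf16(ptr %p, i64 %stride, <4 x i1> %m, i32 %evl) {
  ; Loads up to 4 bfloat elements spaced %stride bytes apart under mask %m.
  %v = call <4 x bfloat> @llvm.experimental.vp.strided.load.v4bf16.p0.i64(ptr %p, i64 %stride, <4 x i1> %m, i32 %evl)
  ret <4 x bfloat> %v
}

declare <4 x bfloat> @llvm.experimental.vp.strided.load.v4bf16.p0.i64(ptr, i64, <4 x i1>, i32)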

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 1 addition & 0 deletions
@@ -384,6 +384,7 @@ class RISCVTargetLowering : public TargetLowering {
   bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
 
   bool isLegalElementTypeForRVV(EVT ScalarTy) const;
+  bool isLegalLoadStoreElementTypeForRVV(EVT ScalarTy) const;
 
   bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp

Lines changed: 1 addition & 1 deletion
@@ -32,7 +32,7 @@ bool RISCVTargetLowering::isLegalInterleavedAccessType(
   if (!isTypeLegal(VT))
     return false;
 
-  if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
+  if (!isLegalLoadStoreElementTypeForRVV(VT.getScalarType()) ||
       !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
                                       Alignment))
     return false;

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Lines changed: 2 additions & 2 deletions
@@ -265,7 +265,7 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
     if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize())
       return false;
 
-    return TLI->isLegalElementTypeForRVV(ElemType);
+    return TLI->isLegalLoadStoreElementTypeForRVV(ElemType);
   }
 
   bool isLegalMaskedLoad(Type *DataType, Align Alignment,
@@ -297,7 +297,7 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
     if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize())
       return false;
 
-    return TLI->isLegalElementTypeForRVV(ElemType);
+    return TLI->isLegalLoadStoreElementTypeForRVV(ElemType);
   }
 
   bool isLegalMaskedGather(Type *DataType, Align Alignment) const override {

llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll

Lines changed: 4 additions & 4 deletions
@@ -13,14 +13,14 @@ define void @fixed() {
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i32> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i32> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x float> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x float> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x double> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 223 for instruction: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 entry:
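These costs can be reproduced with the cost-model printer; the invocation below is an illustration rather than the test's actual RUN line, so the pass spelling, file name, and -mattr string are assumptions, and the reported numbers depend on the enabled extensions:

; Query with, e.g.:  opt -passes="print<cost-model>" -disable-output -mtriple=riscv64 -mattr=+v masked-f16.ll 2>&1
define <4 x half> @masked_load_f16(ptr %p, <4 x i1> %m) {
  ; Without zvfh/zvfhmin this masked load previously got a scalarization-style
  ; cost (19 in the test above); it is now costed as a legal masked access (4).
  %v = call <4 x half> @llvm.masked.load.v4f16.p0(ptr %p, i32 8, <4 x i1> %m, <4 x half> poison)
  ret <4 x half> %v
}

declare <4 x half> @llvm.masked.load.v4f16.p0(ptr, i32, <4 x i1>, <4 x half>)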

llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll

Lines changed: 78 additions & 11 deletions
@@ -25,9 +25,37 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) {
 ; NO-ZVFBFMIN-PREDICATED-LABEL: define void @fadd(
 ; NO-ZVFBFMIN-PREDICATED-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 ; NO-ZVFBFMIN-PREDICATED-NEXT: [[ENTRY:.*]]:
+; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; NO-ZVFBFMIN-PREDICATED: [[VECTOR_PH]]:
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], 15
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer
+; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[VECTOR_BODY:.*]]
+; NO-ZVFBFMIN-PREDICATED: [[VECTOR_BODY]]:
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i64> poison, i64 [[INDEX]], i64 0
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT1]], <16 x i64> poison, <16 x i32> zeroinitializer
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[VEC_IV:%.*]] = add <16 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP0:%.*]] = icmp ule <16 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP1:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[INDEX]]
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP2:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[INDEX]]
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x bfloat> @llvm.masked.load.v16bf16.p0(ptr [[TMP1]], i32 2, <16 x i1> [[TMP0]], <16 x bfloat> poison)
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <16 x bfloat> @llvm.masked.load.v16bf16.p0(ptr [[TMP2]], i32 2, <16 x i1> [[TMP0]], <16 x bfloat> poison)
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP3:%.*]] = fadd <16 x bfloat> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD3]]
+; NO-ZVFBFMIN-PREDICATED-NEXT: call void @llvm.masked.store.v16bf16.p0(<16 x bfloat> [[TMP3]], ptr [[TMP1]], i32 2, <16 x i1> [[TMP0]])
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; NO-ZVFBFMIN-PREDICATED: [[MIDDLE_BLOCK]]:
+; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[EXIT:.*]]
+; NO-ZVFBFMIN-PREDICATED: [[SCALAR_PH]]:
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
 ; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[LOOP:.*]]
 ; NO-ZVFBFMIN-PREDICATED: [[LOOP]]:
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
 ; NO-ZVFBFMIN-PREDICATED-NEXT: [[A_GEP:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[I]]
 ; NO-ZVFBFMIN-PREDICATED-NEXT: [[B_GEP:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I]]
 ; NO-ZVFBFMIN-PREDICATED-NEXT: [[X:%.*]] = load bfloat, ptr [[A_GEP]], align 2
@@ -36,7 +64,7 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) {
 ; NO-ZVFBFMIN-PREDICATED-NEXT: store bfloat [[Z]], ptr [[A_GEP]], align 2
 ; NO-ZVFBFMIN-PREDICATED-NEXT: [[I_NEXT]] = add i64 [[I]], 1
 ; NO-ZVFBFMIN-PREDICATED-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
-; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]]
+; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
 ; NO-ZVFBFMIN-PREDICATED: [[EXIT]]:
 ; NO-ZVFBFMIN-PREDICATED-NEXT: ret void
 ;
@@ -155,22 +183,54 @@ define void @vfwmaccbf16.vv(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64
 ; NO-ZVFBFMIN-PREDICATED-LABEL: define void @vfwmaccbf16.vv(
 ; NO-ZVFBFMIN-PREDICATED-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; NO-ZVFBFMIN-PREDICATED-NEXT: [[ENTRY:.*]]:
-; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[LOOP:.*]]
-; NO-ZVFBFMIN-PREDICATED: [[LOOP]]:
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; NO-ZVFBFMIN-PREDICATED: [[VECTOR_PH]]:
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], 3
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[VECTOR_BODY:.*]]
+; NO-ZVFBFMIN-PREDICATED: [[VECTOR_BODY]]:
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[I:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[I]], i64 0
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3>
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
 ; NO-ZVFBFMIN-PREDICATED-NEXT: [[A_GEP:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[I]]
 ; NO-ZVFBFMIN-PREDICATED-NEXT: [[B_GEP:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I]]
 ; NO-ZVFBFMIN-PREDICATED-NEXT: [[C_GEP:%.*]] = getelementptr float, ptr [[C]], i64 [[I]]
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[X:%.*]] = load bfloat, ptr [[A_GEP]], align 2
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[Y:%.*]] = load bfloat, ptr [[B_GEP]], align 2
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[Z:%.*]] = load float, ptr [[C_GEP]], align 4
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x bfloat> @llvm.masked.load.v4bf16.p0(ptr [[A_GEP]], i32 2, <4 x i1> [[TMP0]], <4 x bfloat> poison)
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <4 x bfloat> @llvm.masked.load.v4bf16.p0(ptr [[B_GEP]], i32 2, <4 x i1> [[TMP0]], <4 x bfloat> poison)
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[C_GEP]], i32 4, <4 x i1> [[TMP0]], <4 x float> poison)
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP4:%.*]] = fpext <4 x bfloat> [[WIDE_MASKED_LOAD]] to <4 x float>
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP5:%.*]] = fpext <4 x bfloat> [[WIDE_MASKED_LOAD3]] to <4 x float>
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[WIDE_MASKED_LOAD4]])
+; NO-ZVFBFMIN-PREDICATED-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[TMP6]], ptr [[C_GEP]], i32 4, <4 x i1> [[TMP0]])
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[INDEX_NEXT]] = add i64 [[I]], 4
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; NO-ZVFBFMIN-PREDICATED: [[MIDDLE_BLOCK]]:
+; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[EXIT:.*]]
+; NO-ZVFBFMIN-PREDICATED: [[SCALAR_PH]]:
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
+; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[LOOP:.*]]
+; NO-ZVFBFMIN-PREDICATED: [[LOOP]]:
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[A_GEP1:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[I1]]
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[B_GEP1:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I1]]
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[C_GEP1:%.*]] = getelementptr float, ptr [[C]], i64 [[I1]]
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[X:%.*]] = load bfloat, ptr [[A_GEP1]], align 2
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[Y:%.*]] = load bfloat, ptr [[B_GEP1]], align 2
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[Z:%.*]] = load float, ptr [[C_GEP1]], align 4
 ; NO-ZVFBFMIN-PREDICATED-NEXT: [[X_EXT:%.*]] = fpext bfloat [[X]] to float
 ; NO-ZVFBFMIN-PREDICATED-NEXT: [[Y_EXT:%.*]] = fpext bfloat [[Y]] to float
 ; NO-ZVFBFMIN-PREDICATED-NEXT: [[FMULADD:%.*]] = call float @llvm.fmuladd.f32(float [[X_EXT]], float [[Y_EXT]], float [[Z]])
-; NO-ZVFBFMIN-PREDICATED-NEXT: store float [[FMULADD]], ptr [[C_GEP]], align 4
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[I_NEXT]] = add i64 [[I]], 1
+; NO-ZVFBFMIN-PREDICATED-NEXT: store float [[FMULADD]], ptr [[C_GEP1]], align 4
+; NO-ZVFBFMIN-PREDICATED-NEXT: [[I_NEXT]] = add i64 [[I1]], 1
 ; NO-ZVFBFMIN-PREDICATED-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
-; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]]
+; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
 ; NO-ZVFBFMIN-PREDICATED: [[EXIT]]:
 ; NO-ZVFBFMIN-PREDICATED-NEXT: ret void
 ;
@@ -254,6 +314,13 @@ exit:
 ; NO-ZVFBFMIN: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
 ; NO-ZVFBFMIN: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
 ;.
+; NO-ZVFBFMIN-PREDICATED: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; NO-ZVFBFMIN-PREDICATED: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; NO-ZVFBFMIN-PREDICATED: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; NO-ZVFBFMIN-PREDICATED: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+; NO-ZVFBFMIN-PREDICATED: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; NO-ZVFBFMIN-PREDICATED: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
+;.
 ; ZVFBFMIN: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
 ; ZVFBFMIN: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
 ; ZVFBFMIN: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
