diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 54845e5374131..607edd3d859f8 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -2739,6 +2739,27 @@ bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const { } } +bool RISCVTargetLowering::isLegalLoadStoreElementTypeForRVV( + EVT ScalarTy) const { + if (!ScalarTy.isSimple()) + return false; + switch (ScalarTy.getSimpleVT().SimpleTy) { + case MVT::iPTR: + return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true; + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::f16: + case MVT::bf16: + case MVT::f32: + return true; + case MVT::i64: + case MVT::f64: + return Subtarget.hasVInstructionsI64(); + default: + return false; + } +} unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const { return NumRepeatedDivisors; @@ -24239,7 +24260,7 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType, return false; EVT ScalarType = DataType.getScalarType(); - if (!isLegalElementTypeForRVV(ScalarType)) + if (!isLegalLoadStoreElementTypeForRVV(ScalarType)) return false; if (!Subtarget.enableUnalignedVectorMem() && diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index ca70c46988b4e..a788c0b72184b 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -384,6 +384,7 @@ class RISCVTargetLowering : public TargetLowering { bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override; bool isLegalElementTypeForRVV(EVT ScalarTy) const; + bool isLegalLoadStoreElementTypeForRVV(EVT ScalarTy) const; bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override; diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp index 30d8f850763a2..3cbe668b08244 100644 --- 
a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp +++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp @@ -32,7 +32,7 @@ bool RISCVTargetLowering::isLegalInterleavedAccessType( if (!isTypeLegal(VT)) return false; - if (!isLegalElementTypeForRVV(VT.getScalarType()) || + if (!isLegalLoadStoreElementTypeForRVV(VT.getScalarType()) || !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace, Alignment)) return false; diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index d62d99cf31899..f0510ec65b9d4 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -265,7 +265,7 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> { if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize()) return false; - return TLI->isLegalElementTypeForRVV(ElemType); + return TLI->isLegalLoadStoreElementTypeForRVV(ElemType); } bool isLegalMaskedLoad(Type *DataType, Align Alignment, @@ -297,7 +297,7 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> { if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize()) return false; - return TLI->isLegalElementTypeForRVV(ElemType); + return TLI->isLegalLoadStoreElementTypeForRVV(ElemType); } bool isLegalMaskedGather(Type *DataType, Align Alignment) const override { diff --git a/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll b/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll index 892277a2d5740..68c89c3f77b3f 100644 --- a/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll +++ b/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll @@ -13,14 +13,14 @@ define void @fixed() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32
8, <4 x i1> undef, <4 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i64> undef) -; CHECK-NEXT: 
Cost Model: Found an estimated cost of 223 for instruction: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; entry: diff --git a/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll index 672e94962da6d..b505917402e31 100644 --- a/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll +++ b/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll @@ -874,3 +874,79 @@ define void @load_factor2_fp128(ptr %ptr) { %v1 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> poison, <2 x i32> <i32 1, i32 3> ret void } + +define void @load_factor2_f32(ptr %ptr) { +; RV32-LABEL: @load_factor2_f32( +; RV32-NEXT: [[TMP1:%.*]] = call { <8 x float>, <8 x float> } @llvm.riscv.seg2.load.mask.v8f32.p0.i32(ptr [[PTR:%.*]], <8 x i1> splat (i1 true), i32 8) +; RV32-NEXT: [[TMP2:%.*]] = extractvalue { <8 x float>, <8 x float> } [[TMP1]], 1 +; RV32-NEXT: [[TMP3:%.*]] = extractvalue { <8 x float>, <8 x float> } [[TMP1]], 0 +; RV32-NEXT: ret void +; +; RV64-LABEL: @load_factor2_f32( +; RV64-NEXT: [[TMP1:%.*]] = call { <8 x float>, <8 x float> } @llvm.riscv.seg2.load.mask.v8f32.p0.i64(ptr [[PTR:%.*]], <8 x i1> splat (i1 true), i64 8) +; RV64-NEXT: [[TMP2:%.*]] = extractvalue { <8 x float>, <8 x float> } [[TMP1]], 1 +; RV64-NEXT: [[TMP3:%.*]] = extractvalue { <8 x float>, <8 x float> } [[TMP1]], 0 +; RV64-NEXT: ret void +; + %interleaved.vec = load <16 x float>, ptr %ptr + %v0 = shufflevector <16 x float> %interleaved.vec, <16 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %v1 = shufflevector <16 x float> %interleaved.vec, <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + ret void +} + +define void
@load_factor2_f64(ptr %ptr) { +; RV32-LABEL: @load_factor2_f64( +; RV32-NEXT: [[TMP1:%.*]] = call { <8 x double>, <8 x double> } @llvm.riscv.seg2.load.mask.v8f64.p0.i32(ptr [[PTR:%.*]], <8 x i1> splat (i1 true), i32 8) +; RV32-NEXT: [[TMP2:%.*]] = extractvalue { <8 x double>, <8 x double> } [[TMP1]], 1 +; RV32-NEXT: [[TMP3:%.*]] = extractvalue { <8 x double>, <8 x double> } [[TMP1]], 0 +; RV32-NEXT: ret void +; +; RV64-LABEL: @load_factor2_f64( +; RV64-NEXT: [[TMP1:%.*]] = call { <8 x double>, <8 x double> } @llvm.riscv.seg2.load.mask.v8f64.p0.i64(ptr [[PTR:%.*]], <8 x i1> splat (i1 true), i64 8) +; RV64-NEXT: [[TMP2:%.*]] = extractvalue { <8 x double>, <8 x double> } [[TMP1]], 1 +; RV64-NEXT: [[TMP3:%.*]] = extractvalue { <8 x double>, <8 x double> } [[TMP1]], 0 +; RV64-NEXT: ret void +; + %interleaved.vec = load <16 x double>, ptr %ptr + %v0 = shufflevector <16 x double> %interleaved.vec, <16 x double> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %v1 = shufflevector <16 x double> %interleaved.vec, <16 x double> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + ret void +} + +define void @load_factor2_bf16(ptr %ptr) { +; RV32-LABEL: @load_factor2_bf16( +; RV32-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x bfloat>, ptr [[PTR:%.*]], align 32 +; RV32-NEXT: [[V0:%.*]] = shufflevector <16 x bfloat> [[INTERLEAVED_VEC]], <16 x bfloat> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> +; RV32-NEXT: [[V1:%.*]] = shufflevector <16 x bfloat> [[INTERLEAVED_VEC]], <16 x bfloat> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> +; RV32-NEXT: ret void +; +; RV64-LABEL: @load_factor2_bf16( +; RV64-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x bfloat>, ptr [[PTR:%.*]], align 32 +; RV64-NEXT: [[V0:%.*]] = shufflevector <16 x bfloat> [[INTERLEAVED_VEC]], <16 x bfloat> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> +; RV64-NEXT: [[V1:%.*]] = shufflevector <16 x bfloat> [[INTERLEAVED_VEC]], <16 x bfloat> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> +; RV64-NEXT: ret void +; + %interleaved.vec = load <16 x bfloat>, ptr %ptr + %v0 = shufflevector <16 x bfloat> %interleaved.vec, <16 x bfloat> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %v1 = shufflevector <16 x bfloat> %interleaved.vec,
<16 x bfloat> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + ret void +} + +define void @load_factor2_f16(ptr %ptr) { +; RV32-LABEL: @load_factor2_f16( +; RV32-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x half>, ptr [[PTR:%.*]], align 32 +; RV32-NEXT: [[V0:%.*]] = shufflevector <16 x half> [[INTERLEAVED_VEC]], <16 x half> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> +; RV32-NEXT: [[V1:%.*]] = shufflevector <16 x half> [[INTERLEAVED_VEC]], <16 x half> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> +; RV32-NEXT: ret void +; +; RV64-LABEL: @load_factor2_f16( +; RV64-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x half>, ptr [[PTR:%.*]], align 32 +; RV64-NEXT: [[V0:%.*]] = shufflevector <16 x half> [[INTERLEAVED_VEC]], <16 x half> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> +; RV64-NEXT: [[V1:%.*]] = shufflevector <16 x half> [[INTERLEAVED_VEC]], <16 x half> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> +; RV64-NEXT: ret void +; + %interleaved.vec = load <16 x half>, ptr %ptr + %v0 = shufflevector <16 x half> %interleaved.vec, <16 x half> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> + %v1 = shufflevector <16 x half> %interleaved.vec, <16 x half> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll index d5b25bfe349b9..03c6f089df9aa 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S | FileCheck %s -check-prefix=NO-ZVFBFMIN +; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue | FileCheck %s -check-prefix=NO-ZVFBFMIN-PREDICATED ; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -S | FileCheck %s -check-prefix=ZVFBFMIN define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) { @@ -21,6 +22,52 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) { ; NO-ZVFBFMIN: [[EXIT]]: ; NO-ZVFBFMIN-NEXT: ret
void ; +; NO-ZVFBFMIN-PREDICATED-LABEL: define void @fadd( +; NO-ZVFBFMIN-PREDICATED-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; NO-ZVFBFMIN-PREDICATED-NEXT: [[ENTRY:.*]]: +; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; NO-ZVFBFMIN-PREDICATED: [[VECTOR_PH]]: +; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], 15 +; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16 +; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; NO-ZVFBFMIN-PREDICATED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1 +; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 +; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer +; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[VECTOR_BODY:.*]] +; NO-ZVFBFMIN-PREDICATED: [[VECTOR_BODY]]: +; NO-ZVFBFMIN-PREDICATED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i64> poison, i64 [[INDEX]], i64 0 +; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT1]], <16 x i64> poison, <16 x i32> zeroinitializer +; NO-ZVFBFMIN-PREDICATED-NEXT: [[VEC_IV:%.*]] = add <16 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15> +; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP0:%.*]] = icmp ule <16 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] +; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP1:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[INDEX]] +; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP2:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[INDEX]] +; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x bfloat> @llvm.masked.load.v16bf16.p0(ptr [[TMP1]], i32 2, <16 x i1> [[TMP0]], <16 x bfloat> poison) +;
NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <16 x bfloat> @llvm.masked.load.v16bf16.p0(ptr [[TMP2]], i32 2, <16 x i1> [[TMP0]], <16 x bfloat> poison) +; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP3:%.*]] = fadd <16 x bfloat> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD3]] +; NO-ZVFBFMIN-PREDICATED-NEXT: call void @llvm.masked.store.v16bf16.p0(<16 x bfloat> [[TMP3]], ptr [[TMP1]], i32 2, <16 x i1> [[TMP0]]) +; NO-ZVFBFMIN-PREDICATED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 +; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; NO-ZVFBFMIN-PREDICATED: [[MIDDLE_BLOCK]]: +; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[EXIT:.*]] +; NO-ZVFBFMIN-PREDICATED: [[SCALAR_PH]]: +; NO-ZVFBFMIN-PREDICATED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] +; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[LOOP:.*]] +; NO-ZVFBFMIN-PREDICATED: [[LOOP]]: +; NO-ZVFBFMIN-PREDICATED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ] +; NO-ZVFBFMIN-PREDICATED-NEXT: [[A_GEP:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[I]] +; NO-ZVFBFMIN-PREDICATED-NEXT: [[B_GEP:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I]] +; NO-ZVFBFMIN-PREDICATED-NEXT: [[X:%.*]] = load bfloat, ptr [[A_GEP]], align 2 +; NO-ZVFBFMIN-PREDICATED-NEXT: [[Y:%.*]] = load bfloat, ptr [[B_GEP]], align 2 +; NO-ZVFBFMIN-PREDICATED-NEXT: [[Z:%.*]] = fadd bfloat [[X]], [[Y]] +; NO-ZVFBFMIN-PREDICATED-NEXT: store bfloat [[Z]], ptr [[A_GEP]], align 2 +; NO-ZVFBFMIN-PREDICATED-NEXT: [[I_NEXT]] = add i64 [[I]], 1 +; NO-ZVFBFMIN-PREDICATED-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] +; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; NO-ZVFBFMIN-PREDICATED: [[EXIT]]: +; NO-ZVFBFMIN-PREDICATED-NEXT: ret void +; ; ZVFBFMIN-LABEL: define void @fadd( ; 
ZVFBFMIN-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; ZVFBFMIN-NEXT: [[ENTRY:.*]]: @@ -133,6 +180,60 @@ define void @vfwmaccbf16.vv(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 ; NO-ZVFBFMIN: [[EXIT]]: ; NO-ZVFBFMIN-NEXT: ret void ; +; NO-ZVFBFMIN-PREDICATED-LABEL: define void @vfwmaccbf16.vv( +; NO-ZVFBFMIN-PREDICATED-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; NO-ZVFBFMIN-PREDICATED-NEXT: [[ENTRY:.*]]: +; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; NO-ZVFBFMIN-PREDICATED: [[VECTOR_PH]]: +; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], 3 +; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4 +; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; NO-ZVFBFMIN-PREDICATED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1 +; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 +; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[VECTOR_BODY:.*]] +; NO-ZVFBFMIN-PREDICATED: [[VECTOR_BODY]]: +; NO-ZVFBFMIN-PREDICATED-NEXT: [[I:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[I]], i64 0 +; NO-ZVFBFMIN-PREDICATED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer +; NO-ZVFBFMIN-PREDICATED-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3> +; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] +; NO-ZVFBFMIN-PREDICATED-NEXT: [[A_GEP:%.*]] = getelementptr bfloat, ptr [[A]],
i64 [[I]] +; NO-ZVFBFMIN-PREDICATED-NEXT: [[B_GEP:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I]] +; NO-ZVFBFMIN-PREDICATED-NEXT: [[C_GEP:%.*]] = getelementptr float, ptr [[C]], i64 [[I]] +; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x bfloat> @llvm.masked.load.v4bf16.p0(ptr [[A_GEP]], i32 2, <4 x i1> [[TMP0]], <4 x bfloat> poison) +; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <4 x bfloat> @llvm.masked.load.v4bf16.p0(ptr [[B_GEP]], i32 2, <4 x i1> [[TMP0]], <4 x bfloat> poison) +; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[C_GEP]], i32 4, <4 x i1> [[TMP0]], <4 x float> poison) +; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP4:%.*]] = fpext <4 x bfloat> [[WIDE_MASKED_LOAD]] to <4 x float> +; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP5:%.*]] = fpext <4 x bfloat> [[WIDE_MASKED_LOAD3]] to <4 x float> +; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[WIDE_MASKED_LOAD4]]) +; NO-ZVFBFMIN-PREDICATED-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[TMP6]], ptr [[C_GEP]], i32 4, <4 x i1> [[TMP0]]) +; NO-ZVFBFMIN-PREDICATED-NEXT: [[INDEX_NEXT]] = add i64 [[I]], 4 +; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; NO-ZVFBFMIN-PREDICATED: [[MIDDLE_BLOCK]]: +; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[EXIT:.*]] +; NO-ZVFBFMIN-PREDICATED: [[SCALAR_PH]]: +; NO-ZVFBFMIN-PREDICATED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] +; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[LOOP:.*]] +; NO-ZVFBFMIN-PREDICATED: [[LOOP]]: +; NO-ZVFBFMIN-PREDICATED-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ] +; NO-ZVFBFMIN-PREDICATED-NEXT: [[A_GEP1:%.*]] = getelementptr bfloat, ptr [[A]], i64 
[[I1]] +; NO-ZVFBFMIN-PREDICATED-NEXT: [[B_GEP1:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I1]] +; NO-ZVFBFMIN-PREDICATED-NEXT: [[C_GEP1:%.*]] = getelementptr float, ptr [[C]], i64 [[I1]] +; NO-ZVFBFMIN-PREDICATED-NEXT: [[X:%.*]] = load bfloat, ptr [[A_GEP1]], align 2 +; NO-ZVFBFMIN-PREDICATED-NEXT: [[Y:%.*]] = load bfloat, ptr [[B_GEP1]], align 2 +; NO-ZVFBFMIN-PREDICATED-NEXT: [[Z:%.*]] = load float, ptr [[C_GEP1]], align 4 +; NO-ZVFBFMIN-PREDICATED-NEXT: [[X_EXT:%.*]] = fpext bfloat [[X]] to float +; NO-ZVFBFMIN-PREDICATED-NEXT: [[Y_EXT:%.*]] = fpext bfloat [[Y]] to float +; NO-ZVFBFMIN-PREDICATED-NEXT: [[FMULADD:%.*]] = call float @llvm.fmuladd.f32(float [[X_EXT]], float [[Y_EXT]], float [[Z]]) +; NO-ZVFBFMIN-PREDICATED-NEXT: store float [[FMULADD]], ptr [[C_GEP1]], align 4 +; NO-ZVFBFMIN-PREDICATED-NEXT: [[I_NEXT]] = add i64 [[I1]], 1 +; NO-ZVFBFMIN-PREDICATED-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] +; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; NO-ZVFBFMIN-PREDICATED: [[EXIT]]: +; NO-ZVFBFMIN-PREDICATED-NEXT: ret void +; ; ZVFBFMIN-LABEL: define void @vfwmaccbf16.vv( ; ZVFBFMIN-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; ZVFBFMIN-NEXT: [[ENTRY:.*]]: @@ -213,6 +314,13 @@ exit: ; NO-ZVFBFMIN: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} ; NO-ZVFBFMIN: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} ;. +; NO-ZVFBFMIN-PREDICATED: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; NO-ZVFBFMIN-PREDICATED: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; NO-ZVFBFMIN-PREDICATED: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; NO-ZVFBFMIN-PREDICATED: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; NO-ZVFBFMIN-PREDICATED: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; NO-ZVFBFMIN-PREDICATED: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +;. 
; ZVFBFMIN: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; ZVFBFMIN: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; ZVFBFMIN: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll index 5b5655216d9ce..a2eef84a4e1ec 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S | FileCheck %s -check-prefix=NO-ZVFHMIN +; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue | FileCheck %s -check-prefix=NO-ZVFHMIN-PREDICATED ; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -S | FileCheck %s -check-prefix=ZVFHMIN define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) { @@ -21,6 +22,52 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) { ; NO-ZVFHMIN: [[EXIT]]: ; NO-ZVFHMIN-NEXT: ret void ; +; NO-ZVFHMIN-PREDICATED-LABEL: define void @fadd( +; NO-ZVFHMIN-PREDICATED-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; NO-ZVFHMIN-PREDICATED-NEXT: [[ENTRY:.*]]: +; NO-ZVFHMIN-PREDICATED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; NO-ZVFHMIN-PREDICATED: [[VECTOR_PH]]: +; NO-ZVFHMIN-PREDICATED-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], 15 +; NO-ZVFHMIN-PREDICATED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16 +; NO-ZVFHMIN-PREDICATED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; NO-ZVFHMIN-PREDICATED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1 +; NO-ZVFHMIN-PREDICATED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 +; NO-ZVFHMIN-PREDICATED-NEXT: 
[[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer +; NO-ZVFHMIN-PREDICATED-NEXT: br label %[[VECTOR_BODY:.*]] +; NO-ZVFHMIN-PREDICATED: [[VECTOR_BODY]]: +; NO-ZVFHMIN-PREDICATED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NO-ZVFHMIN-PREDICATED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i64> poison, i64 [[INDEX]], i64 0 +; NO-ZVFHMIN-PREDICATED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT1]], <16 x i64> poison, <16 x i32> zeroinitializer +; NO-ZVFHMIN-PREDICATED-NEXT: [[VEC_IV:%.*]] = add <16 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15> +; NO-ZVFHMIN-PREDICATED-NEXT: [[TMP0:%.*]] = icmp ule <16 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] +; NO-ZVFHMIN-PREDICATED-NEXT: [[TMP1:%.*]] = getelementptr half, ptr [[A]], i64 [[INDEX]] +; NO-ZVFHMIN-PREDICATED-NEXT: [[TMP2:%.*]] = getelementptr half, ptr [[B]], i64 [[INDEX]] +; NO-ZVFHMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x half> @llvm.masked.load.v16f16.p0(ptr [[TMP1]], i32 2, <16 x i1> [[TMP0]], <16 x half> poison) +; NO-ZVFHMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <16 x half> @llvm.masked.load.v16f16.p0(ptr [[TMP2]], i32 2, <16 x i1> [[TMP0]], <16 x half> poison) +; NO-ZVFHMIN-PREDICATED-NEXT: [[TMP3:%.*]] = fadd <16 x half> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD3]] +; NO-ZVFHMIN-PREDICATED-NEXT: call void @llvm.masked.store.v16f16.p0(<16 x half> [[TMP3]], ptr [[TMP1]], i32 2, <16 x i1> [[TMP0]]) +; NO-ZVFHMIN-PREDICATED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 +; NO-ZVFHMIN-PREDICATED-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-ZVFHMIN-PREDICATED-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; NO-ZVFHMIN-PREDICATED: [[MIDDLE_BLOCK]]: +; NO-ZVFHMIN-PREDICATED-NEXT: br label %[[EXIT:.*]] +; NO-ZVFHMIN-PREDICATED: [[SCALAR_PH]]: +;
NO-ZVFHMIN-PREDICATED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ] +; NO-ZVFHMIN-PREDICATED-NEXT: br label %[[LOOP:.*]] +; NO-ZVFHMIN-PREDICATED: [[LOOP]]: +; NO-ZVFHMIN-PREDICATED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ] +; NO-ZVFHMIN-PREDICATED-NEXT: [[A_GEP:%.*]] = getelementptr half, ptr [[A]], i64 [[I]] +; NO-ZVFHMIN-PREDICATED-NEXT: [[B_GEP:%.*]] = getelementptr half, ptr [[B]], i64 [[I]] +; NO-ZVFHMIN-PREDICATED-NEXT: [[X:%.*]] = load half, ptr [[A_GEP]], align 2 +; NO-ZVFHMIN-PREDICATED-NEXT: [[Y:%.*]] = load half, ptr [[B_GEP]], align 2 +; NO-ZVFHMIN-PREDICATED-NEXT: [[Z:%.*]] = fadd half [[X]], [[Y]] +; NO-ZVFHMIN-PREDICATED-NEXT: store half [[Z]], ptr [[A_GEP]], align 2 +; NO-ZVFHMIN-PREDICATED-NEXT: [[I_NEXT]] = add i64 [[I]], 1 +; NO-ZVFHMIN-PREDICATED-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] +; NO-ZVFHMIN-PREDICATED-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; NO-ZVFHMIN-PREDICATED: [[EXIT]]: +; NO-ZVFHMIN-PREDICATED-NEXT: ret void +; ; ZVFHMIN-LABEL: define void @fadd( ; ZVFHMIN-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; ZVFHMIN-NEXT: [[ENTRY:.*]]: @@ -84,6 +131,11 @@ exit: ret void } ;. +; NO-ZVFHMIN-PREDICATED: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; NO-ZVFHMIN-PREDICATED: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; NO-ZVFHMIN-PREDICATED: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; NO-ZVFHMIN-PREDICATED: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +;. ; ZVFHMIN: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; ZVFHMIN: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; ZVFHMIN: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}