From a8dfd1afc49047cb740bade4d2f879c94832d8a4 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Mon, 15 Sep 2025 13:40:55 +0100 Subject: [PATCH 1/4] [IR] NFC: Remove 'experimental' from partial.reduce.add intrinsic The partial reduction intrinsics are not experimental, because they've been used in production for a while now and are unlikely to change. --- llvm/docs/LangRef.rst | 12 +- .../llvm/Analysis/TargetTransformInfo.h | 2 +- llvm/include/llvm/CodeGen/TargetLowering.h | 2 +- llvm/include/llvm/IR/Intrinsics.td | 6 +- .../lib/CodeGen/ComplexDeinterleavingPass.cpp | 6 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 2 +- llvm/lib/IR/AutoUpgrade.cpp | 6 +- llvm/lib/IR/Verifier.cpp | 2 +- .../Target/AArch64/AArch64ISelLowering.cpp | 14 +- .../WebAssembly/WebAssemblyISelLowering.cpp | 5 +- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 6 +- ...ade-vector-partial-reduce-add-intrinsic.ll | 25 +++ .../AArch64/GlobalISel/combine-addv.mir | 4 +- .../AArch64/complex-deinterleaving-cdot.ll | 152 +++++++++--------- .../complex-deinterleaving-unrolled-cdot.ll | 40 ++--- .../neon-partial-reduce-dot-product.ll | 62 +++---- .../CodeGen/AArch64/partial-reduction-add.ll | 20 +-- .../sve-fixed-length-partial-reduce.ll | 48 +++--- .../AArch64/sve-partial-reduce-dot-product.ll | 62 +++---- .../AArch64/sve-partial-reduce-wide-add.ll | 20 +-- .../RISCV/rvv/fixed-vectors-zvqdotq.ll | 38 ++--- .../RISCV/rvv/partial-reduction-add.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/zvqdotq-sdnode.ll | 24 +-- .../AArch64/partial-reduce-chained.ll | 84 +++++----- .../partial-reduce-dot-product-epilogue.ll | 6 +- .../partial-reduce-dot-product-mixed.ll | 24 +-- .../partial-reduce-dot-product-neon.ll | 46 +++--- .../AArch64/partial-reduce-dot-product.ll | 60 +++---- .../AArch64/partial-reduce-interleave.ll | 12 +- .../AArch64/partial-reduce-sub.ll | 2 +- .../LoopVectorize/AArch64/partial-reduce.ll | 28 ++-- .../RISCV/partial-reduce-dot-product.ll | 24 +-- .../VectorCombine/intrinsic-scalarize.ll | 4 +- 33 files changed, 440 insertions(+), 420 deletions(-) create mode 100644 llvm/test/Bitcode/upgrade-vector-partial-reduce-add-intrinsic.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index d61ea07830123..61c8415873092 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -20562,7 +20562,7 @@ Note that it has the following implications: - If ``%cnt`` is non-zero, the return value is non-zero as well. - If ``%cnt`` is less than or equal to ``%max_lanes``, the return value is equal to ``%cnt``. -'``llvm.experimental.vector.partial.reduce.add.*``' Intrinsic +'``llvm.vector.partial.reduce.add.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: @@ -20571,15 +20571,15 @@ This is an overloaded intrinsic. 
 ::

-      declare <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v4i32.v8i32(<4 x i32> %a, <8 x i32> %b)
-      declare <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v4i32.v16i32(<4 x i32> %a, <16 x i32> %b)
-      declare <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv8i32(<vscale x 4 x i32> %a, <vscale x 8 x i32> %b)
-      declare <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv16i32(<vscale x 4 x i32> %a, <vscale x 16 x i32> %b)
+      declare <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v8i32(<4 x i32> %a, <8 x i32> %b)
+      declare <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v16i32(<4 x i32> %a, <16 x i32> %b)
+      declare <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv8i32(<vscale x 4 x i32> %a, <vscale x 8 x i32> %b)
+      declare <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv16i32(<vscale x 4 x i32> %a, <vscale x 16 x i32> %b)

 Overview:
 """""""""

-The '``llvm.vector.experimental.partial.reduce.add.*``' intrinsics reduce the
+The '``llvm.vector.partial.reduce.add.*``' intrinsics reduce the
 concatenation of the two vector arguments down to the number of elements of the
 result vector type.

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index ca8c4428251bb..41ff54f0781a2 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1328,7 +1328,7 @@ class TargetTransformInfo {
   /// \return The cost of a partial reduction, which is a reduction from a
   /// vector to another vector with fewer elements of larger size. They are
-  /// represented by the llvm.experimental.partial.reduce.add intrinsic, which
+  /// represented by the llvm.vector.partial.reduce.add intrinsic, which
   /// takes an accumulator of type \p AccumType and a second vector operand to
   /// be accumulated, whose element count is specified by \p VF. The type of
   /// reduction is specified by \p Opcode. The second operand passed to the
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 2ba8b29e775e0..46be271320fdd 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -480,7 +480,7 @@ class LLVM_ABI TargetLoweringBase {
     return true;
   }

-  /// Return true if the @llvm.experimental.vector.partial.reduce.* intrinsic
+  /// Return true if the @llvm.vector.partial.reduce.* intrinsic
   /// should be expanded using generic code in SelectionDAGBuilder.
   virtual bool
   shouldExpandPartialReductionIntrinsic(const IntrinsicInst *I) const {
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index fb9ea10ac9127..585371a6a4423 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2797,9 +2797,9 @@ foreach n = 2...8 in {

 //===-------------- Intrinsics to perform partial reduction ---------------===//

-def int_experimental_vector_partial_reduce_add : DefaultAttrsIntrinsic<[LLVMMatchType<0>],
-                                                                       [llvm_anyvector_ty, llvm_anyvector_ty],
-                                                                       [IntrNoMem]>;
+def int_vector_partial_reduce_add : DefaultAttrsIntrinsic<[LLVMMatchType<0>],
+                                                          [llvm_anyvector_ty, llvm_anyvector_ty],
+                                                          [IntrNoMem]>;

 //===----------------- Pointer Authentication Intrinsics ------------------===//
 //
diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
index 7d355e6e365d3..6c2a5a7da84d3 100644
--- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
+++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
@@ -1022,8 +1022,7 @@ ComplexDeinterleavingGraph::identifyDotProduct(Value *V) {
   CompositeNode *ANode = nullptr;

-  const Intrinsic::ID PartialReduceInt =
-      Intrinsic::experimental_vector_partial_reduce_add;
+  const Intrinsic::ID PartialReduceInt = Intrinsic::vector_partial_reduce_add;

   Value *AReal = nullptr;
   Value *AImag = nullptr;
@@ -1139,8 +1138,7 @@ ComplexDeinterleavingGraph::identifyPartialReduction(Value *R, Value *I) {
     return nullptr;

   auto *IInst = dyn_cast<IntrinsicInst>(*CommonUser);
-  if (!IInst || IInst->getIntrinsicID() !=
-                    Intrinsic::experimental_vector_partial_reduce_add)
+  if (!IInst || IInst->getIntrinsicID() != Intrinsic::vector_partial_reduce_add)
     return nullptr;

   if (CompositeNode *CN = identifyDotProduct(IInst))
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 299acf6b1c080..070d7978ce48f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -8102,7 +8102,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     setValue(&I, Trunc);
     return;
   }
-  case Intrinsic::experimental_vector_partial_reduce_add: {
+  case Intrinsic::vector_partial_reduce_add: {
     if (!TLI.shouldExpandPartialReductionIntrinsic(cast<IntrinsicInst>(&I))) {
       visitTargetIntrinsic(I, Intrinsic);
       return;
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 8d8120ac9ed90..0eb3bf961b67a 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1259,6 +1259,7 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
             .StartsWith("reverse.", Intrinsic::vector_reverse)
             .StartsWith("interleave2.", Intrinsic::vector_interleave2)
             .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
+            .StartsWith("partial.reduce.add", Intrinsic::vector_partial_reduce_add)
             .Default(Intrinsic::not_intrinsic);
     if (ID != Intrinsic::not_intrinsic) {
       const auto *FT = F->getFunctionType();
@@ -1269,8 +1270,9 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
         Tys.push_back(FT->getReturnType());
       if (ID != Intrinsic::vector_interleave2)
         Tys.push_back(FT->getParamType(0));
-      if (ID == Intrinsic::vector_insert)
-        // Inserting overloads the inserted type.
+      if (ID == Intrinsic::vector_insert ||
+          ID == Intrinsic::vector_partial_reduce_add)
+        // Inserting overloads the inserted type.
        Tys.push_back(FT->getParamType(1));
       rename(F);
       NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index c06b60fd2d9a9..e9ee130dd5e91 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6530,7 +6530,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
     }
     break;
   }
-  case Intrinsic::experimental_vector_partial_reduce_add: {
+  case Intrinsic::vector_partial_reduce_add: {
     VectorType *AccTy = cast<VectorType>(Call.getArgOperand(0)->getType());
     VectorType *VecTy = cast<VectorType>(Call.getArgOperand(1)->getType());
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 17703ab165768..27ad3cc2a4e44 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2184,8 +2184,7 @@ bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,

 bool AArch64TargetLowering::shouldExpandPartialReductionIntrinsic(
     const IntrinsicInst *I) const {
-  assert(I->getIntrinsicID() ==
-             Intrinsic::experimental_vector_partial_reduce_add &&
+  assert(I->getIntrinsicID() == Intrinsic::vector_partial_reduce_add &&
          "Unexpected intrinsic!");
   return true;
 }
@@ -17474,8 +17473,7 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(
     if (match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value()))))
       return true;
     if (match(SingleUser,
-              m_Intrinsic<
-                  Intrinsic::experimental_vector_partial_reduce_add>(
+              m_Intrinsic<Intrinsic::vector_partial_reduce_add>(
                   m_Value(), m_Specific(I))))
       return true;
     return false;
@@ -22510,8 +22508,7 @@ SDValue tryLowerPartialReductionToDot(SDNode *N,
                                       const AArch64Subtarget *Subtarget,
                                       SelectionDAG &DAG) {
   assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
-         getIntrinsicID(N) ==
-             Intrinsic::experimental_vector_partial_reduce_add &&
+         getIntrinsicID(N) == Intrinsic::vector_partial_reduce_add &&
          "Expected a partial reduction node");

   bool Scalable = N->getValueType(0).isScalableVector();
@@ -22605,8 +22602,7 @@ SDValue tryLowerPartialReductionToWideAdd(SDNode *N,
                                          const AArch64Subtarget *Subtarget,
                                          SelectionDAG &DAG) {
   assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
-         getIntrinsicID(N) ==
-             Intrinsic::experimental_vector_partial_reduce_add &&
+         getIntrinsicID(N) == Intrinsic::vector_partial_reduce_add &&
          "Expected a partial reduction node");

   if (!Subtarget->hasSVE2() && !Subtarget->isStreamingSVEAvailable())
@@ -22671,7 +22667,7 @@ static SDValue performIntrinsicCombine(SDNode *N,
   switch (IID) {
   default:
     break;
-  case Intrinsic::experimental_vector_partial_reduce_add: {
+  case Intrinsic::vector_partial_reduce_add: {
     if (SDValue Dot = tryLowerPartialReductionToDot(N, Subtarget, DAG))
       return Dot;
     if (SDValue WideAdd = tryLowerPartialReductionToWideAdd(N, Subtarget, DAG))
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index aea27ba32d37e..64b9dc31f75b7 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -418,7 +418,7 @@ MVT WebAssemblyTargetLowering::getPointerMemTy(const DataLayout &DL,

 bool WebAssemblyTargetLowering::shouldExpandPartialReductionIntrinsic(
     const IntrinsicInst *I) const {
-  if (I->getIntrinsicID() != Intrinsic::experimental_vector_partial_reduce_add)
+  if (I->getIntrinsicID() != Intrinsic::vector_partial_reduce_add)
     return true;

   EVT VT = EVT::getEVT(I->getType());
@@ -2117,8 +2117,7 @@ SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
 // extmul and adds.
 SDValue performLowerPartialReduction(SDNode *N, SelectionDAG &DAG) {
   assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
-  if (N->getConstantOperandVal(0) !=
-      Intrinsic::experimental_vector_partial_reduce_add)
+  if (N->getConstantOperandVal(0) != Intrinsic::vector_partial_reduce_add)
     return SDValue();

   assert(N->getValueType(0) == MVT::v4i32 && "can only support v4i32");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2844b8348027b..8e9c3db50319f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -375,9 +375,9 @@ void VPPartialReductionRecipe::execute(VPTransformState &State) {

   Type *RetTy = PhiVal->getType();

-  CallInst *V = Builder.CreateIntrinsic(
-      RetTy, Intrinsic::experimental_vector_partial_reduce_add,
-      {PhiVal, BinOpVal}, nullptr, "partial.reduce");
+  CallInst *V =
+      Builder.CreateIntrinsic(RetTy, Intrinsic::vector_partial_reduce_add,
+                              {PhiVal, BinOpVal}, nullptr, "partial.reduce");
   State.set(this, V);
 }
diff --git a/llvm/test/Bitcode/upgrade-vector-partial-reduce-add-intrinsic.ll b/llvm/test/Bitcode/upgrade-vector-partial-reduce-add-intrinsic.ll
new file mode 100644
index 0000000000000..1277d5d933d7b
--- /dev/null
+++ b/llvm/test/Bitcode/upgrade-vector-partial-reduce-add-intrinsic.ll
@@ -0,0 +1,25 @@
+; RUN: opt -S < %s | FileCheck %s
+; RUN: llvm-as %s -o - | llvm-dis | FileCheck %s
+
+define <4 x i32> @partial_reduce_add_fixed(<16 x i32> %a) {
+; CHECK-LABEL: @partial_reduce_add_fixed
+; CHECK: %res = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> zeroinitializer, <16 x i32> %a)
+
+  %res = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> zeroinitializer, <16 x i32> %a)
+  ret <4 x i32> %res
+}
+
+
+define <vscale x 4 x i32> @partial_reduce_add_scalable(<vscale x 16 x i32> %a) {
+; CHECK-LABEL: @partial_reduce_add_scalable
+; CHECK: %res = call <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> zeroinitializer, <vscale x 16 x i32> %a)
+
+  %res = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> zeroinitializer, <vscale x 16 x i32> %a)
+  ret <vscale x 4 x i32> %res
+}
+
+declare <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32>, <16 x i32>)
+; CHECK-DAG: declare <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32>, <16 x i32>)
+
+declare <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32>, <vscale x 16 x i32>)
+; CHECK-DAG: declare <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32>, <vscale x 16 x i32>)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-addv.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-addv.mir
index ae08cd9d5bfef..3e4a856aed2ec 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-addv.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-addv.mir
@@ -15,7 +15,7 @@ body: |
    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $q3
    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $q4
    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[COPY1]](<4 x s32>), [[COPY2]](<4 x s32>), [[COPY3]](<4 x s32>), [[COPY4]](<4 x s32>)
-    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.experimental.vector.partial.reduce.add), [[COPY]](<4 x s32>), [[CONCAT_VECTORS]](<16 x s32>)
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.vector.partial.reduce.add), [[COPY]](<4 x s32>), [[CONCAT_VECTORS]](<16 x s32>)
    ; CHECK-NEXT: [[VECREDUCE_ADD:%[0-9]+]]:_(s32) = G_VECREDUCE_ADD [[INT]](<4 x s32>)
    ; CHECK-NEXT: $w0 = COPY [[VECREDUCE_ADD]](s32)
    ; CHECK-NEXT: RET_ReallyLR implicit $w0
@@ -25,7 +25,7 @@ body: |
%4:_(<4 x s32>) = COPY $q3 %5:_(<4 x s32>) = COPY $q4 %1:_(<16 x s32>) = G_CONCAT_VECTORS %2:_(<4 x s32>), %3:_(<4 x s32>), %4:_(<4 x s32>), %5:_(<4 x s32>) - %6:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.experimental.vector.partial.reduce.add), %0:_(<4 x s32>), %1:_(<16 x s32>) + %6:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.vector.partial.reduce.add), %0:_(<4 x s32>), %1:_(<16 x s32>) %7:_(s32) = G_VECREDUCE_ADD %6:_(<4 x s32>) $w0 = COPY %7:_(s32) RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-cdot.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-cdot.ll index 11cf4c31936d8..ebb2da9a3edd2 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-cdot.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-cdot.ll @@ -45,10 +45,10 @@ define i32 @cdotp_i8_rot0( %a, %b) { ; CHECK-SVE-NEXT: [[B_REAL_EXT:%.*]] = sext [[B_REAL]] to ; CHECK-SVE-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-SVE-NEXT: [[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_REAL_EXT]] -; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL]]) +; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL]]) ; CHECK-SVE-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_IMAG_EXT]] ; CHECK-SVE-NEXT: [[IMAG_MUL_NEG:%.*]] = sub zeroinitializer, [[IMAG_MUL]] -; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL_NEG]]) +; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL_NEG]]) ; CHECK-SVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-SVE: [[MIDDLE_BLOCK]]: ; CHECK-SVE-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[PARTIAL_REDUCE_SUB]]) @@ -71,10 +71,10 @@ define i32 @cdotp_i8_rot0( %a, %b) { ; CHECK-NOSVE-NEXT: [[B_REAL_EXT:%.*]] = sext [[B_REAL]] to ; CHECK-NOSVE-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-NOSVE-NEXT: [[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_REAL_EXT]] -; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL]]) +; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL]]) ; CHECK-NOSVE-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_IMAG_EXT]] ; CHECK-NOSVE-NEXT: [[IMAG_MUL_NEG:%.*]] = sub zeroinitializer, [[IMAG_MUL]] -; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL_NEG]]) +; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL_NEG]]) ; CHECK-NOSVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-NOSVE: [[MIDDLE_BLOCK]]: ; CHECK-NOSVE-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[PARTIAL_REDUCE_SUB]]) @@ -96,10 +96,10 @@ vector.body: ; preds = %vector.body, %entry %b.real.ext = sext %b.real to %b.imag.ext = sext %b.imag to %real.mul = mul %b.real.ext, %a.real.ext - %real.mul.reduced = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %vec.phi, %real.mul) + %real.mul.reduced = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %vec.phi, %real.mul) %imag.mul = mul %b.imag.ext, %a.imag.ext 
%imag.mul.neg = sub zeroinitializer, %imag.mul - %partial.reduce.sub = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %real.mul.reduced, %imag.mul.neg) + %partial.reduce.sub = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %real.mul.reduced, %imag.mul.neg) br i1 true, label %middle.block, label %vector.body middle.block: ; preds = %vector.body @@ -146,9 +146,9 @@ define i32 @cdotp_i8_rot90( %a, %b) { ; CHECK-SVE-NEXT: [[B_REAL_EXT:%.*]] = sext [[B_REAL]] to ; CHECK-SVE-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-SVE-NEXT: [[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_IMAG_EXT]] -; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL]]) +; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL]]) ; CHECK-SVE-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_REAL_EXT]] -; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) +; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) ; CHECK-SVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-SVE: [[MIDDLE_BLOCK]]: ; CHECK-SVE-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[PARTIAL_REDUCE_SUB]]) @@ -171,9 +171,9 @@ define i32 @cdotp_i8_rot90( %a, %b) { ; CHECK-NOSVE-NEXT: [[B_REAL_EXT:%.*]] = sext [[B_REAL]] to ; CHECK-NOSVE-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-NOSVE-NEXT: [[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_IMAG_EXT]] -; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL]]) +; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL]]) ; CHECK-NOSVE-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_REAL_EXT]] -; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) +; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) ; CHECK-NOSVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-NOSVE: [[MIDDLE_BLOCK]]: ; CHECK-NOSVE-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[PARTIAL_REDUCE_SUB]]) @@ -195,9 +195,9 @@ vector.body: ; preds = %vector.body, %entry %b.real.ext = sext %b.real to %b.imag.ext = sext %b.imag to %real.mul = mul %b.real.ext, %a.imag.ext - %real.mul.reduced = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %vec.phi, %real.mul) + %real.mul.reduced = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %vec.phi, %real.mul) %imag.mul = mul %b.imag.ext, %a.real.ext - %partial.reduce.sub = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %real.mul.reduced, %imag.mul) + %partial.reduce.sub = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %real.mul.reduced, %imag.mul) br i1 true, label %middle.block, label %vector.body middle.block: ; preds = %vector.body @@ -244,9 +244,9 @@ define i32 @cdotp_i8_rot180( %a, %b) { ; CHECK-SVE-NEXT: [[B_REAL_EXT:%.*]] = sext [[B_REAL]] to ; CHECK-SVE-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-SVE-NEXT: [[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_REAL_EXT]] -; 
CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL]]) +; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL]]) ; CHECK-SVE-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_IMAG_EXT]] -; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) +; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) ; CHECK-SVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-SVE: [[MIDDLE_BLOCK]]: ; CHECK-SVE-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[PARTIAL_REDUCE_SUB]]) @@ -269,9 +269,9 @@ define i32 @cdotp_i8_rot180( %a, %b) { ; CHECK-NOSVE-NEXT: [[B_REAL_EXT:%.*]] = sext [[B_REAL]] to ; CHECK-NOSVE-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-NOSVE-NEXT: [[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_REAL_EXT]] -; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL]]) +; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL]]) ; CHECK-NOSVE-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_IMAG_EXT]] -; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) +; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) ; CHECK-NOSVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-NOSVE: [[MIDDLE_BLOCK]]: ; CHECK-NOSVE-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[PARTIAL_REDUCE_SUB]]) @@ -293,9 +293,9 @@ vector.body: ; preds = %vector.body, %entry %b.real.ext = sext %b.real to %b.imag.ext = sext %b.imag to %real.mul = mul %b.real.ext, %a.real.ext - %real.mul.reduced = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %vec.phi, %real.mul) + %real.mul.reduced = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %vec.phi, %real.mul) %imag.mul = mul %b.imag.ext, %a.imag.ext - %partial.reduce.sub = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %real.mul.reduced, %imag.mul) + %partial.reduce.sub = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %real.mul.reduced, %imag.mul) br i1 true, label %middle.block, label %vector.body middle.block: ; preds = %vector.body @@ -343,9 +343,9 @@ define i32 @cdotp_i8_rot270( %a, %b) { ; CHECK-SVE-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-SVE-NEXT: [[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_IMAG_EXT]] ; CHECK-SVE-NEXT: [[REAL_MUL_NEG:%.*]] = sub zeroinitializer, [[REAL_MUL]] -; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL_NEG]]) +; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL_NEG]]) ; CHECK-SVE-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_REAL_EXT]] -; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) +; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], 
[[IMAG_MUL]]) ; CHECK-SVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-SVE: [[MIDDLE_BLOCK]]: ; CHECK-SVE-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[PARTIAL_REDUCE_SUB]]) @@ -369,9 +369,9 @@ define i32 @cdotp_i8_rot270( %a, %b) { ; CHECK-NOSVE-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-NOSVE-NEXT: [[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_IMAG_EXT]] ; CHECK-NOSVE-NEXT: [[REAL_MUL_NEG:%.*]] = sub zeroinitializer, [[REAL_MUL]] -; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL_NEG]]) +; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL_NEG]]) ; CHECK-NOSVE-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_REAL_EXT]] -; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) +; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) ; CHECK-NOSVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-NOSVE: [[MIDDLE_BLOCK]]: ; CHECK-NOSVE-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[PARTIAL_REDUCE_SUB]]) @@ -394,9 +394,9 @@ vector.body: ; preds = %vector.body, %entry %b.imag.ext = sext %b.imag to %real.mul = mul %b.real.ext, %a.imag.ext %real.mul.neg = sub zeroinitializer, %real.mul - %real.mul.reduced = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %vec.phi, %real.mul.neg) + %real.mul.reduced = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %vec.phi, %real.mul.neg) %imag.mul = mul %b.imag.ext, %a.real.ext - %partial.reduce.sub = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %real.mul.reduced, %imag.mul) + %partial.reduce.sub = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %real.mul.reduced, %imag.mul) br i1 true, label %middle.block, label %vector.body middle.block: ; preds = %vector.body @@ -443,10 +443,10 @@ define i64 @cdotp_i16_rot0( %a, %b) { ; CHECK-SVE-NEXT: [[B_REAL_EXT:%.*]] = sext [[B_REAL]] to ; CHECK-SVE-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-SVE-NEXT: [[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_REAL_EXT]] -; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( [[VEC_PHI]], [[REAL_MUL]]) +; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( [[VEC_PHI]], [[REAL_MUL]]) ; CHECK-SVE-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_IMAG_EXT]] ; CHECK-SVE-NEXT: [[IMAG_MUL_NEG:%.*]] = sub zeroinitializer, [[IMAG_MUL]] -; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( [[REAL_MUL_REDUCED]], [[IMAG_MUL_NEG]]) +; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( [[REAL_MUL_REDUCED]], [[IMAG_MUL_NEG]]) ; CHECK-SVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-SVE: [[MIDDLE_BLOCK]]: ; CHECK-SVE-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[PARTIAL_REDUCE_SUB]]) @@ -469,10 +469,10 @@ define i64 @cdotp_i16_rot0( %a, %b) { ; CHECK-NOSVE-NEXT: [[B_REAL_EXT:%.*]] = sext [[B_REAL]] to ; CHECK-NOSVE-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-NOSVE-NEXT: [[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_REAL_EXT]] -; CHECK-NOSVE-NEXT: 
[[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( [[VEC_PHI]], [[REAL_MUL]]) +; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( [[VEC_PHI]], [[REAL_MUL]]) ; CHECK-NOSVE-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_IMAG_EXT]] ; CHECK-NOSVE-NEXT: [[IMAG_MUL_NEG:%.*]] = sub zeroinitializer, [[IMAG_MUL]] -; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( [[REAL_MUL_REDUCED]], [[IMAG_MUL_NEG]]) +; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( [[REAL_MUL_REDUCED]], [[IMAG_MUL_NEG]]) ; CHECK-NOSVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-NOSVE: [[MIDDLE_BLOCK]]: ; CHECK-NOSVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[PARTIAL_REDUCE_SUB]]) @@ -494,10 +494,10 @@ vector.body: ; preds = %vector.body, %entry %b.real.ext = sext %b.real to %b.imag.ext = sext %b.imag to %real.mul = mul %b.real.ext, %a.real.ext - %real.mul.reduced = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( %vec.phi, %real.mul) + %real.mul.reduced = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( %vec.phi, %real.mul) %imag.mul = mul %b.imag.ext, %a.imag.ext %imag.mul.neg = sub zeroinitializer, %imag.mul - %partial.reduce.sub = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( %real.mul.reduced, %imag.mul.neg) + %partial.reduce.sub = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( %real.mul.reduced, %imag.mul.neg) br i1 true, label %middle.block, label %vector.body middle.block: ; preds = %vector.body @@ -544,9 +544,9 @@ define i64 @cdotp_i16_rot90( %a, %b) { ; CHECK-SVE-NEXT: [[B_REAL_EXT:%.*]] = sext [[B_REAL]] to ; CHECK-SVE-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-SVE-NEXT: [[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_IMAG_EXT]] -; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( [[VEC_PHI]], [[REAL_MUL]]) +; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( [[VEC_PHI]], [[REAL_MUL]]) ; CHECK-SVE-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_REAL_EXT]] -; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) +; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) ; CHECK-SVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-SVE: [[MIDDLE_BLOCK]]: ; CHECK-SVE-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[PARTIAL_REDUCE_SUB]]) @@ -569,9 +569,9 @@ define i64 @cdotp_i16_rot90( %a, %b) { ; CHECK-NOSVE-NEXT: [[B_REAL_EXT:%.*]] = sext [[B_REAL]] to ; CHECK-NOSVE-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-NOSVE-NEXT: [[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_IMAG_EXT]] -; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( [[VEC_PHI]], [[REAL_MUL]]) +; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( [[VEC_PHI]], [[REAL_MUL]]) ; CHECK-NOSVE-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_REAL_EXT]] -; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) +; CHECK-NOSVE-NEXT: 
[[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) ; CHECK-NOSVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-NOSVE: [[MIDDLE_BLOCK]]: ; CHECK-NOSVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[PARTIAL_REDUCE_SUB]]) @@ -593,9 +593,9 @@ vector.body: ; preds = %vector.body, %entry %b.real.ext = sext %b.real to %b.imag.ext = sext %b.imag to %real.mul = mul %b.real.ext, %a.imag.ext - %real.mul.reduced = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( %vec.phi, %real.mul) + %real.mul.reduced = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( %vec.phi, %real.mul) %imag.mul = mul %b.imag.ext, %a.real.ext - %partial.reduce.sub = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( %real.mul.reduced, %imag.mul) + %partial.reduce.sub = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( %real.mul.reduced, %imag.mul) br i1 true, label %middle.block, label %vector.body middle.block: ; preds = %vector.body @@ -642,9 +642,9 @@ define i64 @cdotp_i16_rot180( %a, %b) { ; CHECK-SVE-NEXT: [[B_REAL_EXT:%.*]] = sext [[B_REAL]] to ; CHECK-SVE-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-SVE-NEXT: [[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_REAL_EXT]] -; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( [[VEC_PHI]], [[REAL_MUL]]) +; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( [[VEC_PHI]], [[REAL_MUL]]) ; CHECK-SVE-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_IMAG_EXT]] -; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) +; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) ; CHECK-SVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-SVE: [[MIDDLE_BLOCK]]: ; CHECK-SVE-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[PARTIAL_REDUCE_SUB]]) @@ -667,9 +667,9 @@ define i64 @cdotp_i16_rot180( %a, %b) { ; CHECK-NOSVE-NEXT: [[B_REAL_EXT:%.*]] = sext [[B_REAL]] to ; CHECK-NOSVE-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-NOSVE-NEXT: [[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_REAL_EXT]] -; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( [[VEC_PHI]], [[REAL_MUL]]) +; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( [[VEC_PHI]], [[REAL_MUL]]) ; CHECK-NOSVE-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_IMAG_EXT]] -; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) +; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) ; CHECK-NOSVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-NOSVE: [[MIDDLE_BLOCK]]: ; CHECK-NOSVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[PARTIAL_REDUCE_SUB]]) @@ -691,9 +691,9 @@ vector.body: ; preds = %vector.body, %entry %b.real.ext = sext %b.real to %b.imag.ext = sext %b.imag to %real.mul = mul %b.real.ext, %a.real.ext - %real.mul.reduced = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( %vec.phi, %real.mul) + %real.mul.reduced = call 
@llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( %vec.phi, %real.mul) %imag.mul = mul %b.imag.ext, %a.imag.ext - %partial.reduce.sub = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( %real.mul.reduced, %imag.mul) + %partial.reduce.sub = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( %real.mul.reduced, %imag.mul) br i1 true, label %middle.block, label %vector.body middle.block: ; preds = %vector.body @@ -741,9 +741,9 @@ define i64 @cdotp_i16_rot270( %a, %b) { ; CHECK-SVE-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-SVE-NEXT: [[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_IMAG_EXT]] ; CHECK-SVE-NEXT: [[REAL_MUL_NEG:%.*]] = sub zeroinitializer, [[REAL_MUL]] -; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( [[VEC_PHI]], [[REAL_MUL_NEG]]) +; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( [[VEC_PHI]], [[REAL_MUL_NEG]]) ; CHECK-SVE-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_REAL_EXT]] -; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) +; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) ; CHECK-SVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-SVE: [[MIDDLE_BLOCK]]: ; CHECK-SVE-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[PARTIAL_REDUCE_SUB]]) @@ -767,9 +767,9 @@ define i64 @cdotp_i16_rot270( %a, %b) { ; CHECK-NOSVE-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-NOSVE-NEXT: [[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_IMAG_EXT]] ; CHECK-NOSVE-NEXT: [[REAL_MUL_NEG:%.*]] = sub zeroinitializer, [[REAL_MUL]] -; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( [[VEC_PHI]], [[REAL_MUL_NEG]]) +; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( [[VEC_PHI]], [[REAL_MUL_NEG]]) ; CHECK-NOSVE-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_REAL_EXT]] -; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) +; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( [[REAL_MUL_REDUCED]], [[IMAG_MUL]]) ; CHECK-NOSVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-NOSVE: [[MIDDLE_BLOCK]]: ; CHECK-NOSVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[PARTIAL_REDUCE_SUB]]) @@ -792,9 +792,9 @@ vector.body: ; preds = %vector.body, %entry %b.imag.ext = sext %b.imag to %real.mul = mul %b.real.ext, %a.imag.ext %real.mul.neg = sub zeroinitializer, %real.mul - %real.mul.reduced = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( %vec.phi, %real.mul.neg) + %real.mul.reduced = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( %vec.phi, %real.mul.neg) %imag.mul = mul %b.imag.ext, %a.real.ext - %partial.reduce.sub = call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( %real.mul.reduced, %imag.mul) + %partial.reduce.sub = call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( %real.mul.reduced, %imag.mul) br i1 true, label %middle.block, label %vector.body middle.block: ; preds = %vector.body @@ -822,10 +822,10 @@ define i32 @not_cdotp( %a, %b) { ; CHECK-SVE2-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-SVE2-NEXT: 
[[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_REAL_EXT]] ; CHECK-SVE2-NEXT: [[REAL_MUL_NEG:%.*]] = sub zeroinitializer, [[REAL_MUL]] -; CHECK-SVE2-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL_NEG]]) +; CHECK-SVE2-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL_NEG]]) ; CHECK-SVE2-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_IMAG_EXT]] ; CHECK-SVE2-NEXT: [[IMAG_MUL_NEG:%.*]] = sub zeroinitializer, [[IMAG_MUL]] -; CHECK-SVE2-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL_NEG]]) +; CHECK-SVE2-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL_NEG]]) ; CHECK-SVE2-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-SVE2: [[MIDDLE_BLOCK]]: ; CHECK-SVE2-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[PARTIAL_REDUCE_SUB]]) @@ -849,10 +849,10 @@ define i32 @not_cdotp( %a, %b) { ; CHECK-SVE-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-SVE-NEXT: [[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_REAL_EXT]] ; CHECK-SVE-NEXT: [[REAL_MUL_NEG:%.*]] = sub zeroinitializer, [[REAL_MUL]] -; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL_NEG]]) +; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL_NEG]]) ; CHECK-SVE-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_IMAG_EXT]] ; CHECK-SVE-NEXT: [[IMAG_MUL_NEG:%.*]] = sub zeroinitializer, [[IMAG_MUL]] -; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL_NEG]]) +; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL_NEG]]) ; CHECK-SVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-SVE: [[MIDDLE_BLOCK]]: ; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[PARTIAL_REDUCE_SUB]]) @@ -876,10 +876,10 @@ define i32 @not_cdotp( %a, %b) { ; CHECK-NOSVE-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-NOSVE-NEXT: [[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_REAL_EXT]] ; CHECK-NOSVE-NEXT: [[REAL_MUL_NEG:%.*]] = sub zeroinitializer, [[REAL_MUL]] -; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL_NEG]]) +; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[REAL_MUL_NEG]]) ; CHECK-NOSVE-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_IMAG_EXT]] ; CHECK-NOSVE-NEXT: [[IMAG_MUL_NEG:%.*]] = sub zeroinitializer, [[IMAG_MUL]] -; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL_NEG]]) +; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL_NEG]]) ; CHECK-NOSVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-NOSVE: [[MIDDLE_BLOCK]]: ; CHECK-NOSVE-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[PARTIAL_REDUCE_SUB]]) @@ -902,10 +902,10 @@ vector.body: ; preds = %vector.body, %entry %b.imag.ext = sext %b.imag 
to %real.mul = mul %b.real.ext, %a.real.ext %real.mul.neg = sub zeroinitializer, %real.mul - %real.mul.reduced = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %vec.phi, %real.mul.neg) + %real.mul.reduced = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %vec.phi, %real.mul.neg) %imag.mul = mul %b.imag.ext, %a.imag.ext %imag.mul.neg = sub zeroinitializer, %imag.mul - %partial.reduce.sub = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %real.mul.reduced, %imag.mul.neg) + %partial.reduce.sub = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %real.mul.reduced, %imag.mul.neg) br i1 true, label %middle.block, label %vector.body middle.block: ; preds = %vector.body @@ -931,10 +931,10 @@ define i16 @invalid_type( %a, %b) { ; CHECK-SVE2-NEXT: [[B_REAL_EXT:%.*]] = sext [[B_REAL]] to ; CHECK-SVE2-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-SVE2-NEXT: [[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_REAL_EXT]] -; CHECK-SVE2-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv8i16.nxv16i32( [[VEC_PHI]], [[REAL_MUL]]) +; CHECK-SVE2-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32( [[VEC_PHI]], [[REAL_MUL]]) ; CHECK-SVE2-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_IMAG_EXT]] ; CHECK-SVE2-NEXT: [[IMAG_MUL_NEG:%.*]] = sub zeroinitializer, [[IMAG_MUL]] -; CHECK-SVE2-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv8i16.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL_NEG]]) +; CHECK-SVE2-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL_NEG]]) ; CHECK-SVE2-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-SVE2: [[MIDDLE_BLOCK]]: ; CHECK-SVE2-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.nxv8i16( [[PARTIAL_REDUCE_SUB]]) @@ -957,10 +957,10 @@ define i16 @invalid_type( %a, %b) { ; CHECK-SVE-NEXT: [[B_REAL_EXT:%.*]] = sext [[B_REAL]] to ; CHECK-SVE-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-SVE-NEXT: [[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_REAL_EXT]] -; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv8i16.nxv16i32( [[VEC_PHI]], [[REAL_MUL]]) +; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32( [[VEC_PHI]], [[REAL_MUL]]) ; CHECK-SVE-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_IMAG_EXT]] ; CHECK-SVE-NEXT: [[IMAG_MUL_NEG:%.*]] = sub zeroinitializer, [[IMAG_MUL]] -; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv8i16.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL_NEG]]) +; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL_NEG]]) ; CHECK-SVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-SVE: [[MIDDLE_BLOCK]]: ; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.nxv8i16( [[PARTIAL_REDUCE_SUB]]) @@ -983,10 +983,10 @@ define i16 @invalid_type( %a, %b) { ; CHECK-NOSVE-NEXT: [[B_REAL_EXT:%.*]] = sext [[B_REAL]] to ; CHECK-NOSVE-NEXT: [[B_IMAG_EXT:%.*]] = sext [[B_IMAG]] to ; CHECK-NOSVE-NEXT: [[REAL_MUL:%.*]] = mul [[B_REAL_EXT]], [[A_REAL_EXT]] -; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv8i16.nxv16i32( [[VEC_PHI]], [[REAL_MUL]]) +; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call 
@llvm.vector.partial.reduce.add.nxv8i16.nxv16i32( [[VEC_PHI]], [[REAL_MUL]]) ; CHECK-NOSVE-NEXT: [[IMAG_MUL:%.*]] = mul [[B_IMAG_EXT]], [[A_IMAG_EXT]] ; CHECK-NOSVE-NEXT: [[IMAG_MUL_NEG:%.*]] = sub zeroinitializer, [[IMAG_MUL]] -; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.experimental.vector.partial.reduce.add.nxv8i16.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL_NEG]]) +; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32( [[REAL_MUL_REDUCED]], [[IMAG_MUL_NEG]]) ; CHECK-NOSVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-NOSVE: [[MIDDLE_BLOCK]]: ; CHECK-NOSVE-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.nxv8i16( [[PARTIAL_REDUCE_SUB]]) @@ -1008,10 +1008,10 @@ vector.body: ; preds = %vector.body, %entry %b.real.ext = sext %b.real to %b.imag.ext = sext %b.imag to %real.mul = mul %b.real.ext, %a.real.ext - %real.mul.reduced = call @llvm.experimental.vector.partial.reduce.add.nxv8i16.nxv16i32( %vec.phi, %real.mul) + %real.mul.reduced = call @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32( %vec.phi, %real.mul) %imag.mul = mul %b.imag.ext, %a.imag.ext %imag.mul.neg = sub zeroinitializer, %imag.mul - %partial.reduce.sub = call @llvm.experimental.vector.partial.reduce.add.nxv8i16.nxv16i32( %real.mul.reduced, %imag.mul.neg) + %partial.reduce.sub = call @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32( %real.mul.reduced, %imag.mul.neg) br i1 true, label %middle.block, label %vector.body middle.block: ; preds = %vector.body @@ -1037,10 +1037,10 @@ define i32 @not_cdotp_i8_rot0_fixed_length(<32 x i8> %a, <32 x i8> %b) { ; CHECK-SVE2-NEXT: [[B_REAL_EXT:%.*]] = sext <16 x i8> [[B_REAL]] to <16 x i32> ; CHECK-SVE2-NEXT: [[B_IMAG_EXT:%.*]] = sext <16 x i8> [[B_IMAG]] to <16 x i32> ; CHECK-SVE2-NEXT: [[REAL_MUL:%.*]] = mul <16 x i32> [[B_REAL_EXT]], [[A_REAL_EXT]] -; CHECK-SVE2-NEXT: [[REAL_MUL_REDUCED:%.*]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[REAL_MUL]]) +; CHECK-SVE2-NEXT: [[REAL_MUL_REDUCED:%.*]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[REAL_MUL]]) ; CHECK-SVE2-NEXT: [[IMAG_MUL:%.*]] = mul <16 x i32> [[B_IMAG_EXT]], [[A_IMAG_EXT]] ; CHECK-SVE2-NEXT: [[IMAG_MUL_NEG:%.*]] = sub <16 x i32> zeroinitializer, [[IMAG_MUL]] -; CHECK-SVE2-NEXT: [[PARTIAL_REDUCE_SUB]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[REAL_MUL_REDUCED]], <16 x i32> [[IMAG_MUL_NEG]]) +; CHECK-SVE2-NEXT: [[PARTIAL_REDUCE_SUB]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[REAL_MUL_REDUCED]], <16 x i32> [[IMAG_MUL_NEG]]) ; CHECK-SVE2-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-SVE2: [[MIDDLE_BLOCK]]: ; CHECK-SVE2-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE_SUB]]) @@ -1063,10 +1063,10 @@ define i32 @not_cdotp_i8_rot0_fixed_length(<32 x i8> %a, <32 x i8> %b) { ; CHECK-SVE-NEXT: [[B_REAL_EXT:%.*]] = sext <16 x i8> [[B_REAL]] to <16 x i32> ; CHECK-SVE-NEXT: [[B_IMAG_EXT:%.*]] = sext <16 x i8> [[B_IMAG]] to <16 x i32> ; CHECK-SVE-NEXT: [[REAL_MUL:%.*]] = mul <16 x i32> [[B_REAL_EXT]], [[A_REAL_EXT]] -; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[REAL_MUL]]) +; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x 
i32> [[VEC_PHI]], <16 x i32> [[REAL_MUL]]) ; CHECK-SVE-NEXT: [[IMAG_MUL:%.*]] = mul <16 x i32> [[B_IMAG_EXT]], [[A_IMAG_EXT]] ; CHECK-SVE-NEXT: [[IMAG_MUL_NEG:%.*]] = sub <16 x i32> zeroinitializer, [[IMAG_MUL]] -; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[REAL_MUL_REDUCED]], <16 x i32> [[IMAG_MUL_NEG]]) +; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[REAL_MUL_REDUCED]], <16 x i32> [[IMAG_MUL_NEG]]) ; CHECK-SVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-SVE: [[MIDDLE_BLOCK]]: ; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE_SUB]]) @@ -1089,10 +1089,10 @@ define i32 @not_cdotp_i8_rot0_fixed_length(<32 x i8> %a, <32 x i8> %b) { ; CHECK-NOSVE-NEXT: [[B_REAL_EXT:%.*]] = sext <16 x i8> [[B_REAL]] to <16 x i32> ; CHECK-NOSVE-NEXT: [[B_IMAG_EXT:%.*]] = sext <16 x i8> [[B_IMAG]] to <16 x i32> ; CHECK-NOSVE-NEXT: [[REAL_MUL:%.*]] = mul <16 x i32> [[B_REAL_EXT]], [[A_REAL_EXT]] -; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[REAL_MUL]]) +; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[REAL_MUL]]) ; CHECK-NOSVE-NEXT: [[IMAG_MUL:%.*]] = mul <16 x i32> [[B_IMAG_EXT]], [[A_IMAG_EXT]] ; CHECK-NOSVE-NEXT: [[IMAG_MUL_NEG:%.*]] = sub <16 x i32> zeroinitializer, [[IMAG_MUL]] -; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[REAL_MUL_REDUCED]], <16 x i32> [[IMAG_MUL_NEG]]) +; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[REAL_MUL_REDUCED]], <16 x i32> [[IMAG_MUL_NEG]]) ; CHECK-NOSVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-NOSVE: [[MIDDLE_BLOCK]]: ; CHECK-NOSVE-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE_SUB]]) @@ -1114,10 +1114,10 @@ vector.body: ; preds = %vector.body, %entry %b.real.ext = sext <16 x i8> %b.real to <16 x i32> %b.imag.ext = sext <16 x i8> %b.imag to <16 x i32> %real.mul = mul <16 x i32> %b.real.ext, %a.real.ext - %real.mul.reduced = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %vec.phi, <16 x i32> %real.mul) + %real.mul.reduced = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %vec.phi, <16 x i32> %real.mul) %imag.mul = mul <16 x i32> %b.imag.ext, %a.imag.ext %imag.mul.neg = sub <16 x i32> zeroinitializer, %imag.mul - %partial.reduce.sub = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %real.mul.reduced, <16 x i32> %imag.mul.neg) + %partial.reduce.sub = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %real.mul.reduced, <16 x i32> %imag.mul.neg) br i1 true, label %middle.block, label %vector.body middle.block: ; preds = %vector.body @@ -1125,11 +1125,11 @@ middle.block: ; preds = %vector.body ret i32 %0 } -declare @llvm.experimental.vector.partial.reduce.add.nxv8i16.nxv16i32(, ) -declare @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(, ) -declare @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i32(, ) +declare @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32(, ) +declare 
@llvm.vector.partial.reduce.add.nxv4i32.nxv16i32(, ) +declare @llvm.vector.partial.reduce.add.nxv2i64.nxv8i32(, ) -declare <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32>, <16 x i32>) +declare <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32>, <16 x i32>) declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) declare i32 @llvm.vector.reduce.add.nxv4i32() diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-unrolled-cdot.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-unrolled-cdot.ll index faefaf9bad7b1..d258ae0b376a1 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-unrolled-cdot.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-unrolled-cdot.ll @@ -38,12 +38,12 @@ define i32 @cdotp_i8_rot0( %a0, %b0, [[B1_IMAG]] to ; CHECK-SVE2-NEXT: [[TMP2:%.*]] = mul nsw [[B0_IMAG_EXT]], [[A0_IMAG_EXT]] ; CHECK-SVE2-NEXT: [[TMP3:%.*]] = mul nsw [[B1_IMAG_EXT]], [[A1_IMAG_EXT]] -; CHECK-SVE2-NEXT: [[PARTIAL_REDUCE:%.*]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[TMP0]]) -; CHECK-SVE2-NEXT: [[PARTIAL_REDUCE32:%.*]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI25]], [[TMP1]]) +; CHECK-SVE2-NEXT: [[PARTIAL_REDUCE:%.*]] = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[TMP0]]) +; CHECK-SVE2-NEXT: [[PARTIAL_REDUCE32:%.*]] = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI25]], [[TMP1]]) ; CHECK-SVE2-NEXT: [[TMP4:%.*]] = sub nsw zeroinitializer, [[TMP2]] ; CHECK-SVE2-NEXT: [[TMP5:%.*]] = sub nsw zeroinitializer, [[TMP3]] -; CHECK-SVE2-NEXT: [[PARTIAL_REDUCE33]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[PARTIAL_REDUCE]], [[TMP4]]) -; CHECK-SVE2-NEXT: [[PARTIAL_REDUCE34]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[PARTIAL_REDUCE32]], [[TMP5]]) +; CHECK-SVE2-NEXT: [[PARTIAL_REDUCE33]] = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[PARTIAL_REDUCE]], [[TMP4]]) +; CHECK-SVE2-NEXT: [[PARTIAL_REDUCE34]] = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[PARTIAL_REDUCE32]], [[TMP5]]) ; CHECK-SVE2-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-SVE2: [[MIDDLE_BLOCK]]: ; CHECK-SVE2-NEXT: [[BIN_RDX:%.*]] = add [[PARTIAL_REDUCE34]], [[PARTIAL_REDUCE33]] @@ -81,12 +81,12 @@ define i32 @cdotp_i8_rot0( %a0, %b0, [[B1_IMAG]] to ; CHECK-SVE-NEXT: [[TMP2:%.*]] = mul nsw [[B0_IMAG_EXT]], [[A0_IMAG_EXT]] ; CHECK-SVE-NEXT: [[TMP3:%.*]] = mul nsw [[B1_IMAG_EXT]], [[A1_IMAG_EXT]] -; CHECK-SVE-NEXT: [[PARTIAL_REDUCE:%.*]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[TMP0]]) -; CHECK-SVE-NEXT: [[PARTIAL_REDUCE32:%.*]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI25]], [[TMP1]]) +; CHECK-SVE-NEXT: [[PARTIAL_REDUCE:%.*]] = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[TMP0]]) +; CHECK-SVE-NEXT: [[PARTIAL_REDUCE32:%.*]] = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI25]], [[TMP1]]) ; CHECK-SVE-NEXT: [[TMP4:%.*]] = sub nsw zeroinitializer, [[TMP2]] ; CHECK-SVE-NEXT: [[TMP5:%.*]] = sub nsw zeroinitializer, [[TMP3]] -; CHECK-SVE-NEXT: [[PARTIAL_REDUCE33]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[PARTIAL_REDUCE]], [[TMP4]]) -; CHECK-SVE-NEXT: [[PARTIAL_REDUCE34]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( 
[[PARTIAL_REDUCE32]], [[TMP5]]) +; CHECK-SVE-NEXT: [[PARTIAL_REDUCE33]] = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[PARTIAL_REDUCE]], [[TMP4]]) +; CHECK-SVE-NEXT: [[PARTIAL_REDUCE34]] = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[PARTIAL_REDUCE32]], [[TMP5]]) ; CHECK-SVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-SVE: [[MIDDLE_BLOCK]]: ; CHECK-SVE-NEXT: [[BIN_RDX:%.*]] = add [[PARTIAL_REDUCE34]], [[PARTIAL_REDUCE33]] @@ -124,12 +124,12 @@ define i32 @cdotp_i8_rot0( %a0, %b0, [[B1_IMAG]] to ; CHECK-NOSVE-NEXT: [[TMP2:%.*]] = mul nsw [[B0_IMAG_EXT]], [[A0_IMAG_EXT]] ; CHECK-NOSVE-NEXT: [[TMP3:%.*]] = mul nsw [[B1_IMAG_EXT]], [[A1_IMAG_EXT]] -; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE:%.*]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[TMP0]]) -; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE32:%.*]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI25]], [[TMP1]]) +; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE:%.*]] = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[TMP0]]) +; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE32:%.*]] = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI25]], [[TMP1]]) ; CHECK-NOSVE-NEXT: [[TMP4:%.*]] = sub nsw zeroinitializer, [[TMP2]] ; CHECK-NOSVE-NEXT: [[TMP5:%.*]] = sub nsw zeroinitializer, [[TMP3]] -; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE33]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[PARTIAL_REDUCE]], [[TMP4]]) -; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE34]] = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[PARTIAL_REDUCE32]], [[TMP5]]) +; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE33]] = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[PARTIAL_REDUCE]], [[TMP4]]) +; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE34]] = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[PARTIAL_REDUCE32]], [[TMP5]]) ; CHECK-NOSVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]] ; CHECK-NOSVE: [[MIDDLE_BLOCK]]: ; CHECK-NOSVE-NEXT: [[BIN_RDX:%.*]] = add [[PARTIAL_REDUCE34]], [[PARTIAL_REDUCE33]] @@ -166,12 +166,12 @@ vector.body: ; preds = %vector.body, %entry %b1.imag.ext = sext %b1.imag to %24 = mul nsw %b0.imag.ext, %a0.imag.ext %25 = mul nsw %b1.imag.ext, %a1.imag.ext - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %vec.phi, %18) - %partial.reduce32 = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %vec.phi25, %19) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %vec.phi, %18) + %partial.reduce32 = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %vec.phi25, %19) %26 = sub nsw zeroinitializer, %24 %27 = sub nsw zeroinitializer, %25 - %partial.reduce33 = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %partial.reduce, %26) - %partial.reduce34 = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %partial.reduce32, %27) + %partial.reduce33 = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %partial.reduce, %26) + %partial.reduce34 = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %partial.reduce32, %27) br i1 true, label %middle.block, label %vector.body middle.block: ; preds = %vector.body @@ -180,11 +180,11 @@ middle.block: ; preds = %vector.body ret i32 %29 } -declare @llvm.experimental.vector.partial.reduce.add.nxv8i16.nxv16i32(, ) -declare 
@llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(, ) -declare @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i32(, ) +declare @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32(, ) +declare @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32(, ) +declare @llvm.vector.partial.reduce.add.nxv2i64.nxv8i32(, ) -declare <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32>, <16 x i32>) +declare <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32>, <16 x i32>) declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) declare i32 @llvm.vector.reduce.add.nxv4i32() diff --git a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll index 2d81a264e02bc..c38516fc57bbd 100644 --- a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll +++ b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll @@ -26,7 +26,7 @@ define <4 x i32> @udot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) { %u.wide = zext <16 x i8> %u to <16 x i32> %s.wide = zext <16 x i8> %s to <16 x i32> %mult = mul nuw nsw <16 x i32> %s.wide, %u.wide - %partial.reduce = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %mult) + %partial.reduce = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %mult) ret <4 x i32> %partial.reduce } @@ -96,7 +96,7 @@ vector.body: %load2 = load <16 x i8>, ptr %gep2, align 16 %load2.wide = zext <16 x i8> %load2 to <16 x i32> %mul = mul nuw nsw <16 x i32> %load1.wide, %load2.wide - %partial.reduce = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %mul) + %partial.reduce = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %mul) %index.next = add nuw i64 %index, 16 %cmp = icmp eq i64 %index.next, 16 br i1 %cmp, label %end, label %vector.body @@ -133,7 +133,7 @@ define <2 x i32> @udot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) { %u.wide = zext <8 x i8> %u to <8 x i32> %s.wide = zext <8 x i8> %s to <8 x i32> %mult = mul nuw nsw <8 x i32> %s.wide, %u.wide - %partial.reduce = tail call <2 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<2 x i32> %acc, <8 x i32> %mult) + %partial.reduce = tail call <2 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<2 x i32> %acc, <8 x i32> %mult) ret <2 x i32> %partial.reduce } @@ -160,7 +160,7 @@ define <4 x i32> @sdot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) { %u.wide = sext <16 x i8> %u to <16 x i32> %s.wide = sext <16 x i8> %s to <16 x i32> %mult = mul nuw nsw <16 x i32> %s.wide, %u.wide - %partial.reduce = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %mult) + %partial.reduce = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %mult) ret <4 x i32> %partial.reduce } @@ -192,7 +192,7 @@ define <2 x i32> @sdot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) { %u.wide = sext <8 x i8> %u to <8 x i32> %s.wide = sext <8 x i8> %s to <8 x i32> %mult = mul nuw nsw <8 x i32> %s.wide, %u.wide - %partial.reduce = tail call <2 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<2 x i32> %acc, <8 x i32> %mult) + %partial.reduce = tail call <2 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<2 x i32> %acc, <8 x i32> %mult) ret <2 x i32> %partial.reduce } @@ -228,7 +228,7 @@ define <4 x i32> @usdot(<4 x i32> %acc, <16 x i8> %u, 
<16 x i8> %s) { %u.wide = zext <16 x i8> %u to <16 x i32> %s.wide = sext <16 x i8> %s to <16 x i32> %mult = mul nuw nsw <16 x i32> %s.wide, %u.wide - %partial.reduce = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %mult) + %partial.reduce = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %mult) ret <4 x i32> %partial.reduce } @@ -307,7 +307,7 @@ vector.body: %load2 = load <16 x i8>, ptr %gep2, align 16 %load2.wide = zext <16 x i8> %load2 to <16 x i32> %mul = mul nuw nsw <16 x i32> %load1.wide, %load2.wide - %partial.reduce = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %mul) + %partial.reduce = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %mul) %index.next = add nuw i64 %index, 16 %cmp = icmp eq i64 %index.next, 16 br i1 %cmp, label %end, label %vector.body @@ -358,7 +358,7 @@ define <2 x i32> @usdot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) #0{ %u.wide = zext <8 x i8> %u to <8 x i32> %s.wide = sext <8 x i8> %s to <8 x i32> %mult = mul nuw nsw <8 x i32> %s.wide, %u.wide - %partial.reduce = tail call <2 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<2 x i32> %acc, <8 x i32> %mult) + %partial.reduce = tail call <2 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<2 x i32> %acc, <8 x i32> %mult) ret <2 x i32> %partial.reduce } @@ -394,7 +394,7 @@ define <4 x i32> @sudot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) #0{ %s.wide = sext <16 x i8> %u to <16 x i32> %u.wide = zext <16 x i8> %s to <16 x i32> %mult = mul nuw nsw <16 x i32> %u.wide, %s.wide - %partial.reduce = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %mult) + %partial.reduce = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %mult) ret <4 x i32> %partial.reduce } @@ -473,7 +473,7 @@ vector.body: %load2 = load <16 x i8>, ptr %gep2, align 16 %load2.wide = sext <16 x i8> %load2 to <16 x i32> %mul = mul nuw nsw <16 x i32> %load1.wide, %load2.wide - %partial.reduce = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %mul) + %partial.reduce = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %mul) %index.next = add nuw i64 %index, 16 %cmp = icmp eq i64 %index.next, 16 br i1 %cmp, label %end, label %vector.body @@ -524,7 +524,7 @@ define <2 x i32> @sudot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) #0{ %u.wide = sext <8 x i8> %u to <8 x i32> %s.wide = zext <8 x i8> %s to <8 x i32> %mult = mul nuw nsw <8 x i32> %s.wide, %u.wide - %partial.reduce = tail call <2 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<2 x i32> %acc, <8 x i32> %mult) + %partial.reduce = tail call <2 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<2 x i32> %acc, <8 x i32> %mult) ret <2 x i32> %partial.reduce } @@ -566,7 +566,7 @@ entry: %a.wide = zext <16 x i8> %a to <16 x i64> %b.wide = zext <16 x i8> %b to <16 x i64> %mult = mul nuw nsw <16 x i64> %a.wide, %b.wide - %partial.reduce = tail call <4 x i64> @llvm.experimental.vector.partial.reduce.add.v4i64.v16i64( + %partial.reduce = tail call <4 x i64> @llvm.vector.partial.reduce.add.v4i64.v16i64( <4 x i64> %acc, <16 x i64> %mult) ret <4 x i64> %partial.reduce } @@ -609,7 +609,7 @@ entry: %a.wide = sext <16 x i8> %a to <16 x i64> %b.wide = sext <16 x i8> %b to 
<16 x i64> %mult = mul nuw nsw <16 x i64> %a.wide, %b.wide - %partial.reduce = tail call <4 x i64> @llvm.experimental.vector.partial.reduce.add.v4i64.v16i64( + %partial.reduce = tail call <4 x i64> @llvm.vector.partial.reduce.add.v4i64.v16i64( <4 x i64> %acc, <16 x i64> %mult) ret <4 x i64> %partial.reduce } @@ -674,7 +674,7 @@ entry: %a.wide = zext <16 x i8> %a to <16 x i64> %b.wide = sext <16 x i8> %b to <16 x i64> %mult = mul nuw nsw <16 x i64> %a.wide, %b.wide - %partial.reduce = tail call <4 x i64> @llvm.experimental.vector.partial.reduce.add.v4i64.v16i64( + %partial.reduce = tail call <4 x i64> @llvm.vector.partial.reduce.add.v4i64.v16i64( <4 x i64> %acc, <16 x i64> %mult) ret <4 x i64> %partial.reduce } @@ -739,7 +739,7 @@ entry: %a.wide = sext <16 x i8> %a to <16 x i64> %b.wide = zext <16 x i8> %b to <16 x i64> %mult = mul nuw nsw <16 x i64> %a.wide, %b.wide - %partial.reduce = tail call <4 x i64> @llvm.experimental.vector.partial.reduce.add.v4i64.v16i64( + %partial.reduce = tail call <4 x i64> @llvm.vector.partial.reduce.add.v4i64.v16i64( <4 x i64> %acc, <16 x i64> %mult) ret <4 x i64> %partial.reduce } @@ -767,7 +767,7 @@ define <4 x i32> @udot_no_bin_op(<4 x i32> %acc, <16 x i8> %a){ ; CHECK-DOT-I8MM-NEXT: udot v0.4s, v1.16b, v2.16b ; CHECK-DOT-I8MM-NEXT: ret %a.wide = zext <16 x i8> %a to <16 x i32> - %partial.reduce = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %a.wide) + %partial.reduce = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %a.wide) ret <4 x i32> %partial.reduce } @@ -832,7 +832,7 @@ vector.body: %gep = getelementptr i8, ptr %p, i64 %index %load = load <16 x i8>, ptr %gep, align 16 %load.wide = zext <16 x i8> %load to <16 x i32> - %partial.reduce = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %load.wide) + %partial.reduce = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %load.wide) %index.next = add nuw i64 %index, 16 %cmp = icmp eq i64 %index.next, 16 br i1 %cmp, label %end, label %vector.body @@ -864,7 +864,7 @@ define <4 x i32> @sdot_no_bin_op(<4 x i32> %acc, <16 x i8> %a){ ; CHECK-DOT-I8MM-NEXT: sdot v0.4s, v1.16b, v2.16b ; CHECK-DOT-I8MM-NEXT: ret %a.wide = sext <16 x i8> %a to <16 x i32> - %partial.reduce = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %a.wide) + %partial.reduce = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %a.wide) ret <4 x i32> %partial.reduce } @@ -896,7 +896,7 @@ define <2 x i32> @udot_no_bin_op_narrow(<2 x i32> %acc, <8 x i8> %a){ ; CHECK-DOT-I8MM-NEXT: udot v0.2s, v1.8b, v2.8b ; CHECK-DOT-I8MM-NEXT: ret %a.wide = zext <8 x i8> %a to <8 x i32> - %partial.reduce = tail call <2 x i32> @llvm.experimental.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> %acc, <8 x i32> %a.wide) + %partial.reduce = tail call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> %acc, <8 x i32> %a.wide) ret <2 x i32> %partial.reduce } @@ -928,7 +928,7 @@ define <2 x i32> @sdot_no_bin_op_narrow(<2 x i32> %acc, <8 x i8> %a){ ; CHECK-DOT-I8MM-NEXT: sdot v0.2s, v1.8b, v2.8b ; CHECK-DOT-I8MM-NEXT: ret %a.wide = sext <8 x i8> %a to <8 x i32> - %partial.reduce = tail call <2 x i32> @llvm.experimental.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> %acc, <8 x i32> %a.wide) + %partial.reduce = tail call <2 x i32> 
@llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> %acc, <8 x i32> %a.wide) ret <2 x i32> %partial.reduce } @@ -969,7 +969,7 @@ define <4 x i64> @udot_no_bin_op_8to64(<4 x i64> %acc, <16 x i8> %a){ ; CHECK-DOT-I8MM-NEXT: uaddw2 v0.2d, v0.2d, v4.4s ; CHECK-DOT-I8MM-NEXT: ret %a.wide = zext <16 x i8> %a to <16 x i64> - %partial.reduce = tail call <4 x i64> @llvm.experimental.vector.partial.reduce.add.v4i64.v16i64(<4 x i64> %acc, <16 x i64> %a.wide) + %partial.reduce = tail call <4 x i64> @llvm.vector.partial.reduce.add.v4i64.v16i64(<4 x i64> %acc, <16 x i64> %a.wide) ret <4 x i64> %partial.reduce } @@ -1010,7 +1010,7 @@ define <4 x i64> @sdot_no_bin_op_8to64(<4 x i64> %acc, <16 x i8> %a){ ; CHECK-DOT-I8MM-NEXT: saddw2 v0.2d, v0.2d, v4.4s ; CHECK-DOT-I8MM-NEXT: ret %a.wide = sext <16 x i8> %a to <16 x i64> - %partial.reduce = tail call <4 x i64> @llvm.experimental.vector.partial.reduce.add.v4i64.v16i64(<4 x i64> %acc, <16 x i64> %a.wide) + %partial.reduce = tail call <4 x i64> @llvm.vector.partial.reduce.add.v4i64.v16i64(<4 x i64> %acc, <16 x i64> %a.wide) ret <4 x i64> %partial.reduce } @@ -1024,7 +1024,7 @@ define <4 x i32> @not_udot(<4 x i32> %acc, <8 x i8> %u, <8 x i8> %s) #0{ %u.wide = zext <8 x i8> %u to <8 x i32> %s.wide = zext <8 x i8> %s to <8 x i32> %mult = mul nuw nsw <8 x i32> %s.wide, %u.wide - %partial.reduce = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <8 x i32> %mult) + %partial.reduce = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <8 x i32> %mult) ret <4 x i32> %partial.reduce } @@ -1042,7 +1042,7 @@ define <2 x i32> @not_udot_narrow(<2 x i32> %acc, <4 x i8> %u, <4 x i8> %s) { %u.wide = zext <4 x i8> %u to <4 x i32> %s.wide = zext <4 x i8> %s to <4 x i32> %mult = mul nuw nsw <4 x i32> %s.wide, %u.wide - %partial.reduce = tail call <2 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<2 x i32> %acc, <4 x i32> %mult) + %partial.reduce = tail call <2 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<2 x i32> %acc, <4 x i32> %mult) ret <2 x i32> %partial.reduce } @@ -1063,7 +1063,7 @@ entry: %a.wide = zext <8 x i16> %a to <8 x i64> %b.wide = zext <8 x i8> %b to <8 x i64> %mult = mul nuw nsw <8 x i64> %a.wide, %b.wide - %partial.reduce = tail call <2 x i64> @llvm.experimental.vector.partial.reduce.add.v2i64.v8i64(<2 x i64> %acc, <8 x i64> %mult) + %partial.reduce = tail call <2 x i64> @llvm.vector.partial.reduce.add.v2i64.v8i64(<2 x i64> %acc, <8 x i64> %mult) ret <2 x i64> %partial.reduce } @@ -1084,7 +1084,7 @@ entry: %a.wide = sext <8 x i16> %a to <8 x i64> %b.wide = sext <8 x i8> %b to <8 x i64> %mult = mul nuw nsw <8 x i64> %a.wide, %b.wide - %partial.reduce = tail call <2 x i64> @llvm.experimental.vector.partial.reduce.add.v2i64.v8i64(<2 x i64> %acc, <8 x i64> %mult) + %partial.reduce = tail call <2 x i64> @llvm.vector.partial.reduce.add.v2i64.v8i64(<2 x i64> %acc, <8 x i64> %mult) ret <2 x i64> %partial.reduce } @@ -1105,7 +1105,7 @@ entry: %a.wide = zext <8 x i16> %a to <8 x i64> %b.wide = sext <8 x i8> %b to <8 x i64> %mult = mul nuw nsw <8 x i64> %a.wide, %b.wide - %partial.reduce = tail call <2 x i64> @llvm.experimental.vector.partial.reduce.add.v2i64.v8i64(<2 x i64> %acc, <8 x i64> %mult) + %partial.reduce = tail call <2 x i64> @llvm.vector.partial.reduce.add.v2i64.v8i64(<2 x i64> %acc, <8 x i64> %mult) ret <2 x i64> %partial.reduce } @@ -1126,7 +1126,7 @@ entry: %a.wide = sext <8 x i16> %a to <8 x i64> %b.wide = zext <8 x i8> %b to <8 x i64> %mult = mul nuw nsw 
<8 x i64> %a.wide, %b.wide - %partial.reduce = tail call <2 x i64> @llvm.experimental.vector.partial.reduce.add.v2i64.v8i64(<2 x i64> %acc, <8 x i64> %mult) + %partial.reduce = tail call <2 x i64> @llvm.vector.partial.reduce.add.v2i64.v8i64(<2 x i64> %acc, <8 x i64> %mult) ret <2 x i64> %partial.reduce } @@ -1227,10 +1227,10 @@ vector.body: %sext1 = sext <16 x i8> %load1 to <16 x i32> %zext = zext <16 x i8> %load3 to <16 x i32> %mul1 = mul <16 x i32> %sext1, %zext - %psum1 = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc1, <16 x i32> %mul1) + %psum1 = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc1, <16 x i32> %mul1) %sext2 = sext <16 x i8> %load2 to <16 x i32> %mul2 = mul <16 x i32> %sext2, %zext - %psum2 = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc2, <16 x i32> %mul2) + %psum2 = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc2, <16 x i32> %mul2) %iv.next = add i64 %iv, 16 %1 = icmp eq i64 %iv.next, 1024 br i1 %1, label %end, label %vector.body @@ -1252,6 +1252,6 @@ define <2 x i64> @udot_16to64(<2 x i64> %acc, <8 x i16> %input){ ; CHECK-COMMON-NEXT: ret entry: %input.wide = zext <8 x i16> %input to <8 x i64> - %partial.reduce = tail call <2 x i64> @llvm.experimental.vector.partial.reduce.add(<2 x i64> %acc, <8 x i64> %input.wide) + %partial.reduce = tail call <2 x i64> @llvm.vector.partial.reduce.add(<2 x i64> %acc, <8 x i64> %input.wide) ret <2 x i64> %partial.reduce } diff --git a/llvm/test/CodeGen/AArch64/partial-reduction-add.ll b/llvm/test/CodeGen/AArch64/partial-reduction-add.ll index c3828c3d695c4..139adb295ac7c 100644 --- a/llvm/test/CodeGen/AArch64/partial-reduction-add.ll +++ b/llvm/test/CodeGen/AArch64/partial-reduction-add.ll @@ -10,7 +10,7 @@ define <4 x i32> @partial_reduce_add_fixed(<4 x i32> %accumulator, <4 x i32> %0) ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret entry: - %partial.reduce = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v4i32.v4i32(<4 x i32> %accumulator, <4 x i32> %0) + %partial.reduce = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v4i32(<4 x i32> %accumulator, <4 x i32> %0) ret <4 x i32> %partial.reduce } @@ -21,7 +21,7 @@ define <4 x i32> @partial_reduce_add_fixed_half(<4 x i32> %accumulator, <8 x i32 ; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret entry: - %partial.reduce = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v4i32.v8i32(<4 x i32> %accumulator, <8 x i32> %0) + %partial.reduce = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v4i32.v8i32(<4 x i32> %accumulator, <8 x i32> %0) ret <4 x i32> %partial.reduce } @@ -31,7 +31,7 @@ define @partial_reduce_add( %accumulator, < ; CHECK-NEXT: add z0.s, z0.s, z1.s ; CHECK-NEXT: ret entry: - %partial.reduce = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv4i32( %accumulator, %0) + %partial.reduce = call @llvm.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv4i32( %accumulator, %0) ret %partial.reduce } @@ -42,7 +42,7 @@ define @partial_reduce_add_half( %accumulat ; CHECK-NEXT: add z0.s, z0.s, z2.s ; CHECK-NEXT: ret entry: - %partial.reduce = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv8i32( %accumulator, %0) + %partial.reduce = call @llvm.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv8i32( %accumulator, %0) ret %partial.reduce } @@ -55,7 +55,7 @@ define @partial_reduce_add_quart( %accumula ; CHECK-NEXT: add z0.s, z0.s, z4.s ; 
CHECK-NEXT: ret entry: - %partial.reduce = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv16i32( %accumulator, %0) + %partial.reduce = call @llvm.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv16i32( %accumulator, %0) ret %partial.reduce } @@ -68,14 +68,14 @@ define @partial_reduce_add_half_8( %accumul ; CHECK-NEXT: add z1.s, z1.s, z5.s ; CHECK-NEXT: ret entry: - %partial.reduce = call @llvm.experimental.vector.partial.reduce.add.nxv8i32.nxv8i32.nxv16i32( %accumulator, %0) + %partial.reduce = call @llvm.vector.partial.reduce.add.nxv8i32.nxv8i32.nxv16i32( %accumulator, %0) ret %partial.reduce } -declare @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv4i32(, ) -declare @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv8i32(, ) -declare @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv16i32(, ) -declare @llvm.experimental.vector.partial.reduce.add.nxv8i32.nxv8i32.nxv16i32(, ) +declare @llvm.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv4i32(, ) +declare @llvm.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv8i32(, ) +declare @llvm.vector.partial.reduce.add.nxv4i32.nxv4i32.nxv16i32(, ) +declare @llvm.vector.partial.reduce.add.nxv8i32.nxv8i32.nxv16i32(, ) declare i32 @llvm.vector.reduce.add.nxv4i32() declare i32 @llvm.vector.reduce.add.nxv8i32() diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll index cc19f6c2cbbc8..e71d983664cd9 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll @@ -35,7 +35,7 @@ define <8 x i16> @two_way_i8_i16_vl128(ptr %accptr, ptr %uptr, ptr %sptr) { %u.wide = zext <16 x i8> %u to <16 x i16> %s.wide = zext <16 x i8> %s to <16 x i16> %mult = mul nuw nsw <16 x i16> %s.wide, %u.wide - %partial.reduce = tail call <8 x i16> @llvm.experimental.vector.partial.reduce.add(<8 x i16> %acc, <16 x i16> %mult) + %partial.reduce = tail call <8 x i16> @llvm.vector.partial.reduce.add(<8 x i16> %acc, <16 x i16> %mult) ret <8 x i16> %partial.reduce } @@ -70,7 +70,7 @@ define <16 x i16> @two_way_i8_i16_vl128_double_width(ptr %accptr, ptr %uptr, ptr %u.wide = zext <32 x i8> %u to <32 x i16> %s.wide = zext <32 x i8> %s to <32 x i16> %mult = mul nuw nsw <32 x i16> %s.wide, %u.wide - %partial.reduce = tail call <16 x i16> @llvm.experimental.vector.partial.reduce.add(<16 x i16> %acc, <32 x i16> %mult) + %partial.reduce = tail call <16 x i16> @llvm.vector.partial.reduce.add(<16 x i16> %acc, <32 x i16> %mult) ret <16 x i16> %partial.reduce } @@ -124,7 +124,7 @@ define <16 x i16> @two_way_i8_i16_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal %u.wide = zext <32 x i8> %u to <32 x i16> %s.wide = zext <32 x i8> %s to <32 x i16> %mult = mul nuw nsw <32 x i16> %s.wide, %u.wide - %partial.reduce = tail call <16 x i16> @llvm.experimental.vector.partial.reduce.add(<16 x i16> %acc, <32 x i16> %mult) + %partial.reduce = tail call <16 x i16> @llvm.vector.partial.reduce.add(<16 x i16> %acc, <32 x i16> %mult) ret <16 x i16> %partial.reduce } @@ -158,7 +158,7 @@ define <4 x i32> @two_way_i16_i32_vl128(ptr %accptr, ptr %uptr, ptr %sptr) { %u.wide = zext <8 x i16> %u to <8 x i32> %s.wide = zext <8 x i16> %s to <8 x i32> %mult = mul nuw nsw <8 x i32> %s.wide, %u.wide - %partial.reduce = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add(<4 x i32> %acc, <8 x i32> %mult) + %partial.reduce = tail call <4 x i32> @llvm.vector.partial.reduce.add(<4 x i32> %acc, <8 x i32> %mult) 
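; The call above folds the 16 x i16 products into the 8 x i16 accumulator, a
; 2:1 "two-way" partial reduction. As a sketch of the semantics only (one
; legal expansion, not the lowering the CHECK lines test for):
;   %lo = call <8 x i16> @llvm.vector.extract.v8i16.v16i16(<16 x i16> %mult, i64 0)
;   %hi = call <8 x i16> @llvm.vector.extract.v8i16.v16i16(<16 x i16> %mult, i64 8)
;   %t  = add <8 x i16> %lo, %hi
;   %r  = add <8 x i16> %acc, %t
; The mapping of input lanes to result lanes is not fixed by the intrinsic,
; which is what lets targets pick widening adds (uaddw2) or dot-product
; instructions (udot) as other tests in this patch check.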
ret <4 x i32> %partial.reduce } @@ -193,7 +193,7 @@ define <8 x i32> @two_way_i16_i32_vl128_double_width(ptr %accptr, ptr %uptr, ptr %u.wide = zext <16 x i16> %u to <16 x i32> %s.wide = zext <16 x i16> %s to <16 x i32> %mult = mul nuw nsw <16 x i32> %s.wide, %u.wide - %partial.reduce = tail call <8 x i32> @llvm.experimental.vector.partial.reduce.add(<8 x i32> %acc, <16 x i32> %mult) + %partial.reduce = tail call <8 x i32> @llvm.vector.partial.reduce.add(<8 x i32> %acc, <16 x i32> %mult) ret <8 x i32> %partial.reduce } @@ -247,7 +247,7 @@ define <8 x i32> @two_way_i16_i32_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal %u.wide = zext <16 x i16> %u to <16 x i32> %s.wide = zext <16 x i16> %s to <16 x i32> %mult = mul nuw nsw <16 x i32> %s.wide, %u.wide - %partial.reduce = tail call <8 x i32> @llvm.experimental.vector.partial.reduce.add(<8 x i32> %acc, <16 x i32> %mult) + %partial.reduce = tail call <8 x i32> @llvm.vector.partial.reduce.add(<8 x i32> %acc, <16 x i32> %mult) ret <8 x i32> %partial.reduce } @@ -281,7 +281,7 @@ define <2 x i64> @two_way_i32_i64_vl128(ptr %accptr, ptr %uptr, ptr %sptr) { %u.wide = zext <4 x i32> %u to <4 x i64> %s.wide = zext <4 x i32> %s to <4 x i64> %mult = mul nuw nsw <4 x i64> %s.wide, %u.wide - %partial.reduce = tail call <2 x i64> @llvm.experimental.vector.partial.reduce.add(<2 x i64> %acc, <4 x i64> %mult) + %partial.reduce = tail call <2 x i64> @llvm.vector.partial.reduce.add(<2 x i64> %acc, <4 x i64> %mult) ret <2 x i64> %partial.reduce } @@ -316,7 +316,7 @@ define <4 x i64> @two_way_i32_i64_vl128_double_width(ptr %accptr, ptr %uptr, ptr %u.wide = zext <8 x i32> %u to <8 x i64> %s.wide = zext <8 x i32> %s to <8 x i64> %mult = mul nuw nsw <8 x i64> %s.wide, %u.wide - %partial.reduce = tail call <4 x i64> @llvm.experimental.vector.partial.reduce.add(<4 x i64> %acc, <8 x i64> %mult) + %partial.reduce = tail call <4 x i64> @llvm.vector.partial.reduce.add(<4 x i64> %acc, <8 x i64> %mult) ret <4 x i64> %partial.reduce } @@ -370,7 +370,7 @@ define <4 x i64> @two_way_i32_i64_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal %u.wide = zext <8 x i32> %u to <8 x i64> %s.wide = zext <8 x i32> %s to <8 x i64> %mult = mul nuw nsw <8 x i64> %s.wide, %u.wide - %partial.reduce = tail call <4 x i64> @llvm.experimental.vector.partial.reduce.add(<4 x i64> %acc, <8 x i64> %mult) + %partial.reduce = tail call <4 x i64> @llvm.vector.partial.reduce.add(<4 x i64> %acc, <8 x i64> %mult) ret <4 x i64> %partial.reduce } @@ -403,7 +403,7 @@ define <4 x i32> @four_way_i8_i32_vl128(ptr %accptr, ptr %uptr, ptr %sptr) { %u.wide = zext <16 x i8> %u to <16 x i32> %s.wide = zext <16 x i8> %s to <16 x i32> %mult = mul nuw nsw <16 x i32> %s.wide, %u.wide - %partial.reduce = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add(<4 x i32> %acc, <16 x i32> %mult) + %partial.reduce = tail call <4 x i32> @llvm.vector.partial.reduce.add(<4 x i32> %acc, <16 x i32> %mult) ret <4 x i32> %partial.reduce } @@ -430,7 +430,7 @@ define <4 x i32> @four_way_i8_i32_vl128_usdot(ptr %accptr, ptr %uptr, ptr %sptr) %u.wide = zext <16 x i8> %u to <16 x i32> %s.wide = sext <16 x i8> %s to <16 x i32> %mult = mul nuw nsw <16 x i32> %s.wide, %u.wide - %partial.reduce = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add(<4 x i32> %acc, <16 x i32> %mult) + %partial.reduce = tail call <4 x i32> @llvm.vector.partial.reduce.add(<4 x i32> %acc, <16 x i32> %mult) ret <4 x i32> %partial.reduce } @@ -457,7 +457,7 @@ define <4 x i32> @four_way_i8_i32_vl128_sudot(ptr %accptr, ptr %uptr, ptr %sptr) %u.wide = 
sext <16 x i8> %u to <16 x i32> %s.wide = zext <16 x i8> %s to <16 x i32> %mult = mul nuw nsw <16 x i32> %s.wide, %u.wide - %partial.reduce = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add(<4 x i32> %acc, <16 x i32> %mult) + %partial.reduce = tail call <4 x i32> @llvm.vector.partial.reduce.add(<4 x i32> %acc, <16 x i32> %mult) ret <4 x i32> %partial.reduce } @@ -504,7 +504,7 @@ define <2 x i64> @four_way_i8_i64_vl128_usdot(ptr %accptr, ptr %uptr, ptr %sptr) %u.wide = zext <16 x i8> %u to <16 x i64> %s.wide = sext <16 x i8> %s to <16 x i64> %mult = mul nuw nsw <16 x i64> %s.wide, %u.wide - %partial.reduce = tail call <2 x i64> @llvm.experimental.vector.partial.reduce.add(<2 x i64> %acc, <16 x i64> %mult) + %partial.reduce = tail call <2 x i64> @llvm.vector.partial.reduce.add(<2 x i64> %acc, <16 x i64> %mult) ret <2 x i64> %partial.reduce } @@ -551,7 +551,7 @@ define <2 x i64> @four_way_i16_i64_vl128_usdot(ptr %accptr, ptr %uptr, ptr %sptr %u.wide = zext <8 x i16> %u to <8 x i64> %s.wide = sext <8 x i16> %s to <8 x i64> %mult = mul nuw nsw <8 x i64> %s.wide, %u.wide - %partial.reduce = tail call <2 x i64> @llvm.experimental.vector.partial.reduce.add(<2 x i64> %acc, <8 x i64> %mult) + %partial.reduce = tail call <2 x i64> @llvm.vector.partial.reduce.add(<2 x i64> %acc, <8 x i64> %mult) ret <2 x i64> %partial.reduce } @@ -582,7 +582,7 @@ define <8 x i32> @four_way_i8_i32_vl128_double_width(ptr %accptr, ptr %uptr, ptr %u.wide = zext <32 x i8> %u to <32 x i32> %s.wide = zext <32 x i8> %s to <32 x i32> %mult = mul nuw nsw <32 x i32> %s.wide, %u.wide - %partial.reduce = tail call <8 x i32> @llvm.experimental.vector.partial.reduce.add(<8 x i32> %acc, <32 x i32> %mult) + %partial.reduce = tail call <8 x i32> @llvm.vector.partial.reduce.add(<8 x i32> %acc, <32 x i32> %mult) ret <8 x i32> %partial.reduce } @@ -613,7 +613,7 @@ define <8 x i32> @four_way_i8_i32_vl128_double_width_usdot(ptr %accptr, ptr %upt %u.wide = zext <32 x i8> %u to <32 x i32> %s.wide = sext <32 x i8> %s to <32 x i32> %mult = mul nuw nsw <32 x i32> %s.wide, %u.wide - %partial.reduce = tail call <8 x i32> @llvm.experimental.vector.partial.reduce.add(<8 x i32> %acc, <32 x i32> %mult) + %partial.reduce = tail call <8 x i32> @llvm.vector.partial.reduce.add(<8 x i32> %acc, <32 x i32> %mult) ret <8 x i32> %partial.reduce } @@ -658,7 +658,7 @@ define <8 x i32> @four_way_i8_i32_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal %u.wide = zext <32 x i8> %u to <32 x i32> %s.wide = zext <32 x i8> %s to <32 x i32> %mult = mul nuw nsw <32 x i32> %s.wide, %u.wide - %partial.reduce = tail call <8 x i32> @llvm.experimental.vector.partial.reduce.add(<8 x i32> %acc, <32 x i32> %mult) + %partial.reduce = tail call <8 x i32> @llvm.vector.partial.reduce.add(<8 x i32> %acc, <32 x i32> %mult) ret <8 x i32> %partial.reduce } @@ -703,7 +703,7 @@ define <8 x i32> @four_way_i8_i32_vl256_usdot(ptr %accptr, ptr %uptr, ptr %sptr) %u.wide = zext <32 x i8> %u to <32 x i32> %s.wide = sext <32 x i8> %s to <32 x i32> %mult = mul nuw nsw <32 x i32> %s.wide, %u.wide - %partial.reduce = tail call <8 x i32> @llvm.experimental.vector.partial.reduce.add(<8 x i32> %acc, <32 x i32> %mult) + %partial.reduce = tail call <8 x i32> @llvm.vector.partial.reduce.add(<8 x i32> %acc, <32 x i32> %mult) ret <8 x i32> %partial.reduce } @@ -740,7 +740,7 @@ define <2 x i64> @four_way_i16_i64_vl128(ptr %accptr, ptr %uptr, ptr %sptr) { %u.wide = zext <8 x i16> %u to <8 x i64> %s.wide = zext <8 x i16> %s to <8 x i64> %mult = mul nuw nsw <8 x i64> %s.wide, %u.wide - 
%partial.reduce = tail call <2 x i64> @llvm.experimental.vector.partial.reduce.add(<2 x i64> %acc, <8 x i64> %mult) + %partial.reduce = tail call <2 x i64> @llvm.vector.partial.reduce.add(<2 x i64> %acc, <8 x i64> %mult) ret <2 x i64> %partial.reduce } @@ -781,7 +781,7 @@ define <4 x i64> @four_way_i16_i64_vl128_double_width(ptr %accptr, ptr %uptr, pt %u.wide = zext <16 x i16> %u to <16 x i64> %s.wide = zext <16 x i16> %s to <16 x i64> %mult = mul nuw nsw <16 x i64> %s.wide, %u.wide - %partial.reduce = tail call <4 x i64> @llvm.experimental.vector.partial.reduce.add(<4 x i64> %acc, <16 x i64> %mult) + %partial.reduce = tail call <4 x i64> @llvm.vector.partial.reduce.add(<4 x i64> %acc, <16 x i64> %mult) ret <4 x i64> %partial.reduce } @@ -836,7 +836,7 @@ define <4 x i64> @four_way_i16_i64_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vsca %u.wide = zext <16 x i16> %u to <16 x i64> %s.wide = zext <16 x i16> %s to <16 x i64> %mult = mul nuw nsw <16 x i64> %s.wide, %u.wide - %partial.reduce = tail call <4 x i64> @llvm.experimental.vector.partial.reduce.add(<4 x i64> %acc, <16 x i64> %mult) + %partial.reduce = tail call <4 x i64> @llvm.vector.partial.reduce.add(<4 x i64> %acc, <16 x i64> %mult) ret <4 x i64> %partial.reduce } @@ -889,7 +889,7 @@ define <2 x i64> @eight_way_i8_i64_vl128(ptr %accptr, ptr %uptr, ptr %sptr) { %u.wide = zext <16 x i8> %u to <16 x i64> %s.wide = zext <16 x i8> %s to <16 x i64> %mult = mul nuw nsw <16 x i64> %s.wide, %u.wide - %partial.reduce = tail call <2 x i64> @llvm.experimental.vector.partial.reduce.add(<2 x i64> %acc, <16 x i64> %mult) + %partial.reduce = tail call <2 x i64> @llvm.vector.partial.reduce.add(<2 x i64> %acc, <16 x i64> %mult) ret <2 x i64> %partial.reduce } @@ -953,7 +953,7 @@ define <4 x i64> @four_way_i8_i64_vl128_double_width(ptr %accptr, ptr %uptr, ptr %u.wide = zext <32 x i8> %u to <32 x i64> %s.wide = zext <32 x i8> %s to <32 x i64> %mult = mul nuw nsw <32 x i64> %s.wide, %u.wide - %partial.reduce = tail call <4 x i64> @llvm.experimental.vector.partial.reduce.add(<4 x i64> %acc, <32 x i64> %mult) + %partial.reduce = tail call <4 x i64> @llvm.vector.partial.reduce.add(<4 x i64> %acc, <32 x i64> %mult) ret <4 x i64> %partial.reduce } @@ -1010,6 +1010,6 @@ define <4 x i64> @four_way_i8_i64_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal %u.wide = zext <32 x i8> %u to <32 x i64> %s.wide = zext <32 x i8> %s to <32 x i64> %mult = mul nuw nsw <32 x i64> %s.wide, %u.wide - %partial.reduce = tail call <4 x i64> @llvm.experimental.vector.partial.reduce.add(<4 x i64> %acc, <32 x i64> %mult) + %partial.reduce = tail call <4 x i64> @llvm.vector.partial.reduce.add(<4 x i64> %acc, <32 x i64> %mult) ret <4 x i64> %partial.reduce } diff --git a/llvm/test/CodeGen/AArch64/sve-partial-reduce-dot-product.ll b/llvm/test/CodeGen/AArch64/sve-partial-reduce-dot-product.ll index b2cde51e99619..da0c01f13b960 100644 --- a/llvm/test/CodeGen/AArch64/sve-partial-reduce-dot-product.ll +++ b/llvm/test/CodeGen/AArch64/sve-partial-reduce-dot-product.ll @@ -22,7 +22,7 @@ entry: %a.wide = zext %a to %b.wide = zext %b to %mult = mul nuw nsw %a.wide, %b.wide - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %acc, %mult) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %acc, %mult) ret %partial.reduce } @@ -45,7 +45,7 @@ entry: %a.wide = zext %a to %b.wide = zext %b to %mult = mul nuw nsw %a.wide, %b.wide - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( %acc, %mult) + 
%partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( %acc, %mult) ret %partial.reduce } @@ -68,7 +68,7 @@ entry: %a.wide = sext %a to %b.wide = sext %b to %mult = mul nuw nsw %a.wide, %b.wide - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %accc, %mult) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %accc, %mult) ret %partial.reduce } @@ -91,7 +91,7 @@ entry: %a.wide = sext %a to %b.wide = sext %b to %mult = mul nuw nsw %a.wide, %b.wide - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( %acc, %mult) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( %acc, %mult) ret %partial.reduce } @@ -130,7 +130,7 @@ entry: %a.wide = zext %a to %b.wide = sext %b to %mult = mul nuw nsw %a.wide, %b.wide - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %acc, %mult) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %acc, %mult) ret %partial.reduce } @@ -169,7 +169,7 @@ entry: %a.wide = sext %a to %b.wide = zext %b to %mult = mul nuw nsw %a.wide, %b.wide - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %acc, %mult) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %acc, %mult) ret %partial.reduce } @@ -201,7 +201,7 @@ entry: %a.wide = zext %a to %b.wide = zext %b to %mult = mul nuw nsw %a.wide, %b.wide - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv16i64( + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i64.nxv16i64( %acc, %mult) ret %partial.reduce } @@ -234,7 +234,7 @@ entry: %a.wide = sext %a to %b.wide = sext %b to %mult = mul nuw nsw %a.wide, %b.wide - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv16i64( + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i64.nxv16i64( %acc, %mult) ret %partial.reduce } @@ -300,7 +300,7 @@ entry: %a.wide = zext %a to %b.wide = sext %b to %mult = mul nuw nsw %a.wide, %b.wide - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv16i64( + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i64.nxv16i64( %acc, %mult) ret %partial.reduce } @@ -366,7 +366,7 @@ entry: %a.wide = sext %a to %b.wide = zext %b to %mult = mul nuw nsw %a.wide, %b.wide - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv16i64( + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i64.nxv16i64( %acc, %mult) ret %partial.reduce } @@ -390,7 +390,7 @@ define @udot_no_bin_op( %acc, %a to - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %acc, %a.ext) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %acc, %a.ext) ret %partial.reduce } @@ -413,7 +413,7 @@ define @sdot_no_bin_op( %acc, %a to - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %acc, %a.ext) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %acc, %a.ext) ret %partial.reduce } @@ -437,7 +437,7 @@ define @udot_no_bin_op_wide( %acc, %a to - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( %acc, %a.wide) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( %acc, %a.wide) ret %partial.reduce } @@ -461,7 +461,7 @@ define 
@sdot_no_bin_op_wide( %acc, %a to - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( %acc, %a.wide) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( %acc, %a.wide) ret %partial.reduce } @@ -493,7 +493,7 @@ define @udot_no_bin_op_8to64( %acc, %a to - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv16i64( %acc, %a.ext) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i64.nxv16i64( %acc, %a.ext) ret %partial.reduce } @@ -525,7 +525,7 @@ define @sdot_no_bin_op_8to64( %acc, %a to - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv16i64( %acc, %a.ext) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i64.nxv16i64( %acc, %a.ext) ret %partial.reduce } @@ -557,7 +557,7 @@ entry: %a.wide = zext %a to %b.wide = zext %b to %mult = mul nuw nsw %a.wide, %b.wide - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %acc, %mult) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %acc, %mult) ret %partial.reduce } @@ -589,7 +589,7 @@ entry: %a.wide = zext %a to %b.wide = zext %b to %mult = mul nuw nsw %a.wide, %b.wide - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv4i64( %acc, %mult) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv2i64.nxv4i64( %acc, %mult) ret %partial.reduce } @@ -660,7 +660,7 @@ entry: %a.wide = zext %a to %b.wide = sext %b to %mult = mul nuw nsw %a.wide, %b.wide - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( %acc, %mult) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( %acc, %mult) ret %partial.reduce } @@ -731,7 +731,7 @@ entry: %a.wide = sext %a to %b.wide = zext %b to %mult = mul nuw nsw %a.wide, %b.wide - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( %acc, %mult) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( %acc, %mult) ret %partial.reduce } @@ -805,7 +805,7 @@ entry: %a.wide = zext %a to %b.wide = zext %b to %mult = mul nuw nsw %a.wide, %b.wide - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( %acc, %mult) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( %acc, %mult) ret %partial.reduce } @@ -882,7 +882,7 @@ entry: %a.wide = sext %a to %b.wide = sext %b to %mult = mul nuw nsw %a.wide, %b.wide - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( %acc, %mult) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( %acc, %mult) ret %partial.reduce } @@ -959,7 +959,7 @@ entry: %a.wide = zext %a to %b.wide = sext %b to %mult = mul nuw nsw %a.wide, %b.wide - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( %acc, %mult) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( %acc, %mult) ret %partial.reduce } @@ -1033,7 +1033,7 @@ entry: %a.wide = sext %a to %b.wide = zext %b to %mult = mul nuw nsw %a.wide, %b.wide - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64( %acc, %mult) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64( %acc, %mult) ret %partial.reduce } @@ -1062,7 +1062,7 @@ entry: %a.wide = zext %a to %b.wide = zext %b to %mult = mul nuw nsw %a.wide, %b.wide - 
%partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv2i16.nxv8i16( %acc, %mult) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv2i16.nxv8i16( %acc, %mult) ret %partial.reduce } @@ -1094,7 +1094,7 @@ entry: %a.wide = sext %a to %b.wide = sext %b to %mult = mul nuw nsw %a.wide, %b.wide - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv2i16.nxv8i16( %acc, %mult) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv2i16.nxv8i16( %acc, %mult) ret %partial.reduce } @@ -1123,7 +1123,7 @@ entry: %a.wide = zext %a to %b.wide = zext %b to %mult = mul nuw nsw %a.wide, %b.wide - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv8i64( + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i64.nxv8i64( %acc, %mult) ret %partial.reduce } @@ -1149,7 +1149,7 @@ define @sdot_imm( %acc, entry: %a.wide = sext %a to %mult = mul nuw nsw %a.wide, splat(i32 -1) - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %acc, %mult) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %acc, %mult) ret %partial.reduce } @@ -1210,7 +1210,7 @@ define @sdot_imm_does_not_fit( %acc, %a to %mult = mul nuw nsw %a.wide, splat(i32 256) - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %acc, %mult) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %acc, %mult) ret %partial.reduce } @@ -1235,7 +1235,7 @@ define @udot_imm( %acc, entry: %a.wide = zext %a to %mult = mul nuw nsw %a.wide, splat(i32 255) - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %acc, %mult) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %acc, %mult) ret %partial.reduce } @@ -1296,6 +1296,6 @@ define @udot_imm_does_not_fit( %acc, %a to %mult = mul nuw nsw %a.wide, splat(i32 256) - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( %acc, %mult) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( %acc, %mult) ret %partial.reduce } diff --git a/llvm/test/CodeGen/AArch64/sve-partial-reduce-wide-add.ll b/llvm/test/CodeGen/AArch64/sve-partial-reduce-wide-add.ll index e62979d077fd2..cf738b61a01ee 100644 --- a/llvm/test/CodeGen/AArch64/sve-partial-reduce-wide-add.ll +++ b/llvm/test/CodeGen/AArch64/sve-partial-reduce-wide-add.ll @@ -18,7 +18,7 @@ define @signed_wide_add_nxv4i32( %acc, %input to - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv4i64( %acc, %input.wide) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv2i64.nxv4i64( %acc, %input.wide) ret %partial.reduce } @@ -38,7 +38,7 @@ define @unsigned_wide_add_nxv4i32( %acc, %input to - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv4i64( %acc, %input.wide) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv2i64.nxv4i64( %acc, %input.wide) ret %partial.reduce } @@ -58,7 +58,7 @@ define @signed_wide_add_nxv8i16( %acc, %input to - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv8i32( %acc, %input.wide) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv8i32( %acc, %input.wide) ret %partial.reduce } @@ -78,7 +78,7 @@ define @unsigned_wide_add_nxv8i16( %acc, %input to - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv8i32( %acc, 
%input.wide) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i32.nxv8i32( %acc, %input.wide) ret %partial.reduce } @@ -98,7 +98,7 @@ define @signed_wide_add_nxv16i8( %acc, %input to - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv8i16.nxv16i16( %acc, %input.wide) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv8i16.nxv16i16( %acc, %input.wide) ret %partial.reduce } @@ -118,7 +118,7 @@ define @unsigned_wide_add_nxv16i8( %acc, %input to - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv8i16.nxv16i16( %acc, %input.wide) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv8i16.nxv16i16( %acc, %input.wide) ret %partial.reduce } @@ -142,7 +142,7 @@ define @signed_wide_add_nxv4i16( %acc, %input to - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv4i32( %acc, %input.wide) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv2i32.nxv4i32( %acc, %input.wide) ret %partial.reduce } @@ -164,7 +164,7 @@ define @unsigned_wide_add_nxv4i16( %acc, %input to - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv4i32( %acc, %input.wide) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv2i32.nxv4i32( %acc, %input.wide) ret %partial.reduce } @@ -190,7 +190,7 @@ define @signed_wide_add_nxv8i32( %acc, %input to - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv8i64( %acc, %input.wide) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i64.nxv8i64( %acc, %input.wide) ret %partial.reduce } @@ -216,6 +216,6 @@ define @unsigned_wide_add_nxv8i32( %acc, %input to - %partial.reduce = tail call @llvm.experimental.vector.partial.reduce.add.nxv4i64.nxv8i64( %acc, %input.wide) + %partial.reduce = tail call @llvm.vector.partial.reduce.add.nxv4i64.nxv8i64( %acc, %input.wide) ret %partial.reduce } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll index 684eb609635ef..e6ca6875e1412 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll @@ -537,7 +537,7 @@ entry: %a.sext = sext <4 x i8> %a to <4 x i32> %b.sext = sext <4 x i8> %b to <4 x i32> %mul = mul <4 x i32> %a.sext, %b.sext - %res = call <1 x i32> @llvm.experimental.vector.partial.reduce.add(<1 x i32> zeroinitializer, <4 x i32> %mul) + %res = call <1 x i32> @llvm.vector.partial.reduce.add(<1 x i32> zeroinitializer, <4 x i32> %mul) ret <1 x i32> %res } @@ -570,7 +570,7 @@ entry: %a.sext = zext <4 x i8> %a to <4 x i32> %b.sext = zext <4 x i8> %b to <4 x i32> %mul = mul <4 x i32> %a.sext, %b.sext - %res = call <1 x i32> @llvm.experimental.vector.partial.reduce.add(<1 x i32> zeroinitializer, <4 x i32> %mul) + %res = call <1 x i32> @llvm.vector.partial.reduce.add(<1 x i32> zeroinitializer, <4 x i32> %mul) ret <1 x i32> %res } @@ -605,7 +605,7 @@ define <1 x i32> @vqdotu_vx_partial_reduce(<4 x i8> %a, <4 x i8> %b) { entry: %a.ext = zext <4 x i8> %a to <4 x i32> %mul = mul <4 x i32> %a.ext, splat (i32 128) - %res = call <1 x i32> @llvm.experimental.vector.partial.reduce.add(<1 x i32> zeroinitializer, <4 x i32> %mul) + %res = call <1 x i32> @llvm.vector.partial.reduce.add(<1 x i32> zeroinitializer, <4 x i32> %mul) ret <1 x i32> %res } @@ -641,7 +641,7 @@ define <1 x i32> @vqdot_vx_partial_reduce(<4 x i8> %a, <4 x i8> %b) { entry: %a.ext = sext <4 x i8> %a to <4 x i32> %mul = mul <4 x i32> %a.ext, 
splat (i32 -128) - %res = call <1 x i32> @llvm.experimental.vector.partial.reduce.add(<1 x i32> zeroinitializer, <4 x i32> %mul) + %res = call <1 x i32> @llvm.vector.partial.reduce.add(<1 x i32> zeroinitializer, <4 x i32> %mul) ret <1 x i32> %res } @@ -675,7 +675,7 @@ entry: %a.sext = sext <4 x i8> %a to <4 x i32> %b.sext = zext <4 x i8> %b to <4 x i32> %mul = mul <4 x i32> %a.sext, %b.sext - %res = call <1 x i32> @llvm.experimental.vector.partial.reduce.add(<1 x i32> zeroinitializer, <4 x i32> %mul) + %res = call <1 x i32> @llvm.vector.partial.reduce.add(<1 x i32> zeroinitializer, <4 x i32> %mul) ret <1 x i32> %res } @@ -709,7 +709,7 @@ entry: %a.ext = sext <4 x i8> %a to <4 x i32> %b.ext = zext <4 x i8> %b to <4 x i32> %mul = mul <4 x i32> %b.ext, %a.ext - %res = call <1 x i32> @llvm.experimental.vector.partial.reduce.add(<1 x i32> zeroinitializer, <4 x i32> %mul) + %res = call <1 x i32> @llvm.vector.partial.reduce.add(<1 x i32> zeroinitializer, <4 x i32> %mul) ret <1 x i32> %res } @@ -732,7 +732,7 @@ define <1 x i32> @vqdotsu_vx_partial_reduce(<4 x i8> %a, <4 x i8> %b) { entry: %a.ext = sext <4 x i8> %a to <4 x i32> %mul = mul <4 x i32> %a.ext, splat (i32 128) - %res = call <1 x i32> @llvm.experimental.vector.partial.reduce.add(<1 x i32> zeroinitializer, <4 x i32> %mul) + %res = call <1 x i32> @llvm.vector.partial.reduce.add(<1 x i32> zeroinitializer, <4 x i32> %mul) ret <1 x i32> %res } @@ -768,7 +768,7 @@ entry: %a.sext = sext <8 x i8> %a to <8 x i32> %b.sext = sext <8 x i8> %b to <8 x i32> %mul = mul <8 x i32> %a.sext, %b.sext - %res = call <2 x i32> @llvm.experimental.vector.partial.reduce.add(<2 x i32> zeroinitializer, <8 x i32> %mul) + %res = call <2 x i32> @llvm.vector.partial.reduce.add(<2 x i32> zeroinitializer, <8 x i32> %mul) ret <2 x i32> %res } @@ -945,7 +945,7 @@ entry: %a.sext = sext <64 x i8> %a to <64 x i32> %b.sext = sext <64 x i8> %b to <64 x i32> %mul = mul <64 x i32> %a.sext, %b.sext - %res = call <2 x i32> @llvm.experimental.vector.partial.reduce.add(<2 x i32> zeroinitializer, <64 x i32> %mul) + %res = call <2 x i32> @llvm.vector.partial.reduce.add(<2 x i32> zeroinitializer, <64 x i32> %mul) ret <2 x i32> %res } @@ -980,7 +980,7 @@ entry: %a.sext = sext <16 x i8> %a to <16 x i32> %b.sext = sext <16 x i8> %b to <16 x i32> %mul = mul <16 x i32> %a.sext, %b.sext - %res = call <4 x i32> @llvm.experimental.vector.partial.reduce.add(<4 x i32> zeroinitializer, <16 x i32> %mul) + %res = call <4 x i32> @llvm.vector.partial.reduce.add(<4 x i32> zeroinitializer, <16 x i32> %mul) ret <4 x i32> %res } @@ -1023,7 +1023,7 @@ entry: %a.sext = sext <64 x i8> %a to <64 x i32> %b.sext = sext <64 x i8> %b to <64 x i32> %mul = mul <64 x i32> %a.sext, %b.sext - %res = call <16 x i32> @llvm.experimental.vector.partial.reduce.add(<16 x i32> zeroinitializer, <64 x i32> %mul) + %res = call <16 x i32> @llvm.vector.partial.reduce.add(<16 x i32> zeroinitializer, <64 x i32> %mul) ret <16 x i32> %res } @@ -1059,7 +1059,7 @@ entry: %a.sext = sext <16 x i8> %a to <16 x i32> %b.sext = sext <16 x i8> %b to <16 x i32> %mul = mul <16 x i32> %a.sext, %b.sext - %res = call <4 x i32> @llvm.experimental.vector.partial.reduce.add(<4 x i32> %accum, <16 x i32> %mul) + %res = call <4 x i32> @llvm.vector.partial.reduce.add(<4 x i32> %accum, <16 x i32> %mul) ret <4 x i32> %res } @@ -1075,7 +1075,7 @@ entry: %a.sext = sext <16 x i8> %a to <16 x i32> %b.sext = sext <16 x i8> %b to <16 x i32> %mul = mul <16 x i32> %a.sext, %b.sext - %res = call <16 x i32> 
@llvm.experimental.vector.partial.reduce.add.nvx8i32.nvx16i32.nvx16i32(<16 x i32> %mul, <16 x i32> zeroinitializer) + %res = call <16 x i32> @llvm.vector.partial.reduce.add.nvx8i32.nvx16i32.nvx16i32(<16 x i32> %mul, <16 x i32> zeroinitializer) ret <16 x i32> %res } @@ -1370,7 +1370,7 @@ entry: %a.ext = sext <256 x i8> %a to <256 x i32> %b.ext = zext <256 x i8> %b to <256 x i32> %mul = mul <256 x i32> %b.ext, %a.ext - %res = call <64 x i32> @llvm.experimental.vector.partial.reduce.add(<64 x i32> zeroinitializer, <256 x i32> %mul) + %res = call <64 x i32> @llvm.vector.partial.reduce.add(<64 x i32> zeroinitializer, <256 x i32> %mul) ret <64 x i32> %res } @@ -1419,7 +1419,7 @@ entry: %a.ext = sext <16 x i7> %a to <16 x i31> %b.ext = zext <16 x i7> %b to <16 x i31> %mul = mul <16 x i31> %b.ext, %a.ext - %res = call <4 x i31> @llvm.experimental.vector.partial.reduce.add(<4 x i31> zeroinitializer, <16 x i31> %mul) + %res = call <4 x i31> @llvm.vector.partial.reduce.add(<4 x i31> zeroinitializer, <16 x i31> %mul) ret <4 x i31> %res } @@ -1441,7 +1441,7 @@ entry: %a.ext = sext <2 x i8> %a to <2 x i32> %b.ext = zext <2 x i8> %b to <2 x i32> %mul = mul <2 x i32> %b.ext, %a.ext - %res = call <1 x i32> @llvm.experimental.vector.partial.reduce.add(<1 x i32> zeroinitializer, <2 x i32> %mul) + %res = call <1 x i32> @llvm.vector.partial.reduce.add(<1 x i32> zeroinitializer, <2 x i32> %mul) ret <1 x i32> %res } @@ -1482,7 +1482,7 @@ entry: %a.ext = sext <8 x i8> %a to <8 x i32> %b.ext = zext <8 x i8> %b to <8 x i32> %mul = mul <8 x i32> %b.ext, %a.ext - %res = call <1 x i32> @llvm.experimental.vector.partial.reduce.add(<1 x i32> zeroinitializer, <8 x i32> %mul) + %res = call <1 x i32> @llvm.vector.partial.reduce.add(<1 x i32> zeroinitializer, <8 x i32> %mul) ret <1 x i32> %res } @@ -1516,7 +1516,7 @@ define <4 x i32> @partial_of_sext(<16 x i8> %a) { ; DOT-NEXT: ret entry: %a.ext = sext <16 x i8> %a to <16 x i32> - %res = call <4 x i32> @llvm.experimental.vector.partial.reduce.add(<4 x i32> zeroinitializer, <16 x i32> %a.ext) + %res = call <4 x i32> @llvm.vector.partial.reduce.add(<4 x i32> zeroinitializer, <16 x i32> %a.ext) ret <4 x i32> %res } @@ -1549,7 +1549,7 @@ define <4 x i32> @partial_of_zext(<16 x i8> %a) { ; DOT-NEXT: ret entry: %a.ext = zext <16 x i8> %a to <16 x i32> - %res = call <4 x i32> @llvm.experimental.vector.partial.reduce.add(<4 x i32> zeroinitializer, <16 x i32> %a.ext) + %res = call <4 x i32> @llvm.vector.partial.reduce.add(<4 x i32> zeroinitializer, <16 x i32> %a.ext) ret <4 x i32> %res } diff --git a/llvm/test/CodeGen/RISCV/rvv/partial-reduction-add.ll b/llvm/test/CodeGen/RISCV/rvv/partial-reduction-add.ll index ff8037502a4e3..1ef168b765346 100644 --- a/llvm/test/CodeGen/RISCV/rvv/partial-reduction-add.ll +++ b/llvm/test/CodeGen/RISCV/rvv/partial-reduction-add.ll @@ -9,7 +9,7 @@ define <4 x i32> @partial_reduce_add_v4i32_v4i32(<4 x i32> %accumulator, <4 x i3 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: ret entry: - %partial.reduce = call <4 x i32> @llvm.experimental.vector.partial.reduce.add(<4 x i32> %accumulator, <4 x i32> %0) + %partial.reduce = call <4 x i32> @llvm.vector.partial.reduce.add(<4 x i32> %accumulator, <4 x i32> %0) ret <4 x i32> %partial.reduce } @@ -24,7 +24,7 @@ define <4 x i32> @partial_reduce_add_v4i32_v8i32(<4 x i32> %accumulator, <8 x i3 ; CHECK-NEXT: vadd.vv v8, v8, v12 ; CHECK-NEXT: ret entry: - %partial.reduce = call <4 x i32> @llvm.experimental.vector.partial.reduce.add(<4 x i32> %accumulator, <8 x i32> %0) + %partial.reduce = call <4 x i32> 
@llvm.vector.partial.reduce.add(<4 x i32> %accumulator, <8 x i32> %0) ret <4 x i32> %partial.reduce } @@ -35,7 +35,7 @@ define @partial_reduce_add_nvx4i32_nvx4i32( ; CHECK-NEXT: vadd.vv v8, v8, v10 ; CHECK-NEXT: ret entry: - %partial.reduce = call @llvm.experimental.vector.partial.reduce.add( %accumulator, %0) + %partial.reduce = call @llvm.vector.partial.reduce.add( %accumulator, %0) ret %partial.reduce } @@ -47,7 +47,7 @@ define @partial_reduce_add_nvx4i32_nvx8i32( ; CHECK-NEXT: vadd.vv v8, v14, v8 ; CHECK-NEXT: ret entry: - %partial.reduce = call @llvm.experimental.vector.partial.reduce.add( %accumulator, %0) + %partial.reduce = call @llvm.vector.partial.reduce.add( %accumulator, %0) ret %partial.reduce } @@ -61,7 +61,7 @@ define @partial_reduce_add_nvx4i32_nvx16i32( @llvm.experimental.vector.partial.reduce.add( %accumulator, %0) + %partial.reduce = call @llvm.vector.partial.reduce.add( %accumulator, %0) ret %partial.reduce } @@ -73,7 +73,7 @@ define @partial_reduce_add_nvx8i32_nvx16i32( @llvm.experimental.vector.partial.reduce.add( %accumulator, %0) + %partial.reduce = call @llvm.vector.partial.reduce.add( %accumulator, %0) ret %partial.reduce } diff --git a/llvm/test/CodeGen/RISCV/rvv/zvqdotq-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/zvqdotq-sdnode.ll index 87a984bda1fee..772895316ebc6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/zvqdotq-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/zvqdotq-sdnode.ll @@ -528,7 +528,7 @@ entry: %a.sext = sext %a to %b.sext = sext %b to %mul = mul %a.sext, %b.sext - %res = call @llvm.experimental.vector.partial.reduce.add( zeroinitializer, %mul) + %res = call @llvm.vector.partial.reduce.add( zeroinitializer, %mul) ret %res } @@ -556,7 +556,7 @@ entry: %a.sext = sext %a to %b.sext = sext %b to %mul = mul %a.sext, %b.sext - %res = call @llvm.experimental.vector.partial.reduce.add( zeroinitializer, %mul) + %res = call @llvm.vector.partial.reduce.add( zeroinitializer, %mul) ret %res } @@ -584,7 +584,7 @@ entry: %a.sext = sext %a to %b.sext = sext %b to %mul = mul %a.sext, %b.sext - %res = call @llvm.experimental.vector.partial.reduce.add( zeroinitializer, %mul) + %res = call @llvm.vector.partial.reduce.add( zeroinitializer, %mul) ret %res } @@ -615,7 +615,7 @@ entry: %a.sext = sext %a to %b.sext = sext %b to %mul = mul %a.sext, %b.sext - %res = call @llvm.experimental.vector.partial.reduce.add( zeroinitializer, %mul) + %res = call @llvm.vector.partial.reduce.add( zeroinitializer, %mul) ret %res } @@ -664,7 +664,7 @@ entry: %a.sext = sext %a to %b.sext = sext %b to %mul = mul %a.sext, %b.sext - %res = call @llvm.experimental.vector.partial.reduce.add( zeroinitializer, %mul) + %res = call @llvm.vector.partial.reduce.add( zeroinitializer, %mul) ret %res } @@ -828,7 +828,7 @@ entry: %a.sext = sext %a to %b.sext = sext %b to %mul = mul %a.sext, %b.sext - %res = call @llvm.experimental.vector.partial.reduce.add( zeroinitializer, %mul) + %res = call @llvm.vector.partial.reduce.add( zeroinitializer, %mul) ret %res } @@ -856,7 +856,7 @@ entry: %a.sext = sext %a to %b.sext = sext %b to %mul = mul %a.sext, %b.sext - %res = call @llvm.experimental.vector.partial.reduce.add( %accum, %mul) + %res = call @llvm.vector.partial.reduce.add( %accum, %mul) ret %res } @@ -872,7 +872,7 @@ entry: %a.sext = sext %a to %b.sext = sext %b to %mul = mul %a.sext, %b.sext - %res = call @llvm.experimental.vector.partial.reduce.add.nvx16i32.nvx16i32( %mul, zeroinitializer) + %res = call @llvm.vector.partial.reduce.add.nvx16i32.nvx16i32( %mul, zeroinitializer) ret %res } @@ -905,7 +905,7 @@ entry: 
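; The zext/zext multiply below is the unsigned 4-way i8-to-i32 shape that the
; Zvqdotq dot-product patterns match. In fixed-width form (an illustrative
; stand-in for the scalable types this test actually uses) the shape is:
;   %az = zext <16 x i8> %a to <16 x i32>
;   %bz = zext <16 x i8> %b to <16 x i32>
;   %m  = mul <16 x i32> %az, %bz
;   %r  = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> zeroinitializer, <16 x i32> %m)
; i.e. sixteen i8 products accumulated into four i32 lanes.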
%a.sext = zext %a to %b.sext = zext %b to %mul = mul %a.sext, %b.sext - %res = call @llvm.experimental.vector.partial.reduce.add( zeroinitializer, %mul) + %res = call @llvm.vector.partial.reduce.add( zeroinitializer, %mul) ret %res } @@ -938,7 +938,7 @@ entry: %a.sext = sext %a to %b.sext = zext %b to %mul = mul %a.sext, %b.sext - %res = call @llvm.experimental.vector.partial.reduce.add( zeroinitializer, %mul) + %res = call @llvm.vector.partial.reduce.add( zeroinitializer, %mul) ret %res } @@ -965,7 +965,7 @@ define @partial_of_sext( %a) { ; DOT-NEXT: ret entry: %a.ext = sext %a to - %res = call @llvm.experimental.vector.partial.reduce.add( zeroinitializer, %a.ext) + %res = call @llvm.vector.partial.reduce.add( zeroinitializer, %a.ext) ret %res } @@ -991,7 +991,7 @@ define @partial_of_zext( %a) { ; DOT-NEXT: ret entry: %a.ext = zext %a to - %res = call @llvm.experimental.vector.partial.reduce.add( zeroinitializer, %a.ext) + %res = call @llvm.vector.partial.reduce.add( zeroinitializer, %a.ext) ret %res } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll index c0995ec150c8d..f4784b6259ce1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll @@ -33,10 +33,10 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-NEON-NEXT: [[TMP8:%.*]] = sext <16 x i8> [[WIDE_LOAD1]] to <16 x i32> ; CHECK-NEON-NEXT: [[TMP9:%.*]] = sext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-NEON-NEXT: [[TMP10:%.*]] = mul nsw <16 x i32> [[TMP7]], [[TMP8]] -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP10]]) +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP10]]) ; CHECK-NEON-NEXT: [[TMP12:%.*]] = mul nsw <16 x i32> [[TMP7]], [[TMP9]] ; CHECK-NEON-NEXT: [[TMP13:%.*]] = sub <16 x i32> zeroinitializer, [[TMP12]] -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE3]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE]], <16 x i32> [[TMP13]]) +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE3]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE]], <16 x i32> [[TMP13]]) ; CHECK-NEON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEON-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEON-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -116,10 +116,10 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[TMP14:%.*]] = sext [[WIDE_LOAD1]] to ; CHECK-SVE-MAXBW-NEXT: [[TMP15:%.*]] = sext [[WIDE_LOAD2]] to ; CHECK-SVE-MAXBW-NEXT: [[TMP16:%.*]] = mul nsw [[TMP13]], [[TMP14]] -; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP16]]) +; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP16]]) ; CHECK-SVE-MAXBW-NEXT: [[TMP17:%.*]] = mul nsw [[TMP13]], [[TMP15]] ; CHECK-SVE-MAXBW-NEXT: [[TMP18:%.*]] = sub zeroinitializer, [[TMP17]] -; 
CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE]], [[TMP18]]) +; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE]], [[TMP18]]) ; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -186,9 +186,9 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-NEON-NEXT: [[TMP8:%.*]] = sext <16 x i8> [[WIDE_LOAD1]] to <16 x i32> ; CHECK-NEON-NEXT: [[TMP9:%.*]] = sext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-NEON-NEXT: [[TMP10:%.*]] = mul nsw <16 x i32> [[TMP7]], [[TMP8]] -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP10]]) +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP10]]) ; CHECK-NEON-NEXT: [[TMP11:%.*]] = mul nsw <16 x i32> [[TMP7]], [[TMP9]] -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE3]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE]], <16 x i32> [[TMP11]]) +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE3]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE]], <16 x i32> [[TMP11]]) ; CHECK-NEON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEON-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEON-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -268,9 +268,9 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[TMP14:%.*]] = sext [[WIDE_LOAD1]] to ; CHECK-SVE-MAXBW-NEXT: [[TMP15:%.*]] = sext [[WIDE_LOAD2]] to ; CHECK-SVE-MAXBW-NEXT: [[TMP16:%.*]] = mul nsw [[TMP13]], [[TMP14]] -; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP16]]) +; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP16]]) ; CHECK-SVE-MAXBW-NEXT: [[TMP17:%.*]] = mul nsw [[TMP13]], [[TMP15]] -; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE]], [[TMP17]]) +; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE]], [[TMP17]]) ; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -338,9 +338,9 @@ define i32 @chained_partial_reduce_sub_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-NEON-NEXT: [[TMP9:%.*]] = sext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-NEON-NEXT: [[TMP10:%.*]] = mul nsw <16 x i32> [[TMP7]], [[TMP8]] ; CHECK-NEON-NEXT: [[TMP11:%.*]] = sub nsw <16 x i32> zeroinitializer, [[TMP10]] -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP11]]) +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = 
call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP11]]) ; CHECK-NEON-NEXT: [[TMP12:%.*]] = mul nsw <16 x i32> [[TMP7]], [[TMP9]] -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE3]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE]], <16 x i32> [[TMP12]]) +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE3]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE]], <16 x i32> [[TMP12]]) ; CHECK-NEON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEON-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEON-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -421,9 +421,9 @@ define i32 @chained_partial_reduce_sub_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[TMP15:%.*]] = sext [[WIDE_LOAD2]] to ; CHECK-SVE-MAXBW-NEXT: [[TMP16:%.*]] = mul nsw [[TMP13]], [[TMP14]] ; CHECK-SVE-MAXBW-NEXT: [[TMP17:%.*]] = sub nsw zeroinitializer, [[TMP16]] -; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP17]]) +; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP17]]) ; CHECK-SVE-MAXBW-NEXT: [[TMP18:%.*]] = mul nsw [[TMP13]], [[TMP15]] -; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE]], [[TMP18]]) +; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE]], [[TMP18]]) ; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -493,10 +493,10 @@ define i32 @chained_partial_reduce_sub_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-NEON-NEXT: [[TMP9:%.*]] = sext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-NEON-NEXT: [[TMP10:%.*]] = mul nsw <16 x i32> [[TMP7]], [[TMP8]] ; CHECK-NEON-NEXT: [[TMP11:%.*]] = sub nsw <16 x i32> zeroinitializer, [[TMP10]] -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP11]]) +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP11]]) ; CHECK-NEON-NEXT: [[TMP12:%.*]] = mul nsw <16 x i32> [[TMP7]], [[TMP9]] ; CHECK-NEON-NEXT: [[TMP13:%.*]] = sub <16 x i32> zeroinitializer, [[TMP12]] -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE3]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE]], <16 x i32> [[TMP13]]) +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE3]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE]], <16 x i32> [[TMP13]]) ; CHECK-NEON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEON-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEON-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -577,10 +577,10 @@ define i32 @chained_partial_reduce_sub_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[TMP15:%.*]] = sext [[WIDE_LOAD2]] to ; CHECK-SVE-MAXBW-NEXT: [[TMP16:%.*]] = mul nsw [[TMP13]], [[TMP14]] ; 
CHECK-SVE-MAXBW-NEXT: [[TMP17:%.*]] = sub nsw zeroinitializer, [[TMP16]] -; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP17]]) +; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP17]]) ; CHECK-SVE-MAXBW-NEXT: [[TMP18:%.*]] = mul nsw [[TMP13]], [[TMP15]] ; CHECK-SVE-MAXBW-NEXT: [[TMP19:%.*]] = sub zeroinitializer, [[TMP18]] -; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE]], [[TMP19]]) +; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE]], [[TMP19]]) ; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -650,11 +650,11 @@ define i32 @chained_partial_reduce_add_add_add(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-NEON-NEXT: [[TMP8:%.*]] = sext <16 x i8> [[WIDE_LOAD1]] to <16 x i32> ; CHECK-NEON-NEXT: [[TMP9:%.*]] = sext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-NEON-NEXT: [[TMP10:%.*]] = mul nsw <16 x i32> [[TMP7]], [[TMP8]] -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP10]]) +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP10]]) ; CHECK-NEON-NEXT: [[TMP11:%.*]] = mul nsw <16 x i32> [[TMP7]], [[TMP9]] -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE3:%.*]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE]], <16 x i32> [[TMP11]]) +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE3:%.*]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE]], <16 x i32> [[TMP11]]) ; CHECK-NEON-NEXT: [[TMP12:%.*]] = mul nsw <16 x i32> [[TMP8]], [[TMP9]] -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE4]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE3]], <16 x i32> [[TMP12]]) +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE4]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE3]], <16 x i32> [[TMP12]]) ; CHECK-NEON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEON-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEON-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -736,11 +736,11 @@ define i32 @chained_partial_reduce_add_add_add(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: [[TMP14:%.*]] = sext [[WIDE_LOAD1]] to ; CHECK-SVE-MAXBW-NEXT: [[TMP15:%.*]] = sext [[WIDE_LOAD2]] to ; CHECK-SVE-MAXBW-NEXT: [[TMP16:%.*]] = mul nsw [[TMP13]], [[TMP14]] -; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP16]]) +; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP16]]) ; CHECK-SVE-MAXBW-NEXT: [[TMP17:%.*]] = mul nsw [[TMP13]], [[TMP15]] -; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE]], [[TMP17]]) +; CHECK-SVE-MAXBW-NEXT: 
[[PARTIAL_REDUCE3:%.*]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE]], [[TMP17]]) ; CHECK-SVE-MAXBW-NEXT: [[TMP18:%.*]] = mul nsw [[TMP14]], [[TMP15]] -; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE4]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE3]], [[TMP18]]) +; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE4]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE3]], [[TMP18]]) ; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -813,12 +813,12 @@ define i32 @chained_partial_reduce_sub_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-NEON-NEXT: [[TMP9:%.*]] = sext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-NEON-NEXT: [[TMP10:%.*]] = mul nsw <16 x i32> [[TMP7]], [[TMP8]] ; CHECK-NEON-NEXT: [[TMP11:%.*]] = sub nsw <16 x i32> zeroinitializer, [[TMP10]] -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP11]]) +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP11]]) ; CHECK-NEON-NEXT: [[TMP12:%.*]] = mul nsw <16 x i32> [[TMP7]], [[TMP9]] -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE3:%.*]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE]], <16 x i32> [[TMP12]]) +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE3:%.*]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE]], <16 x i32> [[TMP12]]) ; CHECK-NEON-NEXT: [[TMP14:%.*]] = mul nsw <16 x i32> [[TMP8]], [[TMP9]] ; CHECK-NEON-NEXT: [[TMP15:%.*]] = sub <16 x i32> zeroinitializer, [[TMP14]] -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE4]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE3]], <16 x i32> [[TMP15]]) +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE4]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE3]], <16 x i32> [[TMP15]]) ; CHECK-NEON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEON-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEON-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -901,12 +901,12 @@ define i32 @chained_partial_reduce_sub_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: [[TMP15:%.*]] = sext [[WIDE_LOAD2]] to ; CHECK-SVE-MAXBW-NEXT: [[TMP16:%.*]] = mul nsw [[TMP13]], [[TMP14]] ; CHECK-SVE-MAXBW-NEXT: [[TMP17:%.*]] = sub nsw zeroinitializer, [[TMP16]] -; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP17]]) +; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP17]]) ; CHECK-SVE-MAXBW-NEXT: [[TMP18:%.*]] = mul nsw [[TMP13]], [[TMP15]] -; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE]], [[TMP18]]) +; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3:%.*]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE]], [[TMP18]]) ; CHECK-SVE-MAXBW-NEXT: [[TMP19:%.*]] = mul nsw [[TMP14]], [[TMP15]] ; CHECK-SVE-MAXBW-NEXT: [[TMP20:%.*]] = sub 
zeroinitializer, [[TMP19]] -; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE4]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE3]], [[TMP20]]) +; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE4]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE3]], [[TMP20]]) ; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -979,8 +979,8 @@ define i32 @chained_partial_reduce_madd_extadd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-NEON-NEXT: [[TMP8:%.*]] = sext <16 x i8> [[WIDE_LOAD1]] to <16 x i32> ; CHECK-NEON-NEXT: [[TMP9:%.*]] = sext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-NEON-NEXT: [[TMP10:%.*]] = mul nsw <16 x i32> [[TMP7]], [[TMP8]] -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP10]]) -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE3]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE]], <16 x i32> [[TMP9]]) +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP10]]) +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE3]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE]], <16 x i32> [[TMP9]]) ; CHECK-NEON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEON-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEON-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -1059,8 +1059,8 @@ define i32 @chained_partial_reduce_madd_extadd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: [[TMP14:%.*]] = sext [[WIDE_LOAD1]] to ; CHECK-SVE-MAXBW-NEXT: [[TMP15:%.*]] = sext [[WIDE_LOAD2]] to ; CHECK-SVE-MAXBW-NEXT: [[TMP16:%.*]] = mul nsw [[TMP13]], [[TMP14]] -; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP16]]) -; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE]], [[TMP15]]) +; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP16]]) +; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE]], [[TMP15]]) ; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -1123,8 +1123,8 @@ define i32 @chained_partial_reduce_extadd_extadd(ptr %a, ptr %b, i32 %N) #0 { ; CHECK-NEON-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-NEON-NEXT: [[TMP6:%.*]] = sext <16 x i8> [[WIDE_LOAD1]] to <16 x i32> -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP5]]) -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE2]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE]], <16 
x i32> [[TMP6]]) +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP5]]) +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE2]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE]], <16 x i32> [[TMP6]]) ; CHECK-NEON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEON-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEON-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] @@ -1195,8 +1195,8 @@ define i32 @chained_partial_reduce_extadd_extadd(ptr %a, ptr %b, i32 %N) #0 { ; CHECK-SVE-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP8]], align 1 ; CHECK-SVE-MAXBW-NEXT: [[TMP11:%.*]] = sext [[WIDE_LOAD]] to ; CHECK-SVE-MAXBW-NEXT: [[TMP12:%.*]] = sext [[WIDE_LOAD1]] to -; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP11]]) -; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE2]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE]], [[TMP12]]) +; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP11]]) +; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE2]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE]], [[TMP12]]) ; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] @@ -1259,9 +1259,9 @@ define i32 @chained_partial_reduce_extadd_madd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-NEON-NEXT: [[TMP7:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-NEON-NEXT: [[TMP8:%.*]] = sext <16 x i8> [[WIDE_LOAD1]] to <16 x i32> ; CHECK-NEON-NEXT: [[TMP9:%.*]] = sext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP9]]) +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP9]]) ; CHECK-NEON-NEXT: [[TMP10:%.*]] = mul nsw <16 x i32> [[TMP7]], [[TMP8]] -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE3]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE]], <16 x i32> [[TMP10]]) +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE3]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[PARTIAL_REDUCE]], <16 x i32> [[TMP10]]) ; CHECK-NEON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEON-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEON-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] @@ -1339,9 +1339,9 @@ define i32 @chained_partial_reduce_extadd_madd(ptr %a, ptr %b, ptr %c, i32 %N) # ; CHECK-SVE-MAXBW-NEXT: [[TMP13:%.*]] = sext [[WIDE_LOAD]] to ; CHECK-SVE-MAXBW-NEXT: [[TMP14:%.*]] = sext [[WIDE_LOAD1]] to ; CHECK-SVE-MAXBW-NEXT: [[TMP15:%.*]] = sext [[WIDE_LOAD2]] to -; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP15]]) +; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP15]]) ; 
CHECK-SVE-MAXBW-NEXT: [[TMP16:%.*]] = mul nsw [[TMP13]], [[TMP14]] -; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE]], [[TMP16]]) +; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE3]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[PARTIAL_REDUCE]], [[TMP16]]) ; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-SVE-MAXBW-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] @@ -1404,7 +1404,7 @@ define i32 @red_extended_add_chain(ptr %start, ptr %end, i32 %offset) { ; CHECK-NEON-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] ; CHECK-NEON-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <16 x i32> @llvm.experimental.vector.partial.reduce.add.v16i32.v16i32(<16 x i32> [[VEC_PHI]], <16 x i32> [[TMP3]]) +; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <16 x i32> @llvm.vector.partial.reduce.add.v16i32.v16i32(<16 x i32> [[VEC_PHI]], <16 x i32> [[TMP3]]) ; CHECK-NEON-NEXT: [[TMP4]] = add <16 x i32> [[PARTIAL_REDUCE]], [[BROADCAST_SPLAT]] ; CHECK-NEON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1478,7 +1478,7 @@ define i32 @red_extended_add_chain(ptr %start, ptr %end, i32 %offset) { ; CHECK-SVE-MAXBW-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] ; CHECK-SVE-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[NEXT_GEP]], align 1 ; CHECK-SVE-MAXBW-NEXT: [[TMP7:%.*]] = zext [[WIDE_LOAD]] to -; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call @llvm.experimental.vector.partial.reduce.add.nxv8i32.nxv8i32( [[VEC_PHI]], [[TMP7]]) +; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call @llvm.vector.partial.reduce.add.nxv8i32.nxv8i32( [[VEC_PHI]], [[TMP7]]) ; CHECK-SVE-MAXBW-NEXT: [[TMP8]] = add [[PARTIAL_REDUCE]], [[BROADCAST_SPLAT]] ; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-SVE-MAXBW-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll index 9d4a969b571e7..1f72cc2be856d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll @@ -21,7 +21,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[TMP8:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = mul <16 x i32> [[TMP8]], [[TMP5]] -; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP9]]) +; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP9]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -77,7 +77,7 @@ define void @dotp_small_epilogue_vf(i64 
%idx.neg, i8 %a) #1 { ; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT2]], <16 x i8> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[BROADCAST_SPLAT3]] to <16 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP4]]) +; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP4]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IV_NEXT]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -491,7 +491,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) { ; CHECK-NEXT: [[TMP178:%.*]] = sext <16 x i8> [[TMP177]] to <16 x i32> ; CHECK-NEXT: [[TMP179:%.*]] = mul nsw <16 x i32> [[TMP178]], [[TMP97]] ; CHECK-NEXT: [[TMP180:%.*]] = select <16 x i1> [[TMP16]], <16 x i32> [[TMP179]], <16 x i32> zeroinitializer -; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP180]]) +; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP180]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16) ; CHECK-NEXT: [[TMP181:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll index 8766d6540ed19..2535de7a2b0c6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll @@ -38,8 +38,8 @@ define i32 @sudot(ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[TMP19:%.*]] = sext [[WIDE_LOAD4]] to ; CHECK-NEXT: [[TMP20:%.*]] = mul [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP21:%.*]] = mul [[TMP19]], [[TMP12]] -; CHECK-NEXT: [[PARTIAL_REDUCE]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP20]]) -; CHECK-NEXT: [[PARTIAL_REDUCE5]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI1]], [[TMP21]]) +; CHECK-NEXT: [[PARTIAL_REDUCE]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP20]]) +; CHECK-NEXT: [[PARTIAL_REDUCE5]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI1]], [[TMP21]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -82,8 +82,8 @@ define i32 @sudot(ptr %a, ptr %b) #0 { ; CHECK-NOI8MM-NEXT: [[TMP19:%.*]] = sext [[WIDE_LOAD4]] to ; CHECK-NOI8MM-NEXT: [[TMP20:%.*]] = mul [[TMP18]], [[TMP11]] ; CHECK-NOI8MM-NEXT: [[TMP21:%.*]] = mul [[TMP19]], [[TMP12]] -; CHECK-NOI8MM-NEXT: [[PARTIAL_REDUCE]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP20]]) -; CHECK-NOI8MM-NEXT: [[PARTIAL_REDUCE5]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI1]], [[TMP21]]) +; CHECK-NOI8MM-NEXT: [[PARTIAL_REDUCE]] = call 
@llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP20]]) +; CHECK-NOI8MM-NEXT: [[PARTIAL_REDUCE5]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI1]], [[TMP21]]) ; CHECK-NOI8MM-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NOI8MM-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NOI8MM-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -149,8 +149,8 @@ define i32 @usdot(ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[TMP19:%.*]] = zext [[WIDE_LOAD4]] to ; CHECK-NEXT: [[TMP20:%.*]] = mul [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP21:%.*]] = mul [[TMP19]], [[TMP12]] -; CHECK-NEXT: [[PARTIAL_REDUCE]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP20]]) -; CHECK-NEXT: [[PARTIAL_REDUCE5]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI1]], [[TMP21]]) +; CHECK-NEXT: [[PARTIAL_REDUCE]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP20]]) +; CHECK-NEXT: [[PARTIAL_REDUCE5]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI1]], [[TMP21]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -193,8 +193,8 @@ define i32 @usdot(ptr %a, ptr %b) #0 { ; CHECK-NOI8MM-NEXT: [[TMP19:%.*]] = zext [[WIDE_LOAD4]] to ; CHECK-NOI8MM-NEXT: [[TMP20:%.*]] = mul [[TMP18]], [[TMP11]] ; CHECK-NOI8MM-NEXT: [[TMP21:%.*]] = mul [[TMP19]], [[TMP12]] -; CHECK-NOI8MM-NEXT: [[PARTIAL_REDUCE]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP20]]) -; CHECK-NOI8MM-NEXT: [[PARTIAL_REDUCE5]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI1]], [[TMP21]]) +; CHECK-NOI8MM-NEXT: [[PARTIAL_REDUCE]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI]], [[TMP20]]) +; CHECK-NOI8MM-NEXT: [[PARTIAL_REDUCE5]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI1]], [[TMP21]]) ; CHECK-NOI8MM-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NOI8MM-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NOI8MM-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -252,8 +252,8 @@ define i32 @sudot_neon(ptr %a, ptr %b) #1 { ; CHECK-NEXT: [[TMP9:%.*]] = sext <16 x i8> [[WIDE_LOAD4]] to <16 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = mul <16 x i32> [[TMP8]], [[TMP3]] ; CHECK-NEXT: [[TMP11:%.*]] = mul <16 x i32> [[TMP9]], [[TMP4]] -; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP10]]) -; CHECK-NEXT: [[PARTIAL_REDUCE5]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP11]]) +; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP10]]) +; CHECK-NEXT: [[PARTIAL_REDUCE5]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP11]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -345,8 +345,8 @@ define i32 
@usdot_neon(ptr %a, ptr %b) #1 { ; CHECK-NEXT: [[TMP9:%.*]] = zext <16 x i8> [[WIDE_LOAD4]] to <16 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = mul <16 x i32> [[TMP8]], [[TMP3]] ; CHECK-NEXT: [[TMP11:%.*]] = mul <16 x i32> [[TMP9]], [[TMP4]] -; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP10]]) -; CHECK-NEXT: [[PARTIAL_REDUCE5]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP11]]) +; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP10]]) +; CHECK-NEXT: [[PARTIAL_REDUCE5]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP11]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll index cc3203cdff46f..7b6c52cd2f39b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll @@ -23,7 +23,7 @@ define i32 @dotp(ptr %a, ptr %b) { ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 ; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32> ; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = mul <16 x i32> [[TMP6]], [[TMP3]] -; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP7]]) +; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP7]]) ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -56,8 +56,8 @@ define i32 @dotp(ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = zext <16 x i8> [[WIDE_LOAD4]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = mul <16 x i32> [[TMP9]], [[TMP4]] ; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = mul <16 x i32> [[TMP10]], [[TMP5]] -; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP11]]) -; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE5]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP12]]) +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP11]]) +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE5]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP12]]) ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -83,7 +83,7 @@ define i32 
@dotp(ptr %a, ptr %b) { ; CHECK-MAXBW-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 ; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32> ; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = mul <16 x i32> [[TMP6]], [[TMP3]] -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP7]]) +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP7]]) ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-MAXBW-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -712,25 +712,25 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) { ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = sext <16 x i8> [[WIDE_LOAD4]] to <16 x i32> ; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = mul nsw <16 x i32> [[TMP15]], [[TMP13]] -; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP16]]) +; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP16]]) ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 ; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = sext <16 x i8> [[WIDE_LOAD5]] to <16 x i32> ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = sext <16 x i8> [[WIDE_LOAD6]] to <16 x i32> ; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = mul nsw <16 x i32> [[TMP18]], [[TMP20]] -; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE7]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI2]], <16 x i32> [[TMP21]]) +; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE7]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI2]], <16 x i32> [[TMP21]]) ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = sext <16 x i8> [[WIDE_LOAD8]] to <16 x i32> ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD9:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-INTERLEAVE1-NEXT: [[TMP25:%.*]] = sext <16 x i8> [[WIDE_LOAD9]] to <16 x i32> ; CHECK-INTERLEAVE1-NEXT: [[TMP26:%.*]] = mul nsw <16 x i32> [[TMP23]], [[TMP25]] -; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE10]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP26]]) +; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE10]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP26]]) ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD11:%.*]] = load <16 x i8>, ptr [[TMP10]], align 1 ; CHECK-INTERLEAVE1-NEXT: [[TMP28:%.*]] = sext <16 x i8> [[WIDE_LOAD11]] to <16 x i32> ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD12:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1 ; CHECK-INTERLEAVE1-NEXT: [[TMP30:%.*]] = sext <16 x i8> [[WIDE_LOAD12]] to <16 x i32> ; CHECK-INTERLEAVE1-NEXT: [[TMP31:%.*]] = mul nsw <16 x i32> [[TMP28]], [[TMP30]] -; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE13]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> 
[[TMP31]]) +; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE13]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP31]]) ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-INTERLEAVE1-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -785,8 +785,8 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = sext <16 x i8> [[WIDE_LOAD10]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = mul nsw <16 x i32> [[TMP17]], [[TMP13]] ; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = mul nsw <16 x i32> [[TMP18]], [[TMP14]] -; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE1]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI6]], <16 x i32> [[TMP19]]) -; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE11]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI7]], <16 x i32> [[TMP20]]) +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE1]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI6]], <16 x i32> [[TMP19]]) +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE11]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI7]], <16 x i32> [[TMP20]]) ; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD12:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD13:%.*]] = load <16 x i8>, ptr [[TMP22]], align 1 @@ -799,8 +799,8 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = sext <16 x i8> [[WIDE_LOAD15]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = mul nsw <16 x i32> [[TMP23]], [[TMP27]] ; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = mul nsw <16 x i32> [[TMP24]], [[TMP28]] -; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE16]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI4]], <16 x i32> [[TMP29]]) -; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE17]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI5]], <16 x i32> [[TMP30]]) +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE16]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI4]], <16 x i32> [[TMP29]]) +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE17]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI5]], <16 x i32> [[TMP30]]) ; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD18:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD19:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1 @@ -813,8 +813,8 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP56:%.*]] = sext <16 x i8> [[WIDE_LOAD21]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP39:%.*]] = mul nsw <16 x i32> [[TMP33]], [[TMP37]] ; CHECK-INTERLEAVED-NEXT: [[TMP40:%.*]] = mul nsw <16 x i32> [[TMP34]], [[TMP56]] -; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE7]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI2]], <16 x i32> [[TMP39]]) -; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> 
@llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP40]]) +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE7]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI2]], <16 x i32> [[TMP39]]) +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP40]]) ; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD24:%.*]] = load <16 x i8>, ptr [[TMP10]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD25:%.*]] = load <16 x i8>, ptr [[TMP42]], align 1 @@ -827,8 +827,8 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP48:%.*]] = sext <16 x i8> [[WIDE_LOAD27]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP49:%.*]] = mul nsw <16 x i32> [[TMP43]], [[TMP47]] ; CHECK-INTERLEAVED-NEXT: [[TMP50:%.*]] = mul nsw <16 x i32> [[TMP44]], [[TMP48]] -; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE13]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP49]]) -; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE10]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP50]]) +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE13]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP49]]) +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE10]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP50]]) ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-INTERLEAVED-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP51]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -876,25 +876,25 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) { ; CHECK-MAXBW-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = sext <16 x i8> [[WIDE_LOAD4]] to <16 x i32> ; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = mul nsw <16 x i32> [[TMP15]], [[TMP13]] -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP16]]) +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP16]]) ; CHECK-MAXBW-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 ; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = sext <16 x i8> [[WIDE_LOAD5]] to <16 x i32> ; CHECK-MAXBW-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = sext <16 x i8> [[WIDE_LOAD6]] to <16 x i32> ; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = mul nsw <16 x i32> [[TMP18]], [[TMP20]] -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE7]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI2]], <16 x i32> [[TMP21]]) +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE7]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI2]], <16 x i32> [[TMP21]]) ; CHECK-MAXBW-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-MAXBW-NEXT: [[TMP23:%.*]] = sext <16 x i8> [[WIDE_LOAD8]] to <16 x i32> ; CHECK-MAXBW-NEXT: [[WIDE_LOAD9:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-MAXBW-NEXT: [[TMP25:%.*]] 
= sext <16 x i8> [[WIDE_LOAD9]] to <16 x i32> ; CHECK-MAXBW-NEXT: [[TMP26:%.*]] = mul nsw <16 x i32> [[TMP23]], [[TMP25]] -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE10]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP26]]) +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE10]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP26]]) ; CHECK-MAXBW-NEXT: [[WIDE_LOAD11:%.*]] = load <16 x i8>, ptr [[TMP10]], align 1 ; CHECK-MAXBW-NEXT: [[TMP28:%.*]] = sext <16 x i8> [[WIDE_LOAD11]] to <16 x i32> ; CHECK-MAXBW-NEXT: [[WIDE_LOAD12:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1 ; CHECK-MAXBW-NEXT: [[TMP30:%.*]] = sext <16 x i8> [[WIDE_LOAD12]] to <16 x i32> ; CHECK-MAXBW-NEXT: [[TMP31:%.*]] = mul nsw <16 x i32> [[TMP28]], [[TMP30]] -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE13]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP31]]) +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE13]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP31]]) ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-MAXBW-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-MAXBW-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -1288,7 +1288,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) { ; CHECK-INTERLEAVE1-NEXT: [[TMP178:%.*]] = sext <16 x i8> [[TMP177]] to <16 x i32> ; CHECK-INTERLEAVE1-NEXT: [[TMP179:%.*]] = mul nsw <16 x i32> [[TMP178]], [[TMP97]] ; CHECK-INTERLEAVE1-NEXT: [[TMP180:%.*]] = select <16 x i1> [[TMP16]], <16 x i32> [[TMP179]], <16 x i32> zeroinitializer -; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP180]]) +; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP180]]) ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 ; CHECK-INTERLEAVE1-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16) ; CHECK-INTERLEAVE1-NEXT: [[TMP181:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1623,7 +1623,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP178:%.*]] = sext <16 x i8> [[TMP177]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP179:%.*]] = mul nsw <16 x i32> [[TMP178]], [[TMP97]] ; CHECK-INTERLEAVED-NEXT: [[TMP180:%.*]] = select <16 x i1> [[TMP16]], <16 x i32> [[TMP179]], <16 x i32> zeroinitializer -; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP180]]) +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP180]]) ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 ; CHECK-INTERLEAVED-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16) ; CHECK-INTERLEAVED-NEXT: [[TMP181:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1958,7 +1958,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) { ; CHECK-MAXBW-NEXT: [[TMP178:%.*]] = sext <16 x i8> [[TMP177]] to <16 x i32> ; CHECK-MAXBW-NEXT: [[TMP179:%.*]] = mul nsw <16 x i32> [[TMP178]], [[TMP97]] ; CHECK-MAXBW-NEXT: [[TMP180:%.*]] = select <16 x i1> [[TMP16]], <16 x i32> 
[[TMP179]], <16 x i32> zeroinitializer -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP180]]) +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP180]]) ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 ; CHECK-MAXBW-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16) ; CHECK-MAXBW-NEXT: [[TMP181:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll index 17fbbbd1d6843..67e2c08139efe 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll @@ -101,7 +101,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: [[WIDE_LOAD4:%.*]] = load , ptr [[TMP14]], align 1 ; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = zext [[WIDE_LOAD4]] to ; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = mul [[TMP20]], [[TMP13]] -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE5]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI1]], [[TMP22]]) +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE5]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI1]], [[TMP22]]) ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-MAXBW-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-MAXBW-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -1275,25 +1275,25 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: [[WIDE_LOAD9:%.*]] = load , ptr [[TMP8]], align 1 ; CHECK-MAXBW-NEXT: [[TMP29:%.*]] = sext [[WIDE_LOAD9]] to ; CHECK-MAXBW-NEXT: [[TMP31:%.*]] = mul nsw [[TMP29]], [[TMP23]] -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE11]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI7]], [[TMP31]]) +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE11]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI7]], [[TMP31]]) ; CHECK-MAXBW-NEXT: [[WIDE_LOAD12:%.*]] = load , ptr [[TMP10]], align 1 ; CHECK-MAXBW-NEXT: [[TMP37:%.*]] = sext [[WIDE_LOAD12]] to ; CHECK-MAXBW-NEXT: [[WIDE_LOAD14:%.*]] = load , ptr [[TMP11]], align 1 ; CHECK-MAXBW-NEXT: [[TMP43:%.*]] = sext [[WIDE_LOAD14]] to ; CHECK-MAXBW-NEXT: [[TMP45:%.*]] = mul nsw [[TMP37]], [[TMP43]] -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI6]], [[TMP45]]) +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI6]], [[TMP45]]) ; CHECK-MAXBW-NEXT: [[WIDE_LOAD18:%.*]] = load , ptr [[TMP13]], align 1 ; CHECK-MAXBW-NEXT: [[TMP51:%.*]] = sext [[WIDE_LOAD18]] to ; CHECK-MAXBW-NEXT: [[WIDE_LOAD20:%.*]] = load , ptr [[TMP14]], align 1 ; CHECK-MAXBW-NEXT: [[TMP57:%.*]] = sext [[WIDE_LOAD20]] to ; CHECK-MAXBW-NEXT: [[TMP59:%.*]] = mul nsw [[TMP51]], [[TMP57]] -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE17]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI5]], [[TMP59]]) +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE17]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI5]], [[TMP59]]) ; CHECK-MAXBW-NEXT: [[WIDE_LOAD24:%.*]] = load , ptr [[TMP16]], align 1 ; CHECK-MAXBW-NEXT: [[TMP65:%.*]] = sext [[WIDE_LOAD24]] to ; 
CHECK-MAXBW-NEXT: [[WIDE_LOAD26:%.*]] = load , ptr [[TMP17]], align 1 ; CHECK-MAXBW-NEXT: [[TMP71:%.*]] = sext [[WIDE_LOAD26]] to ; CHECK-MAXBW-NEXT: [[TMP73:%.*]] = mul nsw [[TMP65]], [[TMP71]] -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE16]] = call @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI4]], [[TMP73]]) +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE16]] = call @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32( [[VEC_PHI4]], [[TMP73]]) ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-MAXBW-NEXT: [[TMP74:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-MAXBW-NEXT: br i1 [[TMP74]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -1462,7 +1462,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = sext [[WIDE_MASKED_LOAD1]] to ; CHECK-MAXBW-NEXT: [[TMP17:%.*]] = mul nsw [[TMP16]], [[TMP13]] ; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP17]], zeroinitializer -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[TMP18]]) +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[TMP18]]) ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-MAXBW-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 @@ -2441,28 +2441,28 @@ define dso_local void @not_dotp_high_register_pressure(ptr %a, ptr %b, ptr %sum, ; CHECK-INTERLEAVE1-NEXT: [[STRIDED_VEC14:%.*]] = shufflevector <128 x i8> [[WIDE_VEC]], <128 x i8> poison, <16 x i32> ; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = sext <16 x i8> [[STRIDED_VEC]] to <16 x i32> ; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = mul nsw <16 x i32> [[TMP13]], [[TMP10]] -; CHECK-INTERLEAVE1-NEXT: [[TMP15]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI7]], <16 x i32> [[TMP14]]) +; CHECK-INTERLEAVE1-NEXT: [[TMP15]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI7]], <16 x i32> [[TMP14]]) ; CHECK-INTERLEAVE1-NEXT: [[TMP29:%.*]] = sext <16 x i8> [[STRIDED_VEC8]] to <16 x i32> ; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = mul nsw <16 x i32> [[TMP29]], [[TMP10]] -; CHECK-INTERLEAVE1-NEXT: [[TMP18]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI6]], <16 x i32> [[TMP16]]) +; CHECK-INTERLEAVE1-NEXT: [[TMP18]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI6]], <16 x i32> [[TMP16]]) ; CHECK-INTERLEAVE1-NEXT: [[TMP17:%.*]] = sext <16 x i8> [[STRIDED_VEC9]] to <16 x i32> ; CHECK-INTERLEAVE1-NEXT: [[TMP31:%.*]] = mul nsw <16 x i32> [[TMP17]], [[TMP10]] -; CHECK-INTERLEAVE1-NEXT: [[TMP21]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI5]], <16 x i32> [[TMP31]]) +; CHECK-INTERLEAVE1-NEXT: [[TMP21]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI5]], <16 x i32> [[TMP31]]) ; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = sext <16 x i8> [[STRIDED_VEC10]] to <16 x i32> ; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = mul nsw <16 x i32> [[TMP19]], [[TMP10]] -; CHECK-INTERLEAVE1-NEXT: [[TMP24]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI4]], <16 x i32> [[TMP20]]) +; CHECK-INTERLEAVE1-NEXT: [[TMP24]] = 
call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI4]], <16 x i32> [[TMP20]]) ; CHECK-INTERLEAVE1-NEXT: [[TMP32:%.*]] = sext <16 x i8> [[STRIDED_VEC11]] to <16 x i32> ; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = mul nsw <16 x i32> [[TMP32]], [[TMP10]] -; CHECK-INTERLEAVE1-NEXT: [[TMP27]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP22]]) +; CHECK-INTERLEAVE1-NEXT: [[TMP27]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP22]]) ; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = sext <16 x i8> [[STRIDED_VEC12]] to <16 x i32> ; CHECK-INTERLEAVE1-NEXT: [[TMP34:%.*]] = mul nsw <16 x i32> [[TMP23]], [[TMP10]] -; CHECK-INTERLEAVE1-NEXT: [[TMP30]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI2]], <16 x i32> [[TMP34]]) +; CHECK-INTERLEAVE1-NEXT: [[TMP30]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI2]], <16 x i32> [[TMP34]]) ; CHECK-INTERLEAVE1-NEXT: [[TMP25:%.*]] = sext <16 x i8> [[STRIDED_VEC13]] to <16 x i32> ; CHECK-INTERLEAVE1-NEXT: [[TMP26:%.*]] = mul nsw <16 x i32> [[TMP25]], [[TMP10]] -; CHECK-INTERLEAVE1-NEXT: [[TMP33]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP26]]) +; CHECK-INTERLEAVE1-NEXT: [[TMP33]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP26]]) ; CHECK-INTERLEAVE1-NEXT: [[TMP35:%.*]] = sext <16 x i8> [[STRIDED_VEC14]] to <16 x i32> ; CHECK-INTERLEAVE1-NEXT: [[TMP28:%.*]] = mul nsw <16 x i32> [[TMP35]], [[TMP10]] -; CHECK-INTERLEAVE1-NEXT: [[TMP36]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP28]]) +; CHECK-INTERLEAVE1-NEXT: [[TMP36]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP28]]) ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-INTERLEAVE1-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] @@ -2541,28 +2541,28 @@ define dso_local void @not_dotp_high_register_pressure(ptr %a, ptr %b, ptr %sum, ; CHECK-INTERLEAVED-NEXT: [[STRIDED_VEC14:%.*]] = shufflevector <128 x i8> [[WIDE_VEC]], <128 x i8> poison, <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = sext <16 x i8> [[STRIDED_VEC]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = mul nsw <16 x i32> [[TMP13]], [[TMP10]] -; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI7]], <16 x i32> [[TMP14]]) +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI7]], <16 x i32> [[TMP14]]) ; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = sext <16 x i8> [[STRIDED_VEC8]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = mul nsw <16 x i32> [[TMP15]], [[TMP10]] -; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE15]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI6]], <16 x i32> [[TMP16]]) +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE15]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI6]], <16 x i32> [[TMP16]]) ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = sext <16 x i8> 
[[STRIDED_VEC9]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = mul nsw <16 x i32> [[TMP17]], [[TMP10]] -; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE16]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI5]], <16 x i32> [[TMP18]]) +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE16]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI5]], <16 x i32> [[TMP18]]) ; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = sext <16 x i8> [[STRIDED_VEC10]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = mul nsw <16 x i32> [[TMP19]], [[TMP10]] -; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE17]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI4]], <16 x i32> [[TMP20]]) +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE17]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI4]], <16 x i32> [[TMP20]]) ; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = sext <16 x i8> [[STRIDED_VEC11]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = mul nsw <16 x i32> [[TMP21]], [[TMP10]] -; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE18]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP22]]) +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE18]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP22]]) ; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = sext <16 x i8> [[STRIDED_VEC12]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = mul nsw <16 x i32> [[TMP23]], [[TMP10]] -; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE19]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI2]], <16 x i32> [[TMP24]]) +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE19]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI2]], <16 x i32> [[TMP24]]) ; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = sext <16 x i8> [[STRIDED_VEC13]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = mul nsw <16 x i32> [[TMP25]], [[TMP10]] -; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE20]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP26]]) +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE20]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP26]]) ; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = sext <16 x i8> [[STRIDED_VEC14]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = mul nsw <16 x i32> [[TMP27]], [[TMP10]] -; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE21]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP28]]) +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE21]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP28]]) ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] @@ -2641,28 +2641,28 @@ define dso_local void @not_dotp_high_register_pressure(ptr %a, ptr %b, ptr %sum, ; CHECK-MAXBW-NEXT: [[STRIDED_VEC14:%.*]] = shufflevector <128 x i8> [[WIDE_VEC]], <128 x i8> poison, <16 x i32> ; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = sext <16 x i8> [[STRIDED_VEC]] to <16 x i32> ; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = mul nsw <16 x i32> [[TMP13]], 
[[TMP10]] -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI7]], <16 x i32> [[TMP14]]) +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI7]], <16 x i32> [[TMP14]]) ; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = sext <16 x i8> [[STRIDED_VEC8]] to <16 x i32> ; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = mul nsw <16 x i32> [[TMP15]], [[TMP10]] -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE15]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI6]], <16 x i32> [[TMP16]]) +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE15]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI6]], <16 x i32> [[TMP16]]) ; CHECK-MAXBW-NEXT: [[TMP17:%.*]] = sext <16 x i8> [[STRIDED_VEC9]] to <16 x i32> ; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = mul nsw <16 x i32> [[TMP17]], [[TMP10]] -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE16]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI5]], <16 x i32> [[TMP18]]) +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE16]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI5]], <16 x i32> [[TMP18]]) ; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = sext <16 x i8> [[STRIDED_VEC10]] to <16 x i32> ; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = mul nsw <16 x i32> [[TMP19]], [[TMP10]] -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE17]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI4]], <16 x i32> [[TMP20]]) +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE17]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI4]], <16 x i32> [[TMP20]]) ; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = sext <16 x i8> [[STRIDED_VEC11]] to <16 x i32> ; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = mul nsw <16 x i32> [[TMP21]], [[TMP10]] -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE18]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP22]]) +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE18]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP22]]) ; CHECK-MAXBW-NEXT: [[TMP23:%.*]] = sext <16 x i8> [[STRIDED_VEC12]] to <16 x i32> ; CHECK-MAXBW-NEXT: [[TMP24:%.*]] = mul nsw <16 x i32> [[TMP23]], [[TMP10]] -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE19]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI2]], <16 x i32> [[TMP24]]) +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE19]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI2]], <16 x i32> [[TMP24]]) ; CHECK-MAXBW-NEXT: [[TMP25:%.*]] = sext <16 x i8> [[STRIDED_VEC13]] to <16 x i32> ; CHECK-MAXBW-NEXT: [[TMP26:%.*]] = mul nsw <16 x i32> [[TMP25]], [[TMP10]] -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE20]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP26]]) +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE20]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP26]]) ; CHECK-MAXBW-NEXT: [[TMP27:%.*]] = sext <16 x i8> [[STRIDED_VEC14]] to <16 x i32> ; CHECK-MAXBW-NEXT: [[TMP28:%.*]] = mul nsw <16 x i32> [[TMP27]], [[TMP10]] -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE21]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP28]]) +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE21]] = call <4 x i32> 
@llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP28]]) ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-MAXBW-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-MAXBW-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-interleave.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-interleave.ll index e24b47db14008..b308b925181b1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-interleave.ll @@ -27,8 +27,8 @@ define i32 @partial_reduce_with_non_constant_start_value(ptr %src, i32 %rdx.star ; IC2-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; IC2-NEXT: [[TMP6:%.*]] = mul nuw nsw <16 x i32> [[TMP4]], [[TMP4]] ; IC2-NEXT: [[TMP7:%.*]] = mul nuw nsw <16 x i32> [[TMP5]], [[TMP5]] -; IC2-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP6]]) -; IC2-NEXT: [[PARTIAL_REDUCE3]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP7]]) +; IC2-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP6]]) +; IC2-NEXT: [[PARTIAL_REDUCE3]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP7]]) ; IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; IC2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IC2-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -88,10 +88,10 @@ define i32 @partial_reduce_with_non_constant_start_value(ptr %src, i32 %rdx.star ; IC4-NEXT: [[TMP11:%.*]] = mul nuw nsw <16 x i32> [[TMP7]], [[TMP7]] ; IC4-NEXT: [[TMP12:%.*]] = mul nuw nsw <16 x i32> [[TMP8]], [[TMP8]] ; IC4-NEXT: [[TMP13:%.*]] = mul nuw nsw <16 x i32> [[TMP9]], [[TMP9]] -; IC4-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP10]]) -; IC4-NEXT: [[PARTIAL_REDUCE7]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP11]]) -; IC4-NEXT: [[PARTIAL_REDUCE8]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI2]], <16 x i32> [[TMP12]]) -; IC4-NEXT: [[PARTIAL_REDUCE9]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP13]]) +; IC4-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP10]]) +; IC4-NEXT: [[PARTIAL_REDUCE7]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP11]]) +; IC4-NEXT: [[PARTIAL_REDUCE8]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI2]], <16 x i32> [[TMP12]]) +; IC4-NEXT: [[PARTIAL_REDUCE9]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP13]]) ; IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 ; IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll
index 64cb33181cc1e..efffadc559f62 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll
@@ -105,7 +105,7 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
 ; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i32>
 ; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i32> [[TMP12]], [[TMP9]]
 ; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = sub <vscale x 8 x i32> zeroinitializer, [[TMP13]]
-; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 2 x i32> @llvm.experimental.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI]], <vscale x 8 x i32> [[TMP14]])
+; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 2 x i32> @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI]], <vscale x 8 x i32> [[TMP14]])
 ; CHECK-MAXBW-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP3]]
 ; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
 ; CHECK-MAXBW-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll
index 370bfc641001a..40cce22116db9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll
@@ -73,7 +73,7 @@ define i32 @zext_add_reduc_i8_i32_sve(ptr %a) #0 {
 ; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
 ; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP6]], align 1
 ; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = zext <vscale x 16 x i8> [[WIDE_LOAD]] to <vscale x 16 x i32>
-; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[TMP8]])
+; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[TMP8]])
 ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-MAXBW-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -114,7 +114,7 @@ define i32 @zext_add_reduc_i8_i32_neon(ptr %a) #2 {
 ; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
 ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
 ; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
-; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP2]])
+; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP2]])
 ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -139,8 +139,8 @@ define i32 @zext_add_reduc_i8_i32_neon(ptr %a) #2 {
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32>
-; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP3]])
-; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE3]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP4]])
+; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP3]])
+; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE3]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP4]])
 ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
 ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -162,7 +162,7 @@ define i32 @zext_add_reduc_i8_i32_neon(ptr %a) #2 {
 ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
 ; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
 ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
-; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP2]])
+; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP2]])
 ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-MAXBW-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -255,7 +255,7 @@ define i64 @zext_add_reduc_i8_i64(ptr %a) #0 {
 ; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
 ; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP7]], align 1
 ; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = zext <vscale x 16 x i8> [[WIDE_LOAD]] to <vscale x 16 x i64>
-; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv16i64(<vscale x 2 x i64> [[VEC_PHI]], <vscale x 16 x i64> [[TMP9]])
+; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 2 x i64> @llvm.vector.partial.reduce.add.nxv2i64.nxv16i64(<vscale x 2 x i64> [[VEC_PHI]], <vscale x 16 x i64> [[TMP9]])
 ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-MAXBW-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -349,7 +349,7 @@ define i64 @zext_add_reduc_i16_i64(ptr %a) #0 {
 ; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[A]], i64 [[INDEX]]
 ; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i16>, ptr [[TMP7]], align 2
 ; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = zext <vscale x 8 x i16> [[WIDE_LOAD]] to <vscale x 8 x i64>
-; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64(<vscale x 2 x i64> [[VEC_PHI]], <vscale x 8 x i64> [[TMP9]])
+; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 2 x i64> @llvm.vector.partial.reduce.add.nxv2i64.nxv8i64(<vscale x 2 x i64> [[VEC_PHI]], <vscale x 8 x i64> [[TMP9]])
 ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-MAXBW-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -391,7 +391,7 @@ define i32 @zext_add_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 {
 ; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
 ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
 ; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
-; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP3]])
+; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP3]])
 ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -416,8 +416,8 @@ define i32 @zext_add_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 {
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
 ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32>
-; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP4]])
-; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE3]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP5]])
+; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP4]])
+; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE3]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP5]])
 ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -443,7 +443,7 @@ define i32 @zext_add_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 {
 ; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
 ; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP7]], align 1
 ; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = zext <vscale x 16 x i8> [[WIDE_LOAD]] to <vscale x 16 x i32>
-; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[TMP9]])
+; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[TMP9]])
 ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-MAXBW-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -546,7 +546,7 @@ define i32 @zext_add_reduc_i8_i32_predicated(ptr %a) #0 {
 ; CHECK-MAXBW-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP6]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
 ; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = zext <vscale x 16 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 16 x i32>
 ; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i32> [[TMP8]], <vscale x 16 x i32> zeroinitializer
-; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[TMP9]])
+; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[TMP9]])
 ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
 ; CHECK-MAXBW-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_NEXT]], i64 1025)
 ; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
@@ -817,7 +817,7 @@ define i32 @sext_add_reduc_i8_i32(ptr %a) #0 {
 ; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
 ; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP7]], align 1
 ; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = sext <vscale x 16 x i8> [[WIDE_LOAD]] to <vscale x 16 x i32>
-; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[TMP9]])
+; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[TMP9]])
 ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-MAXBW-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
@@ -1061,7 +1061,7 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 {
 ; CHECK-MAXBW-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[D]], [[INDEX]]
 ; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[OFFSET_IDX]]
 ; CHECK-MAXBW-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP10]], align 1
-; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[TMP9]])
+; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[TMP9]])
 ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]]
 ; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-MAXBW-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll
index dff6c793897da..c37404c36f155 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll
@@ -65,7 +65,7 @@ define i32 @vqdot(ptr %a, ptr %b) #0 {
 ; ZVQDOTQ-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i8>, ptr [[TMP9]], align 1
 ; ZVQDOTQ-NEXT: [[TMP11:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32>
 ; ZVQDOTQ-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]]
-; ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 1 x i32> @llvm.experimental.vector.partial.reduce.add.nxv1i32.nxv4i32(<vscale x 1 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP12]])
+; ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 1 x i32> @llvm.vector.partial.reduce.add.nxv1i32.nxv4i32(<vscale x 1 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP12]])
 ; ZVQDOTQ-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; ZVQDOTQ-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; ZVQDOTQ-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -134,8 +134,8 @@ define i32 @vqdot(ptr %a, ptr %b) #0 {
 ; FIXED-ZVQDOTQ-NEXT: [[TMP9:%.*]] = sext <8 x i8> [[WIDE_LOAD4]] to <8 x i32>
 ; FIXED-ZVQDOTQ-NEXT: [[TMP10:%.*]] = mul <8 x i32> [[TMP8]], [[TMP3]]
 ; FIXED-ZVQDOTQ-NEXT: [[TMP11:%.*]] = mul <8 x i32> [[TMP9]], [[TMP4]]
-; FIXED-ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <2 x i32> @llvm.experimental.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI]], <8 x i32> [[TMP10]])
-; FIXED-ZVQDOTQ-NEXT: [[PARTIAL_REDUCE5]] = call <2 x i32> @llvm.experimental.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI1]], <8 x i32> [[TMP11]])
+; FIXED-ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI]], <8 x i32> [[TMP10]])
+; FIXED-ZVQDOTQ-NEXT: [[PARTIAL_REDUCE5]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI1]], <8 x i32> [[TMP11]])
 ; FIXED-ZVQDOTQ-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; FIXED-ZVQDOTQ-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; FIXED-ZVQDOTQ-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -225,7 +225,7 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 {
 ; ZVQDOTQ-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i8>, ptr [[TMP9]], align 1
 ; ZVQDOTQ-NEXT: [[TMP11:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32>
 ; ZVQDOTQ-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]]
-; ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 1 x i32> @llvm.experimental.vector.partial.reduce.add.nxv1i32.nxv4i32(<vscale x 1 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP12]])
+; ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 1 x i32> @llvm.vector.partial.reduce.add.nxv1i32.nxv4i32(<vscale x 1 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP12]])
 ; ZVQDOTQ-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; ZVQDOTQ-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; ZVQDOTQ-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -294,8 +294,8 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 {
 ; FIXED-ZVQDOTQ-NEXT: [[TMP9:%.*]] = zext <8 x i8> [[WIDE_LOAD4]] to <8 x i32>
 ; FIXED-ZVQDOTQ-NEXT: [[TMP10:%.*]] = mul <8 x i32> [[TMP8]], [[TMP3]]
 ; FIXED-ZVQDOTQ-NEXT: [[TMP11:%.*]] = mul <8 x i32> [[TMP9]], [[TMP4]]
-; FIXED-ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <2 x i32> @llvm.experimental.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI]], <8 x i32> [[TMP10]])
-; FIXED-ZVQDOTQ-NEXT: [[PARTIAL_REDUCE5]] = call <2 x i32> @llvm.experimental.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI1]], <8 x i32> [[TMP11]])
+; FIXED-ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI]], <8 x i32> [[TMP10]])
+; FIXED-ZVQDOTQ-NEXT: [[PARTIAL_REDUCE5]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI1]], <8 x i32> [[TMP11]])
 ; FIXED-ZVQDOTQ-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; FIXED-ZVQDOTQ-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; FIXED-ZVQDOTQ-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -385,7 +385,7 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 {
 ; ZVQDOTQ-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i8>, ptr [[TMP9]], align 1
 ; ZVQDOTQ-NEXT: [[TMP11:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32>
 ; ZVQDOTQ-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]]
-; ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 1 x i32> @llvm.experimental.vector.partial.reduce.add.nxv1i32.nxv4i32(<vscale x 1 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP12]])
+; ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 1 x i32> @llvm.vector.partial.reduce.add.nxv1i32.nxv4i32(<vscale x 1 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP12]])
 ; ZVQDOTQ-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; ZVQDOTQ-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; ZVQDOTQ-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -454,8 +454,8 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 {
 ; FIXED-ZVQDOTQ-NEXT: [[TMP9:%.*]] = sext <8 x i8> [[WIDE_LOAD4]] to <8 x i32>
 ; FIXED-ZVQDOTQ-NEXT: [[TMP10:%.*]] = mul <8 x i32> [[TMP8]], [[TMP3]]
 ; FIXED-ZVQDOTQ-NEXT: [[TMP11:%.*]] = mul <8 x i32> [[TMP9]], [[TMP4]]
-; FIXED-ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <2 x i32> @llvm.experimental.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI]], <8 x i32> [[TMP10]])
-; FIXED-ZVQDOTQ-NEXT: [[PARTIAL_REDUCE5]] = call <2 x i32> @llvm.experimental.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI1]], <8 x i32> [[TMP11]])
+; FIXED-ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI]], <8 x i32> [[TMP10]])
+; FIXED-ZVQDOTQ-NEXT: [[PARTIAL_REDUCE5]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI1]], <8 x i32> [[TMP11]])
 ; FIXED-ZVQDOTQ-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; FIXED-ZVQDOTQ-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; FIXED-ZVQDOTQ-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -544,7 +544,7 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 {
 ; ZVQDOTQ-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i8>, ptr [[TMP9]], align 1
 ; ZVQDOTQ-NEXT: [[TMP11:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32>
 ; ZVQDOTQ-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]]
-; ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 1 x i32> @llvm.experimental.vector.partial.reduce.add.nxv1i32.nxv4i32(<vscale x 1 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP12]])
+; ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 1 x i32> @llvm.vector.partial.reduce.add.nxv1i32.nxv4i32(<vscale x 1 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP12]])
 ; ZVQDOTQ-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; ZVQDOTQ-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; ZVQDOTQ-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -613,8 +613,8 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 {
 ; FIXED-ZVQDOTQ-NEXT: [[TMP9:%.*]] = zext <8 x i8> [[WIDE_LOAD4]] to <8 x i32>
 ; FIXED-ZVQDOTQ-NEXT: [[TMP10:%.*]] = mul <8 x i32> [[TMP8]], [[TMP3]]
 ; FIXED-ZVQDOTQ-NEXT: [[TMP11:%.*]] = mul <8 x i32> [[TMP9]], [[TMP4]]
-; FIXED-ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <2 x i32> @llvm.experimental.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI]], <8 x i32> [[TMP10]])
-; FIXED-ZVQDOTQ-NEXT: [[PARTIAL_REDUCE5]] = call <2 x i32> @llvm.experimental.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI1]], <8 x i32> [[TMP11]])
+; FIXED-ZVQDOTQ-NEXT: [[PARTIAL_REDUCE]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI]], <8 x i32> [[TMP10]])
+; FIXED-ZVQDOTQ-NEXT: [[PARTIAL_REDUCE5]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI1]], <8 x i32> [[TMP11]])
 ; FIXED-ZVQDOTQ-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; FIXED-ZVQDOTQ-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; FIXED-ZVQDOTQ-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
diff --git a/llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll b/llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll
index abd98a4dc64b8..ec3711eabb7e1 100644
--- a/llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll
+++ b/llvm/test/Transforms/VectorCombine/intrinsic-scalarize.ll
@@ -81,12 +81,12 @@ define <4 x i32> @non_trivially_vectorizable(i32 %x, i32 %y) {
 ; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
 ; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
 ; CHECK-NEXT: [[Y_INSERT:%.*]] = insertelement <8 x i32> poison, i32 [[Y]], i32 0
-; CHECK-NEXT: [[V:%.*]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v8i32(<4 x i32> [[X_INSERT]], <8 x i32> [[Y_INSERT]])
+; CHECK-NEXT: [[V:%.*]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v8i32(<4 x i32> [[X_INSERT]], <8 x i32> [[Y_INSERT]])
 ; CHECK-NEXT: ret <4 x i32> [[V]]
 ;
 %x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
 %y.insert = insertelement <8 x i32> poison, i32 %y, i32 0
- %v = call <4 x i32> @llvm.experimental.vector.partial.reduce.add(<4 x i32> %x.insert, <8 x i32> %y.insert)
+ %v = call <4 x i32> @llvm.vector.partial.reduce.add(<4 x i32> %x.insert, <8 x i32> %y.insert)
 ret <4 x i32> %v
}

From a694998442d69a6a81c5d4906f19f0051e1cf31e Mon Sep 17 00:00:00 2001
From: Sander de Smalen
Date: Mon, 15 Sep 2025 14:40:15 +0100
Subject: [PATCH 2/4] Fix formatting

---
 llvm/lib/IR/AutoUpgrade.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 0eb3bf961b67a..5385b1f8cac0b 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1259,7 +1259,8 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
             .StartsWith("reverse.", Intrinsic::vector_reverse)
             .StartsWith("interleave2.", Intrinsic::vector_interleave2)
             .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
-            .StartsWith("partial.reduce.add", Intrinsic::vector_partial_reduce_add)
+            .StartsWith("partial.reduce.add",
+                        Intrinsic::vector_partial_reduce_add)
             .Default(Intrinsic::not_intrinsic);
     if (ID != Intrinsic::not_intrinsic) {
       const auto *FT = F->getFunctionType();
@@ -1272,7 +1273,7 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
        Tys.push_back(FT->getParamType(0));
      if (ID == Intrinsic::vector_insert ||
          ID == Intrinsic::vector_partial_reduce_add)
-      // Inserting overloads the inserted type.
+        // Inserting overloads the inserted type.
        Tys.push_back(FT->getParamType(1));
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);

From e3ece1a47900e23718930a06ac65b95d5576e7c0 Mon Sep 17 00:00:00 2001
From: Sander de Smalen
Date: Tue, 16 Sep 2025 11:47:27 +0100
Subject: [PATCH 3/4] Address comments on test

---
 .../Bitcode/upgrade-vector-partial-reduce-add-intrinsic.ll | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/test/Bitcode/upgrade-vector-partial-reduce-add-intrinsic.ll b/llvm/test/Bitcode/upgrade-vector-partial-reduce-add-intrinsic.ll
index 1277d5d933d7b..623faad4406a1 100644
--- a/llvm/test/Bitcode/upgrade-vector-partial-reduce-add-intrinsic.ll
+++ b/llvm/test/Bitcode/upgrade-vector-partial-reduce-add-intrinsic.ll
@@ -1,11 +1,10 @@
-; RUN: opt -S < %s | FileCheck %s
 ; RUN: llvm-as %s -o - | llvm-dis | FileCheck %s

 define <4 x i32> @partial_reduce_add_fixed(<16 x i32> %a) {
 ; CHECK-LABEL: @partial_reduce_add_fixed
 ; CHECK: %res = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> zeroinitializer, <16 x i32> %a)

- %res = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> zeroinitializer, <16 x i32> %a)
+ %res = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> zeroinitializer, <16 x i32> %a)
 ret <4 x i32> %res
 }

 define <vscale x 4 x i32> @partial_reduce_add_scalable(<vscale x 16 x i32> %a) {
 ; CHECK-LABEL: @partial_reduce_add_scalable
 ; CHECK: %res = call <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> zeroinitializer, <vscale x 16 x i32> %a)

- %res = call <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> zeroinitializer, <vscale x 16 x i32> %a)
+ %res = call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> zeroinitializer, <vscale x 16 x i32> %a)
 ret <vscale x 4 x i32> %res
 }

From b236fbad78ee4e5d6396587fdf410969c6eb83ff Mon Sep 17 00:00:00 2001
From: Sander de Smalen
Date: Wed, 17 Sep 2025 09:57:31 +0000
Subject: [PATCH 4/4] Update sve2p1-dots-partial-reduction.ll

---
 .../CodeGen/AArch64/sve2p1-dots-partial-reduction.ll | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/sve2p1-dots-partial-reduction.ll b/llvm/test/CodeGen/AArch64/sve2p1-dots-partial-reduction.ll
index d9ba613931982..51673282bd8ff 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-dots-partial-reduction.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-dots-partial-reduction.ll
@@ -11,7 +11,7 @@ entry:
 %a.wide = zext <vscale x 8 x i16> %a to <vscale x 8 x i32>
 %b.wide = zext <vscale x 8 x i16> %b to <vscale x 8 x i32>
 %mult = mul nuw nsw <vscale x 8 x i32> %a.wide, %b.wide
- %partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add(<vscale x 4 x i32> %acc, <vscale x 8 x i32> %mult)
+ %partial.reduce = tail call <vscale x 4 x i32> @llvm.vector.partial.reduce.add(<vscale x 4 x i32> %acc, <vscale x 8 x i32> %mult)
 ret <vscale x 4 x i32> %partial.reduce
 }
@@ -24,7 +24,7 @@ entry:
 %a.wide = sext <vscale x 8 x i16> %a to <vscale x 8 x i32>
 %b.wide = sext <vscale x 8 x i16> %b to <vscale x 8 x i32>
 %mult = mul nuw nsw <vscale x 8 x i32> %a.wide, %b.wide
- %partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add(<vscale x 4 x i32> %acc, <vscale x 8 x i32> %mult)
+ %partial.reduce = tail call <vscale x 4 x i32> @llvm.vector.partial.reduce.add(<vscale x 4 x i32> %acc, <vscale x 8 x i32> %mult)
 ret <vscale x 4 x i32> %partial.reduce
 }
@@ -44,7 +44,7 @@ entry:
 %a.wide = zext <16 x i16> %a to <16 x i32>
 %b.wide = zext <16 x i16> %b to <16 x i32>
 %mult = mul nuw nsw <16 x i32> %a.wide, %b.wide
- %partial.reduce = tail call <8 x i32> @llvm.experimental.vector.partial.reduce.add(<8 x i32> %acc, <16 x i32> %mult)
+ %partial.reduce = tail call <8 x i32> @llvm.vector.partial.reduce.add(<8 x i32> %acc, <16 x i32> %mult)
 store <8 x i32> %partial.reduce, ptr %accptr
 ret void
 }
@@ -65,7 +65,7 @@ entry:
 %a.wide = sext <16 x i16> %a to <16 x i32>
 %b.wide = sext <16 x i16> %b to <16 x i32>
 %mult = mul nuw nsw <16 x i32> %a.wide, %b.wide
- %partial.reduce = tail call <8 x i32> @llvm.experimental.vector.partial.reduce.add(<8 x i32> %acc, <16 x i32> %mult)
+ %partial.reduce = tail call <8 x i32> @llvm.vector.partial.reduce.add(<8 x i32> %acc, <16 x i32> %mult)
 store <8 x i32> %partial.reduce, ptr %accptr
 ret void
 }
@@ -83,7 +83,7 @@ entry:
 %a.wide = zext <8 x i16> %a to <8 x i32>
 %b.wide = zext <8 x i16> %b to <8 x i32>
 %mult = mul nuw nsw <8 x i32> %a.wide, %b.wide
- %partial.reduce = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add(<4 x i32> %acc, <8 x i32> %mult)
+ %partial.reduce = tail call <4 x i32> @llvm.vector.partial.reduce.add(<4 x i32> %acc, <8 x i32> %mult)
 ret <4 x i32> %partial.reduce
 }
@@ -100,6 +100,6 @@ entry:
 %a.wide = sext <8 x i16> %a to <8 x i32>
 %b.wide = sext <8 x i16> %b to <8 x i32>
 %mult = mul nuw nsw <8 x i32> %a.wide, %b.wide
- %partial.reduce = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add(<4 x i32> %acc, <8 x i32> %mult)
+ %partial.reduce = tail call <4 x i32> @llvm.vector.partial.reduce.add(<4 x i32> %acc, <8 x i32> %mult)
 ret <4 x i32> %partial.reduce
 }
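
For downstream IR that still spells the old name, a minimal sketch of the behaviour the Bitcode test in this series checks (the function @upgrade_example and its operand names are illustrative, not taken from the patch):

  define <4 x i32> @upgrade_example(<4 x i32> %acc, <16 x i32> %in) {
    ; Written with the legacy 'experimental' name; parsing or loading the
    ; module rewrites the declaration and this call site to
    ; @llvm.vector.partial.reduce.add.v4i32.v16i32 via AutoUpgrade.
    %r = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %in)
    ret <4 x i32> %r
  }

Round-tripping such a module through llvm-as | llvm-dis (as the test does) should therefore print only the unprefixed intrinsic name.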