From f1e79c2042694eb0ad8eb577cf5e46650b6e58c7 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 11 Aug 2025 21:55:16 +0100 Subject: [PATCH 1/6] [VPlan] Use VPIRMetadata for VPInterleaveRecipe. Use VPIRMetadata for VPInterleaveRecipe to preserve noalias metadata added by versioning. This still uses InterleaveGroup's logic to preserve existing metadata from IR. This can be migrated separately. Fixes https://github.com/llvm/llvm-project/issues/153006. --- llvm/lib/Transforms/Vectorize/VPlan.h | 15 ++++---- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 15 ++++++++ .../Transforms/Vectorize/VPlanTransforms.cpp | 27 +++++++++----- .../LoopVectorize/X86/interleave-cost.ll | 36 +++++-------------- .../interleaved-accesses-metadata.ll | 32 +++++++++-------- .../Transforms/Vectorize/VPlanTest.cpp | 2 +- 6 files changed, 70 insertions(+), 57 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 367a6ebf48dc2..8c10e06b5f423 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -943,6 +943,10 @@ class VPIRMetadata { void addMetadata(unsigned Kind, MDNode *Node) { Metadata.emplace_back(Kind, Node); } + + /// Intersect the this VPIRMetada objet with \p MD, keeping only metadata + /// nodes in both. + void intersect(const VPIRMetadata &MD); }; /// This is a concrete Recipe that models a single VPlan-level instruction. @@ -2426,7 +2430,8 @@ class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPSingleDefRecipe { /// or stores into one wide load/store and shuffles. The first operand of a /// VPInterleave recipe is the address, followed by the stored values, followed /// by an optional mask. -class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase { +class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase, + public VPIRMetadata { const InterleaveGroup *IG; /// Indicates if the interleave group is in a conditional block and requires a @@ -2440,10 +2445,8 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase { public: VPInterleaveRecipe(const InterleaveGroup *IG, VPValue *Addr, ArrayRef StoredValues, VPValue *Mask, - bool NeedsMaskForGaps, DebugLoc DL) - : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}, - DL), - + bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL) + : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}, DL), VPIRMetadata(MD), IG(IG), NeedsMaskForGaps(NeedsMaskForGaps) { // TODO: extend the masked interleaved-group support to reversed access. assert((!Mask || !IG->isReverse()) && @@ -2466,7 +2469,7 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase { VPInterleaveRecipe *clone() override { return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(), - NeedsMaskForGaps, getDebugLoc()); + NeedsMaskForGaps, *this, getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPInterleaveSC) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 89214b410fab4..484a4a2962f23 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1560,6 +1560,17 @@ void VPIRMetadata::applyMetadata(Instruction &I) const { I.setMetadata(Kind, Node); } +void VPIRMetadata::intersect(const VPIRMetadata &Other) { + SmallVector> MetadataUnion; + for (const auto &[KindA, MDA] : Metadata) { + for (const auto &[KindB, MDB] : Other.Metadata) { + if (KindA == KindB && MDA == MDB) + MetadataUnion.emplace_back(KindA, MDA); + } + } + Metadata = std::move(MetadataUnion); +} + void VPWidenCallRecipe::execute(VPTransformState &State) { assert(State.VF.isVector() && "not widening"); assert(Variant != nullptr && "Can't create vector function."); @@ -3575,6 +3586,8 @@ void VPInterleaveRecipe::execute(VPTransformState &State) { } else NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr, Group->getAlign(), "wide.vec"); + applyMetadata(*NewLoad); + // TODO: Also manage existing metadata using VPIRMetadata. Group->addMetadata(NewLoad); ArrayRef VPDefs = definedValues(); @@ -3677,6 +3690,8 @@ void VPInterleaveRecipe::execute(VPTransformState &State) { NewStoreInstr = State.Builder.CreateAlignedStore(IVec, ResAddr, Group->getAlign()); + applyMetadata(*NewStoreInstr); + // TODO: Also manage existing metadata using VPIRMetadata. Group->addMetadata(NewStoreInstr); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index b25fc0af1fb51..b9afd9de0a4bf 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2612,12 +2612,24 @@ void VPlanTransforms::createInterleaveGroups( VPDominatorTree VPDT; VPDT.recalculate(Plan); for (const auto *IG : InterleaveGroups) { + // Get or create the start address for the interleave group. + auto *Start = + cast(RecipeBuilder.getRecipe(IG->getMember(0))); + + VPIRMetadata InterleaveMD(*Start); SmallVector StoredValues; - for (unsigned i = 0; i < IG->getFactor(); ++i) - if (auto *SI = dyn_cast_or_null(IG->getMember(i))) { - auto *StoreR = cast(RecipeBuilder.getRecipe(SI)); + for (unsigned i = 0; i < IG->getFactor(); ++i) { + Instruction *MemI = IG->getMember(i); + if (!MemI) + continue; + VPWidenMemoryRecipe *MemR = + cast(RecipeBuilder.getRecipe(MemI)); + if (!MemR) + continue; + if (auto *StoreR = dyn_cast(MemR)) StoredValues.push_back(StoreR->getStoredValue()); - } + InterleaveMD.intersect(*MemR); + } bool NeedsMaskForGaps = (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) || @@ -2632,9 +2644,6 @@ void VPlanTransforms::createInterleaveGroups( getLoadStorePointerOperand(IRInsertPos)->stripPointerCasts())) NW = Gep->getNoWrapFlags().withoutNoUnsignedWrap(); - // Get or create the start address for the interleave group. - auto *Start = - cast(RecipeBuilder.getRecipe(IG->getMember(0))); VPValue *Addr = Start->getAddr(); VPRecipeBase *AddrDef = Addr->getDefiningRecipe(); if (AddrDef && !VPDT.properlyDominates(AddrDef, InsertPos)) { @@ -2667,8 +2676,10 @@ void VPlanTransforms::createInterleaveGroups( ReversePtr->insertBefore(InsertPos); Addr = ReversePtr; } + auto *VPIG = new VPInterleaveRecipe(IG, Addr, StoredValues, - InsertPos->getMask(), NeedsMaskForGaps, InsertPos->getDebugLoc()); + InsertPos->getMask(), NeedsMaskForGaps, + InterleaveMD, InsertPos->getDebugLoc()); VPIG->insertBefore(InsertPos); unsigned J = 0; diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll index 780dbe695d4df..80451dc6f996b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll @@ -145,7 +145,7 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 [[TMP11]] -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x float>, ptr [[TMP26]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x float>, ptr [[TMP26]], align 4, !alias.scope [[META4:![0-9]+]] ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC14:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC15:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> @@ -166,10 +166,10 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr ; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <2 x float> [[TMP35]], <2 x float> [[TMP37]], <4 x i32> ; CHECK-NEXT: [[TMP42:%.*]] = shufflevector <4 x float> [[TMP40]], <4 x float> [[TMP41]], <8 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP42]], <8 x float> poison, <8 x i32> -; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC]], ptr [[TMP28]], align 4 +; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC]], ptr [[TMP28]], align 4, !alias.scope [[META7:![0-9]+]], !noalias [[META4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP43]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP43]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] @@ -215,7 +215,7 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr ; CHECK-NEXT: store float [[MUL_4]], ptr [[GEP_11]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[ARG1]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -396,12 +396,12 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP51:%.*]] = lshr exact i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP52:%.*]] = getelementptr nusw i32, ptr [[B]], i64 [[TMP51]] -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP52]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP52]], align 4, !alias.scope [[META11:![0-9]+]], !noalias [[META14:![0-9]+]] ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC34:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP56:%.*]] = getelementptr i32, ptr [[A]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[B]], <4 x i64> [[VEC_IND]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP54]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison), !alias.scope [[META6:![0-9]+]], !noalias [[META9:![0-9]+]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP54]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison), !alias.scope [[META16:![0-9]+]], !noalias [[META14]] ; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC34]], <4 x i32> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_GATHER]], <4 x i32> zeroinitializer, <8 x i32> @@ -409,11 +409,11 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK-NEXT: [[TMP62:%.*]] = shufflevector <8 x i32> [[TMP60]], <8 x i32> zeroinitializer, <16 x i32> ; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> [[TMP62]], <32 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[TMP63]], <32 x i32> poison, <32 x i32> -; CHECK-NEXT: store <32 x i32> [[INTERLEAVED_VEC]], ptr [[TMP56]], align 4 +; CHECK-NEXT: store <32 x i32> [[INTERLEAVED_VEC]], ptr [[TMP56]], align 4, !alias.scope [[META14]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 32) ; CHECK-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP64]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP64]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: @@ -454,7 +454,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N) ; CHECK-NEXT: store i32 0, ptr [[GEP_A_7]], align 4 ; CHECK-NEXT: [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -511,8 +511,6 @@ define void @interleave_store_double_i64(ptr %dst) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: store <4 x double> , ptr [[DST]], align 8 -; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: @@ -525,7 +523,6 @@ define void @interleave_store_double_i64(ptr %dst) { ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_0]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -623,8 +620,6 @@ define void @interleave_store_i64_double_2(ptr %dst) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: store <4 x double> , ptr [[DST]], align 8 -; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: @@ -637,7 +632,6 @@ define void @interleave_store_i64_double_2(ptr %dst) { ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_1]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -668,15 +662,3 @@ attributes #1 = { "min-legal-vector-width"="0" "target-cpu"="cascadelake" } ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} -; CHECK: [[META6]] = !{[[META7:![0-9]+]]} -; CHECK: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]} -; CHECK: [[META8]] = distinct !{[[META8]], !"LVerDomain"} -; CHECK: [[META9]] = !{[[META10:![0-9]+]]} -; CHECK: [[META10]] = distinct !{[[META10]], [[META8]]} -; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]} -; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]]} -; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} -; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META2]], [[META1]]} -;. diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll index 4fe7c97ccd668..0eb251704f453 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll @@ -129,17 +129,17 @@ define void @ir_tbaa_different(ptr %base, ptr %end, ptr %src) { ; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[SRC]], align 4, !alias.scope [[META14:![0-9]+]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x float>, ptr [[NEXT_GEP]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x float>, ptr [[NEXT_GEP]], align 4, !alias.scope [[META17:![0-9]+]], !noalias [[META14]] ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x float> [[WIDE_VEC]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <4 x float> [[WIDE_VEC]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x float> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[STRIDED_VEC3]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <4 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: store <4 x float> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 4 +; CHECK-NEXT: store <4 x float> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 4, !alias.scope [[META17]], !noalias [[META14]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] @@ -150,15 +150,15 @@ define void @ir_tbaa_different(ptr %base, ptr %end, ptr %src) { ; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[L_INVAR:%.*]] = load float, ptr [[SRC]], align 4 ; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8 -; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[PTR_IV]], align 4 -; CHECK-NEXT: [[MUL_0:%.*]] = fmul float [[L_0]], [[L_INVAR]] -; CHECK-NEXT: store float [[MUL_0]], ptr [[PTR_IV]], align 4, !tbaa [[TBAA10]] -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 4 -; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_1]], align 4, !tbaa [[TBAA12]] +; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[PTR_IV]], align 4 ; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[L_1]], [[L_INVAR]] -; CHECK-NEXT: store float [[MUL_1]], ptr [[GEP_1]], align 4 +; CHECK-NEXT: store float [[MUL_1]], ptr [[PTR_IV]], align 4, !tbaa [[TBAA10]] +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 4 +; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[GEP_1]], align 4, !tbaa [[TBAA12]] +; CHECK-NEXT: [[MUL_2:%.*]] = fmul float [[L_2]], [[L_INVAR]] +; CHECK-NEXT: store float [[MUL_2]], ptr [[GEP_1]], align 4 ; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -216,7 +216,7 @@ define void @noalias_metadata_from_versioning(ptr %base, ptr %end, ptr %src) { ; CHECK-NEXT: store <4 x float> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] @@ -234,7 +234,7 @@ define void @noalias_metadata_from_versioning(ptr %base, ptr %end, ptr %src) { ; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[L_1]], 1.000000e+01 ; CHECK-NEXT: store float [[MUL_1]], ptr [[GEP_1]], align 4 ; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -285,8 +285,10 @@ exit: ; CHECK: [[META14]] = !{[[META15:![0-9]+]]} ; CHECK: [[META15]] = distinct !{[[META15]], [[META16:![0-9]+]]} ; CHECK: [[META16]] = distinct !{[[META16]], !"LVerDomain"} -; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META8]], [[META9]]} -; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META8]]} +; CHECK: [[META17]] = !{[[META18:![0-9]+]]} +; CHECK: [[META18]] = distinct !{[[META18]], [[META16]]} ; CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META8]], [[META9]]} -; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META9]], [[META8]]} +; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META8]]} +; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META8]], [[META9]]} +; CHECK: [[LOOP22]] = distinct !{[[LOOP22]], [[META9]], [[META8]]} ;. diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index 94b74d2117e18..db64c755d005f 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -1088,7 +1088,7 @@ TEST_F(VPRecipeTest, CastVPInterleaveRecipeToVPUser) { VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2)); InterleaveGroup IG(4, false, Align(4)); - VPInterleaveRecipe Recipe(&IG, Addr, {}, Mask, false, DebugLoc()); + VPInterleaveRecipe Recipe(&IG, Addr, {}, Mask, false, {}, DebugLoc()); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); From 030511ca1f16b4bde2bf0eb94c7622249b8b7001 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 12 Aug 2025 11:58:12 +0100 Subject: [PATCH 2/6] !fixup address comments, thanks --- llvm/lib/Transforms/Vectorize/VPlan.h | 7 ++++++- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 15 +++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 8c10e06b5f423..561b783a2db0f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -936,6 +936,11 @@ class VPIRMetadata { /// Copy constructor for cloning. VPIRMetadata(const VPIRMetadata &Other) : Metadata(Other.Metadata) {} + VPIRMetadata &operator=(const VPIRMetadata &Other) { + Metadata = Other.Metadata; + return *this; + } + /// Add all metadata to \p I. void applyMetadata(Instruction &I) const; @@ -944,7 +949,7 @@ class VPIRMetadata { Metadata.emplace_back(Kind, Node); } - /// Intersect the this VPIRMetada objet with \p MD, keeping only metadata + /// Intersect this VPIRMetada object with \p MD, keeping only metadata /// nodes in both. void intersect(const VPIRMetadata &MD); }; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index b9afd9de0a4bf..5f00d2291f85d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2612,12 +2612,8 @@ void VPlanTransforms::createInterleaveGroups( VPDominatorTree VPDT; VPDT.recalculate(Plan); for (const auto *IG : InterleaveGroups) { - // Get or create the start address for the interleave group. - auto *Start = - cast(RecipeBuilder.getRecipe(IG->getMember(0))); - - VPIRMetadata InterleaveMD(*Start); SmallVector StoredValues; + VPIRMetadata InterleaveMD; for (unsigned i = 0; i < IG->getFactor(); ++i) { Instruction *MemI = IG->getMember(i); if (!MemI) @@ -2628,7 +2624,11 @@ void VPlanTransforms::createInterleaveGroups( continue; if (auto *StoreR = dyn_cast(MemR)) StoredValues.push_back(StoreR->getStoredValue()); - InterleaveMD.intersect(*MemR); + + if (i == 0) + InterleaveMD = VPIRMetadata(*MemR); + else + InterleaveMD.intersect(*MemR); } bool NeedsMaskForGaps = @@ -2644,6 +2644,9 @@ void VPlanTransforms::createInterleaveGroups( getLoadStorePointerOperand(IRInsertPos)->stripPointerCasts())) NW = Gep->getNoWrapFlags().withoutNoUnsignedWrap(); + // Get or create the start address for the interleave group. + auto *Start = + cast(RecipeBuilder.getRecipe(IG->getMember(0))); VPValue *Addr = Start->getAddr(); VPRecipeBase *AddrDef = Addr->getDefiningRecipe(); if (AddrDef && !VPDT.properlyDominates(AddrDef, InsertPos)) { From 493fa1d63833b319e1a502d97037d1423d85029e Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 14 Aug 2025 19:20:27 +0100 Subject: [PATCH 3/6] !fixup peel --- .../Transforms/Vectorize/VPlanTransforms.cpp | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 5f00d2291f85d..caa6c8dc85402 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2612,23 +2612,23 @@ void VPlanTransforms::createInterleaveGroups( VPDominatorTree VPDT; VPDT.recalculate(Plan); for (const auto *IG : InterleaveGroups) { + auto *Start = + cast(RecipeBuilder.getRecipe(IG->getMember(0))); + + VPIRMetadata InterleaveMD(*Start); SmallVector StoredValues; - VPIRMetadata InterleaveMD; - for (unsigned i = 0; i < IG->getFactor(); ++i) { - Instruction *MemI = IG->getMember(i); + if (auto *StoreR = dyn_cast(Start)) + StoredValues.push_back(StoreR->getStoredValue()); + for (unsigned I = 1; I < IG->getFactor(); ++I) { + Instruction *MemI = IG->getMember(I); if (!MemI) continue; VPWidenMemoryRecipe *MemR = cast(RecipeBuilder.getRecipe(MemI)); - if (!MemR) - continue; - if (auto *StoreR = dyn_cast(MemR)) - StoredValues.push_back(StoreR->getStoredValue()); - - if (i == 0) - InterleaveMD = VPIRMetadata(*MemR); - else - InterleaveMD.intersect(*MemR); + if (!StoredValues.empty()) + StoredValues.push_back( + cast(MemR)->getStoredValue()); + InterleaveMD.intersect(*MemR); } bool NeedsMaskForGaps = @@ -2645,8 +2645,6 @@ void VPlanTransforms::createInterleaveGroups( NW = Gep->getNoWrapFlags().withoutNoUnsignedWrap(); // Get or create the start address for the interleave group. - auto *Start = - cast(RecipeBuilder.getRecipe(IG->getMember(0))); VPValue *Addr = Start->getAddr(); VPRecipeBase *AddrDef = Addr->getDefiningRecipe(); if (AddrDef && !VPDT.properlyDominates(AddrDef, InsertPos)) { From 24e818e0e187fb8f2b814c4c61cc3c4b11be1081 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 16 Aug 2025 21:34:12 +0100 Subject: [PATCH 4/6] !fixup address comments, thanks --- llvm/lib/Transforms/Vectorize/VPlan.h | 2 +- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 10 ++++++---- .../lib/Transforms/Vectorize/VPlanTransforms.cpp | 16 +++++++--------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 561b783a2db0f..ce985322d9ca6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -950,7 +950,7 @@ class VPIRMetadata { } /// Intersect this VPIRMetada object with \p MD, keeping only metadata - /// nodes in both. + /// nodes that are common to both. void intersect(const VPIRMetadata &MD); }; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 484a4a2962f23..2644935494ab1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1561,14 +1561,16 @@ void VPIRMetadata::applyMetadata(Instruction &I) const { } void VPIRMetadata::intersect(const VPIRMetadata &Other) { - SmallVector> MetadataUnion; + SmallVector> MetadataIntersection; for (const auto &[KindA, MDA] : Metadata) { for (const auto &[KindB, MDB] : Other.Metadata) { - if (KindA == KindB && MDA == MDB) - MetadataUnion.emplace_back(KindA, MDA); + if (KindA == KindB && MDA == MDB) { + MetadataIntersection.emplace_back(KindA, MDA); + break; + } } } - Metadata = std::move(MetadataUnion); + Metadata = std::move(MetadataIntersection); } void VPWidenCallRecipe::execute(VPTransformState &State) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index caa6c8dc85402..41b163d08f759 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2614,21 +2614,19 @@ void VPlanTransforms::createInterleaveGroups( for (const auto *IG : InterleaveGroups) { auto *Start = cast(RecipeBuilder.getRecipe(IG->getMember(0))); - VPIRMetadata InterleaveMD(*Start); SmallVector StoredValues; if (auto *StoreR = dyn_cast(Start)) StoredValues.push_back(StoreR->getStoredValue()); for (unsigned I = 1; I < IG->getFactor(); ++I) { - Instruction *MemI = IG->getMember(I); - if (!MemI) + Instruction *MemberI = IG->getMember(I); + if (!MemberI) continue; - VPWidenMemoryRecipe *MemR = - cast(RecipeBuilder.getRecipe(MemI)); - if (!StoredValues.empty()) - StoredValues.push_back( - cast(MemR)->getStoredValue()); - InterleaveMD.intersect(*MemR); + VPWidenMemoryRecipe *MemoryR = + cast(RecipeBuilder.getRecipe(MemberI)); + if (auto *StoreR = dyn_cast(MemoryR)) + StoredValues.push_back(StoreR->getStoredValue()); + InterleaveMD.intersect(*MemoryR); } bool NeedsMaskForGaps = From 7d047b674a580dfa802f195b74ce5cbc6a6e2472 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 20 Aug 2025 22:33:20 +0100 Subject: [PATCH 5/6] !fixup remove stray line --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 41b163d08f759..27bab726ce2e4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2675,7 +2675,6 @@ void VPlanTransforms::createInterleaveGroups( ReversePtr->insertBefore(InsertPos); Addr = ReversePtr; } - auto *VPIG = new VPInterleaveRecipe(IG, Addr, StoredValues, InsertPos->getMask(), NeedsMaskForGaps, InterleaveMD, InsertPos->getDebugLoc()); From 7e6564fed838569ee78e2f75b3229114b1b98162 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 21 Aug 2025 12:28:16 +0100 Subject: [PATCH 6/6] !fixup update tests after updating to latest main --- .../LoopVectorize/X86/interleave-cost.ll | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll index 80451dc6f996b..6210a9aa66d50 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll @@ -511,6 +511,8 @@ define void @interleave_store_double_i64(ptr %dst) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: store <4 x double> , ptr [[DST]], align 8 +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: @@ -523,6 +525,7 @@ define void @interleave_store_double_i64(ptr %dst) { ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_0]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -620,6 +623,8 @@ define void @interleave_store_i64_double_2(ptr %dst) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: store <4 x double> , ptr [[DST]], align 8 +; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: @@ -632,6 +637,7 @@ define void @interleave_store_i64_double_2(ptr %dst) { ; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_1]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -662,3 +668,22 @@ attributes #1 = { "min-legal-vector-width"="0" "target-cpu"="cascadelake" } ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK: [[META4]] = !{[[META5:![0-9]+]]} +; CHECK: [[META5]] = distinct !{[[META5]], [[META6:![0-9]+]]} +; CHECK: [[META6]] = distinct !{[[META6]], !"LVerDomain"} +; CHECK: [[META7]] = !{[[META8:![0-9]+]]} +; CHECK: [[META8]] = distinct !{[[META8]], [[META6]]} +; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]} +; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]} +; CHECK: [[META11]] = !{[[META12:![0-9]+]]} +; CHECK: [[META12]] = distinct !{[[META12]], [[META13:![0-9]+]]} +; CHECK: [[META13]] = distinct !{[[META13]], !"LVerDomain"} +; CHECK: [[META14]] = !{[[META15:![0-9]+]]} +; CHECK: [[META15]] = distinct !{[[META15]], [[META13]]} +; CHECK: [[META16]] = !{[[META17:![0-9]+]]} +; CHECK: [[META17]] = distinct !{[[META17]], [[META13]]} +; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]} +; CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]]} +; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META2]], [[META1]]} +; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META2]], [[META1]]} +;.