From 050be9f01e80b57252d5fc798e41d6c74d6c2fc9 Mon Sep 17 00:00:00 2001 From: David Green Date: Sun, 21 Apr 2024 15:03:47 +0100 Subject: [PATCH 1/6] [VectorCombine] Add foldShuffleToIdentity This patch adds a basic version of a combine that attempts to fold away shuffles that, when combined, simplify away to an identity shuffle. For example: %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> %at = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> %abt = fneg <4 x half> %at %abb = fneg <4 x half> %ab %r = shufflevector <4 x half> %abt, <4 x half> %abb, <8 x i32> By looking through the shuffles, it can be simplified to: %r = fneg <8 x half> %a The code tracks each lane starting from the original shuffle, keeping track of a vector of {src, idx}. As we propagate up through the instructions we will either look through intermediate instructions (binops and unops) or see a collection of lanes that all have the same src and incrementing idx (an identity). We can also see a single value with identical lanes, which we can treat like a splat. Only the basic version is added here, handling identities, splats, binops and unops. In follow-up patches, other instructions can be added, such as constants, intrinsics, cmp/sel and zext/sext/trunc. 
--- .../Transforms/Vectorize/VectorCombine.cpp | 144 +++++++++++++++++ .../AArch64/shuffletoidentity.ll | 145 ++++-------------- .../Transforms/VectorCombine/X86/pr67803.ll | 4 +- .../VectorCombine/X86/select-shuffle.ll | 3 +- 4 files changed, 176 insertions(+), 120 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index da3c780550a08..ef7b629cda5f5 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -114,6 +114,7 @@ class VectorCombine { bool foldShuffleOfBinops(Instruction &I); bool foldShuffleOfCastops(Instruction &I); bool foldShuffleOfShuffles(Instruction &I); + bool foldShuffleToIdentity(Instruction &I); bool foldShuffleFromReductions(Instruction &I); bool foldTruncFromReductions(Instruction &I); bool foldSelectShuffle(Instruction &I, bool FromReduction = false); @@ -1667,6 +1668,148 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) { return true; } +// Starting from a shuffle, look up through operands tracking the shuffled index +// of each lane. If we can simplify away the shuffles to identities then +// do so. 
+bool VectorCombine::foldShuffleToIdentity(Instruction &I) { + FixedVectorType *Ty = dyn_cast(I.getType()); + if (!Ty || !isa(I.getOperand(0)) || + !isa(I.getOperand(1))) + return false; + + using InstLane = std::pair; + + auto LookThroughShuffles = [](Value *V, int Lane) -> InstLane { + while (auto *SV = dyn_cast(V)) { + unsigned NumElts = + cast(SV->getOperand(0)->getType())->getNumElements(); + int M = SV->getMaskValue(Lane); + if (M < 0) + return {nullptr, -1}; + else if (M < (int)NumElts) { + V = SV->getOperand(0); + Lane = M; + } else { + V = SV->getOperand(1); + Lane = M - NumElts; + } + } + return InstLane{V, Lane}; + }; + + auto GenerateInstLaneVectorFromOperand = + [&LookThroughShuffles](const SmallVector &Item, int Op) { + SmallVector NItem; + for (InstLane V : Item) { + NItem.emplace_back( + !V.first + ? InstLane{nullptr, -1} + : LookThroughShuffles( + cast(V.first)->getOperand(Op), V.second)); + } + return NItem; + }; + + SmallVector Start; + for (unsigned M = 0; M < Ty->getNumElements(); ++M) + Start.push_back(LookThroughShuffles(&I, M)); + + SmallVector> Worklist; + Worklist.push_back(Start); + SmallPtrSet IdentityLeafs, SplatLeafs; + unsigned NumVisited = 0; + + while (!Worklist.empty()) { + SmallVector Item = Worklist.pop_back_val(); + if (++NumVisited > MaxInstrsToScan) + return false; + + // If we found an undef first lane then bail out to keep things simple. + if (!Item[0].first) + return false; + + // Look for an identity value. + if (Item[0].second == 0 && Item[0].first->getType() == Ty && + all_of(drop_begin(enumerate(Item)), [&](const auto &E) { + return !E.value().first || (E.value().first == Item[0].first && + E.value().second == (int)E.index()); + })) { + IdentityLeafs.insert(Item[0].first); + continue; + } + // Look for a splat value. 
+ if (all_of(drop_begin(Item), [&](InstLane &IL) { + return !IL.first || + (IL.first == Item[0].first && IL.second == Item[0].second); + })) { + SplatLeafs.insert(Item[0].first); + continue; + } + + // We need each element to be the same type of value, and check that each + // element has a single use. + if (!all_of(drop_begin(Item), [&](InstLane IL) { + if (!IL.first) + return true; + if (isa(IL.first) && + !cast(IL.first)->hasOneUse()) + return false; + return IL.first->getValueID() == Item[0].first->getValueID() && + (!isa(IL.first) || + cast(IL.first)->getIntrinsicID() == + cast(Item[0].first)->getIntrinsicID()); + })) + return false; + + // Check the operator is one that we support. + if (isa(Item[0].first)) { + Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0)); + Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 1)); + } else if (isa(Item[0].first)) { + Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0)); + } else { + return false; + } + } + + // If we got this far, we know the shuffles are superfluous and can be + // removed. Scan through again and generate the new tree of instructions. 
+ std::function &)> generate = + [&](const SmallVector &Item) -> Value * { + if (IdentityLeafs.contains(Item[0].first) && + all_of(drop_begin(enumerate(Item)), [&](const auto &E) { + return !E.value().first || (E.value().first == Item[0].first && + E.value().second == (int)E.index()); + })) { + return Item[0].first; + } else if (SplatLeafs.contains(Item[0].first)) { + if (auto ILI = dyn_cast(Item[0].first)) + Builder.SetInsertPoint(*ILI->getInsertionPointAfterDef()); + else if (isa(Item[0].first)) + Builder.SetInsertPointPastAllocas(I.getParent()->getParent()); + SmallVector Mask(Ty->getNumElements(), Item[0].second); + return Builder.CreateShuffleVector(Item[0].first, Mask); + } + + auto *I = cast(Item[0].first); + SmallVector Ops; + unsigned E = I->getNumOperands(); + for (unsigned Idx = 0; Idx < E; Idx++) + Ops.push_back(generate(GenerateInstLaneVectorFromOperand(Item, Idx))); + Builder.SetInsertPoint(I); + if (auto BI = dyn_cast(I)) + return Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(), + Ops[0], Ops[1]); + if (auto UI = dyn_cast(I)) + return Builder.CreateUnOp((Instruction::UnaryOps)UI->getOpcode(), Ops[0]); + llvm_unreachable("Unhandled instruction in generate"); + }; + + Value *V = generate(Start); + replaceValue(I, *V); + return true; +} + /// Given a commutative reduction, the order of the input lanes does not alter /// the results. We can use this to remove certain shuffles feeding the /// reduction, removing the need to shuffle at all. 
@@ -2224,6 +2367,7 @@ bool VectorCombine::run() { MadeChange |= foldShuffleOfCastops(I); MadeChange |= foldShuffleOfShuffles(I); MadeChange |= foldSelectShuffle(I); + MadeChange |= foldShuffleToIdentity(I); break; case Instruction::BitCast: MadeChange |= foldBitcastShuffle(I); diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll index 6a81964b917ed..43ece88256acb 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll @@ -15,13 +15,7 @@ define <8 x i8> @trivial(<8 x i8> %a) { define <8 x i8> @add(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: @add( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[ABT:%.*]] = add <4 x i8> [[AT]], [[BT]] -; CHECK-NEXT: [[ABB:%.*]] = add <4 x i8> [[AB]], [[BB]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = add <8 x i8> [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: ret <8 x i8> [[R]] ; %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> @@ -80,13 +74,7 @@ define <8 x i8> @wrong_lanes(<8 x i8> %a, <8 x i8> %b) { define <8 x half> @fadd(<8 x half> %a, <8 x half> %b) { ; CHECK-LABEL: @fadd( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x half> [[B]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[ABT:%.*]] = fadd <4 x half> 
[[AT]], [[BT]] -; CHECK-NEXT: [[ABB:%.*]] = fadd <4 x half> [[AB]], [[BB]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: ret <8 x half> [[R]] ; %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> @@ -101,11 +89,7 @@ define <8 x half> @fadd(<8 x half> %a, <8 x half> %b) { define <8 x half> @fneg(<8 x half> %a, <8 x half> %b) { ; CHECK-LABEL: @fneg( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[ABT:%.*]] = fneg <4 x half> [[AT]] -; CHECK-NEXT: [[ABB:%.*]] = fneg <4 x half> [[AB]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = fneg <8 x half> [[A:%.*]] ; CHECK-NEXT: ret <8 x half> [[R]] ; %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> @@ -135,12 +119,8 @@ define <8 x i8> @abs(<8 x i8> %a) { define <8 x half> @splat0(<8 x half> %a, <8 x half> %b) { ; CHECK-LABEL: @splat0( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[BS:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[ABT:%.*]] = fadd <4 x half> [[AT]], [[BS]] -; CHECK-NEXT: [[ABB:%.*]] = fadd <4 x half> [[AB]], [[BS]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[A:%.*]], [[TMP1]] ; CHECK-NEXT: ret <8 x half> [[R]] ; %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> @@ -154,12 +134,8 @@ define <8 x half> @splat0(<8 x half> %a, <8 x 
half> %b) { define <8 x half> @splat2(<8 x half> %a, <8 x half> %b) { ; CHECK-LABEL: @splat2( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[BS:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[ABT:%.*]] = fadd <4 x half> [[AT]], [[BS]] -; CHECK-NEXT: [[ABB:%.*]] = fadd <4 x half> [[AB]], [[BS]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[A:%.*]], [[TMP1]] ; CHECK-NEXT: ret <8 x half> [[R]] ; %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> @@ -173,12 +149,8 @@ define <8 x half> @splat2(<8 x half> %a, <8 x half> %b) { define <8 x half> @splatandidentity(<8 x half> %a, <8 x half> %b) { ; CHECK-LABEL: @splatandidentity( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> -; CHECK-NEXT: [[BS:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[ABT:%.*]] = fadd <4 x half> [[AT]], [[BS]] -; CHECK-NEXT: [[ABB:%.*]] = fadd <4 x half> [[AB]], [[BS]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[A]], [[TMP1]] ; CHECK-NEXT: ret <8 x half> [[R]] ; %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> @@ -192,11 +164,9 @@ define <8 x half> @splatandidentity(<8 x half> %a, <8 x half> %b) { define <8 x half> @splattwice(<8 x half> %a, <8 x half> %b) { ; CHECK-LABEL: @splattwice( -; CHECK-NEXT: [[AS:%.*]] = 
shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BS:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[AB1:%.*]] = fadd <4 x half> [[AS]], [[BS]] -; CHECK-NEXT: [[AB2:%.*]] = fadd <4 x half> [[AS]], [[BS]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[AB1]], <4 x half> [[AB2]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[TMP2]], [[TMP1]] ; CHECK-NEXT: ret <8 x half> [[R]] ; %as = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> zeroinitializer @@ -209,13 +179,7 @@ define <8 x half> @splattwice(<8 x half> %a, <8 x half> %b) { define <8 x i8> @undeflane(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: @undeflane( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[ABT:%.*]] = add <4 x i8> [[AT]], [[BT]] -; CHECK-NEXT: [[ABB:%.*]] = add <4 x i8> [[AB]], [[BB]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = add <8 x i8> [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: ret <8 x i8> [[R]] ; %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> @@ -281,18 +245,9 @@ define <8 x i8> @constantdiff2(<8 x i8> %a) { define <8 x i8> @inner_shuffle(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK-LABEL: @inner_shuffle( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> 
poison, <4 x i32> -; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[CS:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[ABT:%.*]] = mul <4 x i8> [[AT]], [[BT]] -; CHECK-NEXT: [[ABB:%.*]] = mul <4 x i8> [[AB]], [[BB]] -; CHECK-NEXT: [[ABT2:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> poison, <4 x i32> -; CHECK-NEXT: [[ABB2:%.*]] = shufflevector <4 x i8> [[ABB]], <4 x i8> poison, <4 x i32> -; CHECK-NEXT: [[ABT3:%.*]] = add <4 x i8> [[ABT2]], [[CS]] -; CHECK-NEXT: [[ABB3:%.*]] = add <4 x i8> [[ABB2]], [[CS]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT3]], <4 x i8> [[ABB3]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i8> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[R:%.*]] = add <8 x i8> [[TMP2]], [[TMP1]] ; CHECK-NEXT: ret <8 x i8> [[R]] ; %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> @@ -335,14 +290,9 @@ define <8 x i8> @extrause_add(<8 x i8> %a, <8 x i8> %b) { define <8 x i8> @extrause_shuffle(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: @extrause_shuffle( -; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> -; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> ; CHECK-NEXT: call void @use(<4 x i8> [[BT]]) -; CHECK-NEXT: [[ABT:%.*]] = add <4 x i8> [[AT]], [[BT]] -; CHECK-NEXT: [[ABB:%.*]] = add <4 x i8> [[AB]], [[BB]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> +; CHECK-NEXT: 
[[R:%.*]] = add <8 x i8> [[A:%.*]], [[B]] ; CHECK-NEXT: ret <8 x i8> [[R]] ; %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> @@ -457,7 +407,7 @@ define <8 x i8> @intrinsics_minmax(<8 x i8> %a, <8 x i8> %b) { ; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> ; CHECK-NEXT: [[ABT:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[AT]], <4 x i8> [[BT]]) ; CHECK-NEXT: [[ABB:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[AB]], <4 x i8> [[BB]]) -; CHECK-NEXT: [[ABT1:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[ABT]], <4 x i8> [[BT]]) +; CHECK-NEXT: [[ABT1:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[ABT]], <4 x i8> [[BT]]) ; CHECK-NEXT: [[ABB1:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[ABB]], <4 x i8> [[BB]]) ; CHECK-NEXT: [[ABT2:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[ABT1]], <4 x i8> [[BT]]) ; CHECK-NEXT: [[ABB2:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[ABB1]], <4 x i8> [[BB]]) @@ -472,7 +422,7 @@ define <8 x i8> @intrinsics_minmax(<8 x i8> %a, <8 x i8> %b) { %bt = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> %abt = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %at, <4 x i8> %bt) %abb = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %ab, <4 x i8> %bb) - %abt1 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %abt, <4 x i8> %bt) + %abt1 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %abt, <4 x i8> %bt) %abb1 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %abb, <4 x i8> %bb) %abt2 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> %abt1, <4 x i8> %bt) %abb2 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> %abb1, <4 x i8> %bb) @@ -540,53 +490,16 @@ define void @v8f64interleave(i64 %0, ptr %1, ptr %x, double %z) { ; CHECK-LABEL: @v8f64interleave( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[Z:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = 
shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x double>, ptr [[TMP1:%.*]], align 8 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC27:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC28:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC29:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC30:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC31:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC32:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC33:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x double> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[X:%.*]], i64 [[TMP0:%.*]] -; CHECK-NEXT: [[WIDE_VEC34:%.*]] = load <16 x double>, ptr [[TMP3]], align 8 -; CHECK-NEXT: [[STRIDED_VEC35:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC36:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC37:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC38:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC39:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC40:%.*]] = shufflevector <16 x double> 
[[WIDE_VEC34]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC41:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC42:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <2 x double> [[STRIDED_VEC35]], [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x double> [[STRIDED_VEC27]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[STRIDED_VEC36]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x double> [[STRIDED_VEC28]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP8:%.*]] = fadd fast <2 x double> [[STRIDED_VEC37]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <2 x double> [[STRIDED_VEC29]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[STRIDED_VEC38]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <2 x double> [[STRIDED_VEC30]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP12:%.*]] = fadd fast <2 x double> [[STRIDED_VEC39]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <2 x double> [[STRIDED_VEC31]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP14:%.*]] = fadd fast <2 x double> [[STRIDED_VEC40]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[STRIDED_VEC32]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP16:%.*]] = fadd fast <2 x double> [[STRIDED_VEC41]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = or disjoint i64 [[TMP0]], 7 -; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <2 x double> [[STRIDED_VEC33]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = fadd fast <2 x double> [[STRIDED_VEC42]], [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 -56 -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> [[TMP6]], <4 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> 
[[TMP10]], <4 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> [[TMP14]], <4 x i32> -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <2 x double> [[TMP16]], <2 x double> [[TMP20]], <4 x i32> -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x double> [[TMP22]], <4 x double> [[TMP23]], <8 x i32> -; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x double> [[TMP24]], <4 x double> [[TMP25]], <8 x i32> -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP26]], <8 x double> [[TMP27]], <16 x i32> -; CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], ptr [[TMP21]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <16 x double> [[WIDE_VEC]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[X:%.*]], i64 [[TMP0:%.*]] +; CHECK-NEXT: [[WIDE_VEC34:%.*]] = load <16 x double>, ptr [[TMP4]], align 8 +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd <16 x double> [[WIDE_VEC34]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[TMP0]], 7 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 -56 +; CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/VectorCombine/X86/pr67803.ll b/llvm/test/Transforms/VectorCombine/X86/pr67803.ll index 0277580d21fcb..8dd3bc684a4c1 100644 --- a/llvm/test/Transforms/VectorCombine/X86/pr67803.ll +++ b/llvm/test/Transforms/VectorCombine/X86/pr67803.ll @@ -5,8 +5,8 @@ define <4 x i64> @PR67803(<8 x i32> %x, <8 x i32> %y, <8 x float> %a, <8 x float> %b) { ; CHECK-LABEL: @PR67803( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i32> ; CHECK-NEXT: 
[[CONCAT:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64> ; CHECK-NEXT: [[MASK:%.*]] = bitcast <4 x i64> [[CONCAT]] to <8 x float> ; CHECK-NEXT: [[SEL:%.*]] = tail call noundef <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[MASK]]) diff --git a/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll b/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll index 685d661ea6bcd..a14995403cfac 100644 --- a/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll @@ -15,8 +15,7 @@ define <4 x double> @PR60649() { ; CHECK-NEXT: [[T0:%.*]] = phi <4 x double> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[UNREACHABLE:%.*]] ] ; CHECK-NEXT: [[T1:%.*]] = phi <4 x double> [ zeroinitializer, [[ENTRY]] ], [ zeroinitializer, [[UNREACHABLE]] ] ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[T0]], <4 x double> [[T0]], <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[T0]], <4 x double> [[T0]], <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x double> [[TMP1]], +; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x double> [[T0]], ; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x double> [[TMP0]], ; CHECK-NEXT: [[T5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP3]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[T5]] From 3843999b9fdd0bdfd980408dfd42c0fe9d7007ac Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 17 Apr 2024 13:45:02 +0100 Subject: [PATCH 2/6] Address comments and fix failing test --- llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index ef7b629cda5f5..89bc385a54b20 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -1672,7 +1672,7 @@ bool 
VectorCombine::foldShuffleOfShuffles(Instruction &I) { // of each lane. If we can simplify away the shuffles to identities then // do so. bool VectorCombine::foldShuffleToIdentity(Instruction &I) { - FixedVectorType *Ty = dyn_cast(I.getType()); + auto *Ty = dyn_cast(I.getType()); if (!Ty || !isa(I.getOperand(0)) || !isa(I.getOperand(1))) return false; @@ -1685,7 +1685,7 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) { cast(SV->getOperand(0)->getType())->getNumElements(); int M = SV->getMaskValue(Lane); if (M < 0) - return {nullptr, -1}; + return {nullptr, PoisonMaskElem}; else if (M < (int)NumElts) { V = SV->getOperand(0); Lane = M; @@ -1698,12 +1698,12 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) { }; auto GenerateInstLaneVectorFromOperand = - [&LookThroughShuffles](const SmallVector &Item, int Op) { + [&LookThroughShuffles](ArrayRef Item, int Op) { SmallVector NItem; for (InstLane V : Item) { NItem.emplace_back( !V.first - ? InstLane{nullptr, -1} + ? InstLane{nullptr, PoisonMaskElem} : LookThroughShuffles( cast(V.first)->getOperand(Op), V.second)); } @@ -1751,8 +1751,7 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) { if (!all_of(drop_begin(Item), [&](InstLane IL) { if (!IL.first) return true; - if (isa(IL.first) && - !cast(IL.first)->hasOneUse()) + if (auto *I = dyn_cast(IL.first); I && !I->hasOneUse()) return false; return IL.first->getValueID() == Item[0].first->getValueID() && (!isa(IL.first) || @@ -1774,8 +1773,8 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) { // If we got this far, we know the shuffles are superfluous and can be // removed. Scan through again and generate the new tree of instructions. 
- std::function &)> generate = - [&](const SmallVector &Item) -> Value * { + std::function)> generate = + [&](ArrayRef Item) -> Value * { if (IdentityLeafs.contains(Item[0].first) && all_of(drop_begin(enumerate(Item)), [&](const auto &E) { return !E.value().first || (E.value().first == Item[0].first && From 3aff7df48e319909719633d6ea0a433fd1ad548e Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 22 Apr 2024 08:42:02 +0100 Subject: [PATCH 3/6] Rebase and update phase-ordering tests --- .../AArch64/interleavevectorization.ll | 35 ++++++------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll index f1d7c0e0c4123..c085e10c049a9 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll @@ -22,19 +22,13 @@ define void @add4(ptr noalias noundef %x, ptr noalias noundef %y, i32 noundef %n ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i16>, ptr [[TMP0]], align 2 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_VEC24:%.*]] = load <32 x i16>, ptr [[TMP1]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]] -; CHECK-NEXT: [[TMP3:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]] -; CHECK-NEXT: [[TMP4:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]] -; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP6:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> [[TMP3]], <16 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <32 x i16> [[TMP4]], <32 x i16> [[TMP6]], <16 x i32> -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i16> 
[[TMP7]], <16 x i16> [[TMP8]], <32 x i32> +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]] +; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP2]] ; CHECK-NEXT: store <32 x i16> [[INTERLEAVED_VEC]], ptr [[GEP]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP9]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -412,22 +406,13 @@ define void @addmul(ptr noalias noundef %x, ptr noundef %y, ptr noundef %z, i32 ; CHECK-NEXT: [[TMP2:%.*]] = mul <32 x i16> [[WIDE_VEC31]], [[WIDE_VEC]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_VEC36:%.*]] = load <32 x i16>, ptr [[TMP3]], align 2 -; CHECK-NEXT: [[TMP4:%.*]] = add <32 x i16> [[TMP2]], [[WIDE_VEC36]] -; CHECK-NEXT: [[TMP5:%.*]] = mul <32 x i16> [[WIDE_VEC31]], [[WIDE_VEC]] -; CHECK-NEXT: [[TMP6:%.*]] = add <32 x i16> [[TMP5]], [[WIDE_VEC36]] -; CHECK-NEXT: [[TMP7:%.*]] = mul <32 x i16> [[WIDE_VEC31]], [[WIDE_VEC]] -; CHECK-NEXT: [[TMP8:%.*]] = add <32 x i16> [[TMP7]], [[WIDE_VEC36]] -; CHECK-NEXT: [[TMP9:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP10:%.*]] = mul <32 x i16> [[WIDE_VEC31]], [[WIDE_VEC]] -; CHECK-NEXT: [[TMP11:%.*]] = add <32 x i16> [[TMP10]], [[WIDE_VEC36]] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <32 x i16> [[TMP4]], <32 x i16> [[TMP6]], <16 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <32 x i16> [[TMP8]], <32 x i16> [[TMP11]], <16 x i32> -; CHECK-NEXT: 
[[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i16> [[TMP12]], <16 x i16> [[TMP13]], <32 x i32> +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[TMP2]], [[WIDE_VEC36]] +; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP4]] ; CHECK-NEXT: store <32 x i16> [[INTERLEAVED_VEC]], ptr [[GEP]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP14]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; From 33065430bf037d81a699af81f1b863f0865fc05b Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 22 Apr 2024 14:35:28 +0100 Subject: [PATCH 4/6] Address more comments --- .../Transforms/Vectorize/VectorCombine.cpp | 36 ++++++++++--------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 89bc385a54b20..1a0f90a080769 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -1710,9 +1710,9 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) { return NItem; }; - SmallVector Start; - for (unsigned M = 0; M < Ty->getNumElements(); ++M) - Start.push_back(LookThroughShuffles(&I, M)); + SmallVector Start(Ty->getNumElements()); + for (unsigned M = 0, E = Ty->getNumElements(); M < E; ++M) + Start[M] = LookThroughShuffles(&I, M); SmallVector> Worklist; Worklist.push_back(Start); @@ -1753,10 +1753,12 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) { return true; if (auto *I = dyn_cast(IL.first); I && !I->hasOneUse()) return false; - return 
IL.first->getValueID() == Item[0].first->getValueID() && - (!isa(IL.first) || - cast(IL.first)->getIntrinsicID() == - cast(Item[0].first)->getIntrinsicID()); + if (IL.first->getValueID() != Item[0].first->getValueID()) + return false; + auto *II = dyn_cast(IL.first); + return !II || + II->getIntrinsicID() == + cast(Item[0].first)->getIntrinsicID(); })) return false; @@ -1773,7 +1775,7 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) { // If we got this far, we know the shuffles are superfluous and can be // removed. Scan through again and generate the new tree of instructions. - std::function)> generate = + std::function)> Generate = [&](ArrayRef Item) -> Value * { if (IdentityLeafs.contains(Item[0].first) && all_of(drop_begin(enumerate(Item)), [&](const auto &E) { @@ -1781,7 +1783,8 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) { E.value().second == (int)E.index()); })) { return Item[0].first; - } else if (SplatLeafs.contains(Item[0].first)) { + } + if (SplatLeafs.contains(Item[0].first)) { if (auto ILI = dyn_cast(Item[0].first)) Builder.SetInsertPoint(*ILI->getInsertionPointAfterDef()); else if (isa(Item[0].first)) @@ -1791,20 +1794,19 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) { } auto *I = cast(Item[0].first); - SmallVector Ops; - unsigned E = I->getNumOperands(); - for (unsigned Idx = 0; Idx < E; Idx++) - Ops.push_back(generate(GenerateInstLaneVectorFromOperand(Item, Idx))); + SmallVector Ops(I->getNumOperands()); + for (unsigned Idx = 0, E = I->getNumOperands(); Idx < E; Idx++) + Ops[Idx] = Generate(GenerateInstLaneVectorFromOperand(Item, Idx)); Builder.SetInsertPoint(I); if (auto BI = dyn_cast(I)) return Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(), Ops[0], Ops[1]); - if (auto UI = dyn_cast(I)) - return Builder.CreateUnOp((Instruction::UnaryOps)UI->getOpcode(), Ops[0]); - llvm_unreachable("Unhandled instruction in generate"); + assert(isa(I) && + "Unexpected instruction type in Generate"); + return 
Builder.CreateUnOp((Instruction::UnaryOps)I->getOpcode(), Ops[0]); }; - Value *V = generate(Start); + Value *V = Generate(Start); replaceValue(I, *V); return true; } From ee1e4df20171e42d5b11974481af72aa90ad7446 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 1 May 2024 13:37:23 +0100 Subject: [PATCH 5/6] Rebase over other changes and fixup tests. --- llvm/test/Transforms/VectorCombine/X86/pr67803.ll | 4 ++-- llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/test/Transforms/VectorCombine/X86/pr67803.ll b/llvm/test/Transforms/VectorCombine/X86/pr67803.ll index 8dd3bc684a4c1..0277580d21fcb 100644 --- a/llvm/test/Transforms/VectorCombine/X86/pr67803.ll +++ b/llvm/test/Transforms/VectorCombine/X86/pr67803.ll @@ -5,8 +5,8 @@ define <4 x i64> @PR67803(<8 x i32> %x, <8 x i32> %y, <8 x float> %a, <8 x float> %b) { ; CHECK-LABEL: @PR67803( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i32> +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <8 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> ; CHECK-NEXT: [[CONCAT:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64> ; CHECK-NEXT: [[MASK:%.*]] = bitcast <4 x i64> [[CONCAT]] to <8 x float> ; CHECK-NEXT: [[SEL:%.*]] = tail call noundef <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[MASK]]) diff --git a/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll b/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll index a14995403cfac..60a6c4b1d9b93 100644 --- a/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll @@ -12,11 +12,11 @@ define <4 x double> @PR60649() { ; CHECK: unreachable: ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[T0:%.*]] = phi <4 x double> [ 
zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[UNREACHABLE:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x double> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[UNREACHABLE:%.*]] ] ; CHECK-NEXT: [[T1:%.*]] = phi <4 x double> [ zeroinitializer, [[ENTRY]] ], [ zeroinitializer, [[UNREACHABLE]] ] -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[T0]], <4 x double> [[T0]], <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x double> [[T0]], -; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x double> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> [[TMP0]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x double> [[TMP0]], +; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x double> [[TMP1]], ; CHECK-NEXT: [[T5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP3]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[T5]] ; From 01af3d6ebc8ec57fe3575d211b35123804c162ea Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 2 May 2024 15:28:16 +0100 Subject: [PATCH 6/6] Exclude div/rem --- .../Transforms/Vectorize/VectorCombine.cpp | 6 +++-- .../AArch64/shuffletoidentity.ll | 22 +++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 1a0f90a080769..04d2ff594bf5f 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -1762,8 +1762,10 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) { })) return false; - // Check the operator is one that we support. - if (isa(Item[0].first)) { + // Check the operator is one that we support. We exclude div/rem in case + // they hit UB from poison lanes. 
+ if (isa(Item[0].first) && + !cast(Item[0].first)->isIntDivRem()) { Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0)); Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 1)); } else if (isa(Item[0].first)) { diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll index 43ece88256acb..b96732e24ce4e 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll @@ -486,6 +486,28 @@ define <8 x i8> @intrinsics_different(<8 x i8> %a, <8 x i8> %b) { ret <8 x i8> %r } +; div and rem are currently excluded. +define <8 x i8> @div(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: @div( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[ABT:%.*]] = udiv <4 x i8> [[AT]], [[BT]] +; CHECK-NEXT: [[ABB:%.*]] = udiv <4 x i8> [[AB]], [[BB]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> +; CHECK-NEXT: ret <8 x i8> [[R]] +; + %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> + %at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> + %bb = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> + %bt = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> + %abt = udiv <4 x i8> %at, %bt + %abb = udiv <4 x i8> %ab, %bb + %r = shufflevector <4 x i8> %abt, <4 x i8> %abb, <8 x i32> + ret <8 x i8> %r +} + define void @v8f64interleave(i64 %0, ptr %1, ptr %x, double %z) { ; CHECK-LABEL: @v8f64interleave( ; CHECK-NEXT: entry: