From 01e01195ac2f25e6483e569a7a0f6b4062505a38 Mon Sep 17 00:00:00 2001 From: Vasileios Porpodas Date: Wed, 5 Mar 2025 15:04:09 -0800 Subject: [PATCH 1/2] [SandboxVec] Add a simple pack reuse pass This patch implements a simple pass that tries to de-duplicate packs. If there are two packing patterns inserting the exact same values in the exact same order, then we will keep the top-most one of them. Even though such patterns may be optimized away by subsequent passes it is still useful to do this within the vectorizer because otherwise the cost estimation may be off, making the vectorizer over conservative. --- .../SandboxVectorizer/Passes/PackReuse.h | 36 ++++++++++ .../Vectorize/SandboxVectorizer/VecUtils.h | 70 +++++++++++++++++- llvm/lib/Transforms/Vectorize/CMakeLists.txt | 1 + .../SandboxVectorizer/Passes/PackReuse.cpp | 53 ++++++++++++++ .../SandboxVectorizer/Passes/PassRegistry.def | 1 + .../SandboxVectorizerPassBuilder.cpp | 1 + .../SandboxVectorizer/pack_reuse_basic.ll | 71 +++++++++++++++++++ .../pack_reuse_end_to_end.ll | 45 ++++++++++++ .../SandboxVectorizer/VecUtilsTest.cpp | 50 +++++++++++++ 9 files changed, 326 insertions(+), 2 deletions(-) create mode 100644 llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PackReuse.h create mode 100644 llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PackReuse.cpp create mode 100644 llvm/test/Transforms/SandboxVectorizer/pack_reuse_basic.ll create mode 100644 llvm/test/Transforms/SandboxVectorizer/pack_reuse_end_to_end.ll diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PackReuse.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PackReuse.h new file mode 100644 index 0000000000000..bd0bf244f6b1d --- /dev/null +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PackReuse.h @@ -0,0 +1,36 @@ +//===- PackReuse.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 
with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A pack de-duplication pass. +// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_PACKREUSE_H +#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_PACKREUSE_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/SandboxIR/Pass.h" +#include "llvm/SandboxIR/Region.h" + +namespace llvm::sandboxir { + +/// This pass aims at de-duplicating packs, i.e., try to reuse already existing +/// pack patterns instead of keeping both. +/// This is useful because even though the duplicates will most probably be +/// optimized away by future passes, their added cost can make vectorization +/// more conservative than it should be. +class PackReuse final : public RegionPass { + bool Change = false; + +public: + PackReuse() : RegionPass("pack-reuse") {} + bool runOnRegion(Region &Rgn, const Analyses &A) final; +}; + +} // namespace llvm::sandboxir + +#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_PACKREUSE_H diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h index bec1cecf241f6..759662309c2af 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h @@ -17,7 +17,25 @@ #include "llvm/SandboxIR/Type.h" #include "llvm/SandboxIR/Utils.h" -namespace llvm::sandboxir { +namespace llvm { +/// Traits for DenseMap. 
+template <> struct DenseMapInfo> { + static inline SmallVector getEmptyKey() { + return SmallVector({(sandboxir::Value *)-1}); + } + static inline SmallVector getTombstoneKey() { + return SmallVector({(sandboxir::Value *)-2}); + } + static unsigned getHashValue(const SmallVector &Vec) { + return hash_combine_range(Vec.begin(), Vec.end()); + } + static bool isEqual(const SmallVector &Vec1, + const SmallVector &Vec2) { + return Vec1 == Vec2; + } +}; + +namespace sandboxir { class VecUtils { public: @@ -179,6 +197,52 @@ class VecUtils { /// \Returns the first integer power of 2 that is <= Num. static unsigned getFloorPowerOf2(unsigned Num); + /// If \p I is the last instruction of a pack pattern, then this function + /// returns the instructions in the pack and the operands in the pack, else + /// returns nullopt. + static std::optional< + std::pair, SmallVector>> + matchPack(Instruction *I) { + // TODO: Support vector pack patterns. + // TODO: Support out-of-order inserts. + + // Early return if `I` is not an Insert. + if (!isa(I)) + return std::nullopt; + auto *BB0 = I->getParent(); + // The pack contains as many instrs as the lanes of the bottom-most Insert + unsigned ExpectedNumInserts = VecUtils::getNumLanes(I); + assert(ExpectedNumInserts >= 2 && "Expected at least 2 inserts!"); + SmallVector PackInstrs; + SmallVector PackOperands; + PackOperands.resize(ExpectedNumInserts); + // Collect the inserts by walking up the use-def chain. + Instruction *InsertI = I; + for ([[maybe_unused]] auto Cnt : seq(ExpectedNumInserts)) { + if (InsertI == nullptr) + return std::nullopt; + if (InsertI->getParent() != BB0) + return std::nullopt; + // Check the lane. 
+ auto *LaneC = dyn_cast(InsertI->getOperand(2)); + unsigned ExpectedLane = ExpectedNumInserts - Cnt - 1; + if (LaneC == nullptr || LaneC->getSExtValue() != ExpectedLane) + return std::nullopt; + PackInstrs.push_back(InsertI); + PackOperands[ExpectedLane] = InsertI->getOperand(1); + + Value *Op = InsertI->getOperand(0); + if (Cnt == ExpectedNumInserts - 1) { + if (!isa(Op)) + return std::nullopt; + } else { + InsertI = dyn_cast(Op); + } + } + // Check the topmost insert. The operand should be a Poison. + return std::make_pair(PackInstrs, PackOperands); + } + #ifndef NDEBUG /// Helper dump function for debugging. LLVM_DUMP_METHOD static void dump(ArrayRef Bndl); @@ -186,6 +250,8 @@ class VecUtils { #endif // NDEBUG }; -} // namespace llvm::sandboxir +} // namespace sandboxir + +} // namespace llvm #endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_VECUTILS_H diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt index 63cf1a5e3f7cf..96670fe3ea195 100644 --- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt +++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_component_library(LLVMVectorize SandboxVectorizer/Interval.cpp SandboxVectorizer/Legality.cpp SandboxVectorizer/Passes/BottomUpVec.cpp + SandboxVectorizer/Passes/PackReuse.cpp SandboxVectorizer/Passes/RegionsFromBBs.cpp SandboxVectorizer/Passes/RegionsFromMetadata.cpp SandboxVectorizer/Passes/SeedCollection.cpp diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PackReuse.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PackReuse.cpp new file mode 100644 index 0000000000000..d427c6d824815 --- /dev/null +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PackReuse.cpp @@ -0,0 +1,53 @@ +//===- PackReuse.cpp - A pack de-duplication pass -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PackReuse.h" +#include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h" + +namespace llvm::sandboxir { + +bool PackReuse::runOnRegion(Region &Rgn, const Analyses &A) { + if (Rgn.empty()) + return Change; + // The key to the map is the ordered operands of the pack. + // The value is a vector of all Pack Instrs with the same operands. + DenseMap>, + SmallVector>> + PacksMap; + // Go over the region and look for pack patterns. + for (auto *I : Rgn) { + auto PairOpt = VecUtils::matchPack(I); + if (PairOpt) { + // TODO: For now limit pack reuse within a BB. + BasicBlock *BB = (*PairOpt->first.front()).getParent(); + PacksMap[{BB, PairOpt->second}].push_back(PairOpt->first); + } + } + for (auto &Pair : PacksMap) { + auto &Packs = Pair.second; + if (Packs.size() <= 1) + continue; + // Sort packs by program order. + sort(Packs, [](const auto &PackInstrs1, const auto &PackInstrs2) { + return PackInstrs1.front()->comesBefore(PackInstrs2.front()); + }); + Instruction *TopMostPack = Packs[0].front(); + // Replace duplicate packs with the first one. + for (const auto &PackInstrs : + make_range(std::next(Packs.begin()), Packs.end())) { + PackInstrs.front()->replaceAllUsesWith(TopMostPack); + // Delete the pack instrs bottom-up since they are now dead. 
+ for (auto *PackI : PackInstrs) + PackI->eraseFromParent(); + } + Change = true; + } + return Change; +} + +} // namespace llvm::sandboxir diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def index c12bd91997943..02b973926854d 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def @@ -18,6 +18,7 @@ #endif REGION_PASS("null", ::llvm::sandboxir::NullPass) +REGION_PASS("pack-reuse", ::llvm::sandboxir::PackReuse) REGION_PASS("print-instruction-count", ::llvm::sandboxir::PrintInstructionCount) REGION_PASS("print-region", ::llvm::sandboxir::PrintRegion) REGION_PASS("tr-save", ::llvm::sandboxir::TransactionSave) diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.cpp index feeda65b3f6e2..8432b066f966c 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.cpp @@ -2,6 +2,7 @@ #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h" #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/NullPass.h" +#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PackReuse.h" #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PrintInstructionCount.h" #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PrintRegion.h" #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromBBs.h" diff --git a/llvm/test/Transforms/SandboxVectorizer/pack_reuse_basic.ll b/llvm/test/Transforms/SandboxVectorizer/pack_reuse_basic.ll new file mode 100644 index 0000000000000..33cdaf54252c2 --- /dev/null +++ b/llvm/test/Transforms/SandboxVectorizer/pack_reuse_basic.ll @@ -0,0 +1,71 @@ +; NOTE: 
Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-passes="regions-from-metadata<pack-reuse>" %s -S | FileCheck %s + +define void @pack_reuse(i8 %v0, i8 %v1, ptr %ptr) { +; CHECK-LABEL: define void @pack_reuse( +; CHECK-SAME: i8 [[V0:%.*]], i8 [[V1:%.*]], ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[PACKA0:%.*]] = insertelement <2 x i8> poison, i8 [[V0]], i64 0, !sandboxvec [[META0:![0-9]+]] +; CHECK-NEXT: [[PACKA1:%.*]] = insertelement <2 x i8> [[PACKA0]], i8 [[V1]], i64 1, !sandboxvec [[META0]] +; CHECK-NEXT: store <2 x i8> [[PACKA1]], ptr [[PTR]], align 2, !sandboxvec [[META0]] +; CHECK-NEXT: store <2 x i8> [[PACKA1]], ptr [[PTR]], align 2, !sandboxvec [[META0]] +; CHECK-NEXT: [[PACKC0:%.*]] = insertelement <2 x i8> poison, i8 [[V1]], i64 0, !sandboxvec [[META0]] +; CHECK-NEXT: [[PACKC1:%.*]] = insertelement <2 x i8> [[PACKC0]], i8 [[V0]], i64 1, !sandboxvec [[META0]] +; CHECK-NEXT: store <2 x i8> [[PACKC1]], ptr [[PTR]], align 2, !sandboxvec [[META0]] +; CHECK-NEXT: ret void +; + %PackA0 = insertelement <2 x i8> poison, i8 %v0, i64 0, !sandboxvec !0 + %PackA1 = insertelement <2 x i8> %PackA0, i8 %v1, i64 1, !sandboxvec !0 + store <2 x i8> %PackA1, ptr %ptr, !sandboxvec !0 + + ; Should reuse PackA1. + %PackB0 = insertelement <2 x i8> poison, i8 %v0, i64 0, !sandboxvec !0 + %PackB1 = insertelement <2 x i8> %PackB0, i8 %v1, i64 1, !sandboxvec !0 + store <2 x i8> %PackB1, ptr %ptr, !sandboxvec !0 + + ; Should remain. + %PackC0 = insertelement <2 x i8> poison, i8 %v1, i64 0, !sandboxvec !0 + %PackC1 = insertelement <2 x i8> %PackC0, i8 %v0, i64 1, !sandboxvec !0 + store <2 x i8> %PackC1, ptr %ptr, !sandboxvec !0 + ret void +} + +; TODO: For now we don't support reusing packs from earlier BBs. 
+define void @pack_cross_bb(i8 %v0, i8 %v1, ptr %ptr) { +; CHECK-LABEL: define void @pack_cross_bb( +; CHECK-SAME: i8 [[V0:%.*]], i8 [[V1:%.*]], ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[PACKA0:%.*]] = insertelement <2 x i8> poison, i8 [[V0]], i64 0, !sandboxvec [[META1:![0-9]+]] +; CHECK-NEXT: [[PACKA1:%.*]] = insertelement <2 x i8> [[PACKA0]], i8 [[V1]], i64 1, !sandboxvec [[META1]] +; CHECK-NEXT: store <2 x i8> [[PACKA1]], ptr [[PTR]], align 2, !sandboxvec [[META1]] +; CHECK-NEXT: br label %[[BB:.*]] +; CHECK: [[BB]]: +; CHECK-NEXT: [[PACKB0:%.*]] = insertelement <2 x i8> poison, i8 [[V0]], i64 0, !sandboxvec [[META1]] +; CHECK-NEXT: [[PACKB1:%.*]] = insertelement <2 x i8> [[PACKB0]], i8 [[V1]], i64 1, !sandboxvec [[META1]] +; CHECK-NEXT: store <2 x i8> [[PACKB1]], ptr [[PTR]], align 2, !sandboxvec [[META1]] +; CHECK-NEXT: [[PACKC0:%.*]] = insertelement <2 x i8> poison, i8 [[V1]], i64 0, !sandboxvec [[META1]] +; CHECK-NEXT: [[PACKC1:%.*]] = insertelement <2 x i8> [[PACKC0]], i8 [[V0]], i64 1, !sandboxvec [[META1]] +; CHECK-NEXT: store <2 x i8> [[PACKC1]], ptr [[PTR]], align 2, !sandboxvec [[META1]] +; CHECK-NEXT: ret void +; +entry: + %PackA0 = insertelement <2 x i8> poison, i8 %v0, i64 0, !sandboxvec !0 + %PackA1 = insertelement <2 x i8> %PackA0, i8 %v1, i64 1, !sandboxvec !0 + store <2 x i8> %PackA1, ptr %ptr, !sandboxvec !0 + br label %bb + +bb: + %PackB0 = insertelement <2 x i8> poison, i8 %v0, i64 0, !sandboxvec !0 + %PackB1 = insertelement <2 x i8> %PackB0, i8 %v1, i64 1, !sandboxvec !0 + store <2 x i8> %PackB1, ptr %ptr, !sandboxvec !0 + + %PackC0 = insertelement <2 x i8> poison, i8 %v1, i64 0, !sandboxvec !0 + %PackC1 = insertelement <2 x i8> %PackC0, i8 %v0, i64 1, !sandboxvec !0 + store <2 x i8> %PackC1, ptr %ptr, !sandboxvec !0 + ret void +} + +!0 = distinct !{!"sandboxregion"} +;. +; CHECK: [[META0]] = distinct !{!"sandboxregion"} +; CHECK: [[META1]] = distinct !{!"sandboxregion"} +;. 
diff --git a/llvm/test/Transforms/SandboxVectorizer/pack_reuse_end_to_end.ll b/llvm/test/Transforms/SandboxVectorizer/pack_reuse_end_to_end.ll new file mode 100644 index 0000000000000..0861c7827c91c --- /dev/null +++ b/llvm/test/Transforms/SandboxVectorizer/pack_reuse_end_to_end.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection<tr-save,bottom-up-vec,tr-accept>" %s -S | FileCheck %s --check-prefix NOREUSE +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection<tr-save,bottom-up-vec,pack-reuse,tr-accept>" %s -S | FileCheck %s --check-prefix PKREUSE + +define void @pack_reuse(ptr %ptr, ptr %ptrX, ptr %ptrY) { +; NOREUSE-LABEL: define void @pack_reuse( +; NOREUSE-SAME: ptr [[PTR:%.*]], ptr [[PTRX:%.*]], ptr [[PTRY:%.*]]) { +; NOREUSE-NEXT: [[LDX:%.*]] = load float, ptr [[PTRX]], align 4 +; NOREUSE-NEXT: [[LDY:%.*]] = load float, ptr [[PTRY]], align 4 +; NOREUSE-NEXT: [[PACK2:%.*]] = insertelement <2 x float> poison, float [[LDX]], i32 0, !sandboxvec [[META0:![0-9]+]] +; NOREUSE-NEXT: [[PACK3:%.*]] = insertelement <2 x float> [[PACK2]], float [[LDY]], i32 1, !sandboxvec [[META0]] +; NOREUSE-NEXT: [[PACK:%.*]] = insertelement <2 x float> poison, float [[LDX]], i32 0, !sandboxvec [[META0]] +; NOREUSE-NEXT: [[PACK1:%.*]] = insertelement <2 x float> [[PACK]], float [[LDY]], i32 1, !sandboxvec [[META0]] +; NOREUSE-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0 +; NOREUSE-NEXT: [[VEC:%.*]] = fsub <2 x float> [[PACK1]], [[PACK3]], !sandboxvec [[META0]] +; NOREUSE-NEXT: store <2 x float> [[VEC]], ptr [[PTR0]], align 4, !sandboxvec [[META0]] +; NOREUSE-NEXT: ret void +; +; PKREUSE-LABEL: define void @pack_reuse( +; PKREUSE-SAME: ptr [[PTR:%.*]], ptr [[PTRX:%.*]], ptr [[PTRY:%.*]]) { +; PKREUSE-NEXT: [[LDX:%.*]] = load float, ptr [[PTRX]], align 4 +; PKREUSE-NEXT: [[LDY:%.*]] = load float, ptr 
[[PTRY]], align 4 +; PKREUSE-NEXT: [[PACK2:%.*]] = insertelement <2 x float> poison, float [[LDX]], i32 0, !sandboxvec [[META0:![0-9]+]] +; PKREUSE-NEXT: [[PACK3:%.*]] = insertelement <2 x float> [[PACK2]], float [[LDY]], i32 1, !sandboxvec [[META0]] +; PKREUSE-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0 +; PKREUSE-NEXT: [[VEC:%.*]] = fsub <2 x float> [[PACK3]], [[PACK3]], !sandboxvec [[META0]] +; PKREUSE-NEXT: store <2 x float> [[VEC]], ptr [[PTR0]], align 4, !sandboxvec [[META0]] +; PKREUSE-NEXT: ret void +; + %ldX = load float, ptr %ptrX + %ldY = load float, ptr %ptrY + + %ptr0 = getelementptr float, ptr %ptr, i32 0 + %ptr1 = getelementptr float, ptr %ptr, i32 1 + %sub0 = fsub float %ldX, %ldX + %sub1 = fsub float %ldY, %ldY + store float %sub0, ptr %ptr0 + store float %sub1, ptr %ptr1 + ret void +} +;. +; NOREUSE: [[META0]] = distinct !{!"sandboxregion"} +;. +; PKREUSE: [[META0]] = distinct !{!"sandboxregion"} +;. diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp index 5c062df8112f6..868a53df7d5c5 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp @@ -20,6 +20,7 @@ #include "llvm/SandboxIR/Function.h" #include "llvm/SandboxIR/Type.h" #include "llvm/Support/SourceMgr.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" using namespace llvm; @@ -563,3 +564,52 @@ TEST_F(VecUtilsTest, FloorPowerOf2) { EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(8), 8u); EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(9), 8u); } + +TEST_F(VecUtilsTest, MatchPackScalar) { + parseIR(R"IR( +define void @foo(i8 %v0, i8 %v1) { +bb0: + %NotPack = insertelement <2 x i8> poison, i8 %v0, i64 0 + br label %bb1 + +bb1: + %Pack0 = insertelement <2 x i8> poison, i8 %v0, i64 0 + %Pack1 = insertelement <2 x i8> %Pack0, i8 %v1, i64 1 + + %NotPack0 = insertelement 
<2 x i8> poison, i8 %v0, i64 0 + %NotPack1 = insertelement <2 x i8> %NotPack0, i8 %v1, i64 0 + %NotPack2 = insertelement <2 x i8> %NotPack1, i8 %v1, i64 1 + + %NotPackBB = insertelement <2 x i8> %NotPack, i8 %v1, i64 1 + + ret void +} +)IR"); + Function &LLVMF = *M->getFunction("foo"); + + sandboxir::Context Ctx(C); + auto &F = *Ctx.createFunction(&LLVMF); + auto &BB = getBasicBlockByName(F, "bb1"); + auto It = BB.begin(); + auto *Pack0 = cast(&*It++); + auto *Pack1 = cast(&*It++); + auto *NotPack0 = cast(&*It++); + auto *NotPack1 = cast(&*It++); + auto *NotPack2 = cast(&*It++); + auto *NotPackBB = cast(&*It++); + auto *Ret = cast(&*It++); + auto *Arg0 = F.getArg(0); + auto *Arg1 = F.getArg(1); + EXPECT_FALSE(sandboxir::VecUtils::matchPack(Pack0)); + EXPECT_FALSE(sandboxir::VecUtils::matchPack(Ret)); + { + auto PairOpt = sandboxir::VecUtils::matchPack(Pack1); + EXPECT_TRUE(PairOpt); + EXPECT_THAT(PairOpt->first, testing::ElementsAre(Pack1, Pack0)); + EXPECT_THAT(PairOpt->second, testing::ElementsAre(Arg0, Arg1)); + } + { + for (auto *NotPack : {NotPack0, NotPack1, NotPack2, NotPackBB}) + EXPECT_FALSE(sandboxir::VecUtils::matchPack(NotPack)); + } +} From 59ea3ab47bc366b0696efdd292367375af95acd4 Mon Sep 17 00:00:00 2001 From: Vasileios Porpodas Date: Tue, 3 Jun 2025 09:28:27 -0700 Subject: [PATCH 2/2] fixup! 
[SandboxVec] Add a simple pack reuse pass --- .../Vectorize/SandboxVectorizer/VecUtils.h | 50 +++++++++++++------ .../SandboxVectorizer/Passes/PackReuse.cpp | 8 +-- .../SandboxVectorizer/VecUtilsTest.cpp | 8 +-- 3 files changed, 42 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h index 759662309c2af..252b55163cb09 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h @@ -197,12 +197,32 @@ class VecUtils { /// \Returns the first integer power of 2 that is <= Num. static unsigned getFloorPowerOf2(unsigned Num); - /// If \p I is the last instruction of a pack pattern, then this function - /// returns the instructions in the pack and the operands in the pack, else - /// returns nullopt. - static std::optional< - std::pair, SmallVector>> - matchPack(Instruction *I) { + /// Helper struct for `matchPack()`. Describes the instructions and operands + /// of a pack pattern. + struct PackPattern { + /// The insertelement instructions that form the pack pattern in bottom-up + /// order, i.e., the first instruction in `Instrs` is the bottom-most + /// InsertElement instruction of the pack pattern. + /// For example in this simple pack pattern: + /// %Pack0 = insertelement <2 x i8> poison, i8 %v0, i64 0 + /// %Pack1 = insertelement <2 x i8> %Pack0, i8 %v1, i64 1 + /// this is [ %Pack1, %Pack0 ]. + SmallVector Instrs; + /// The "external" operands of the pack pattern, i.e., the values that get + /// packed into a vector, skipping the ones in `Instrs`. The operands are in + /// lane order, i.e., `Operands[N]` is the value inserted into lane N. + /// So in our example this would be [ %v0, %v1 ]. 
+ SmallVector Operands; + }; + + /// If \p I is the last instruction of a pack pattern (i.e., an InsertElement + /// into a vector), then this function returns the instructions in the pack + /// and the operands in the pack, else returns nullopt. + /// Here is an example of a matched pattern: + /// %PackA0 = insertelement <2 x i8> poison, i8 %v0, i64 0 + /// %PackA1 = insertelement <2 x i8> %PackA0, i8 %v1, i64 1 + /// TODO: this currently detects only simple canonicalized patterns. + static std::optional matchPack(Instruction *I) { // TODO: Support vector pack patterns. // TODO: Support out-of-order inserts. @@ -213,34 +233,32 @@ class VecUtils { // The pack contains as many instrs as the lanes of the bottom-most Insert unsigned ExpectedNumInserts = VecUtils::getNumLanes(I); assert(ExpectedNumInserts >= 2 && "Expected at least 2 inserts!"); - SmallVector PackInstrs; - SmallVector PackOperands; - PackOperands.resize(ExpectedNumInserts); + PackPattern Pack; + Pack.Operands.resize(ExpectedNumInserts); // Collect the inserts by walking up the use-def chain. Instruction *InsertI = I; - for ([[maybe_unused]] auto Cnt : seq(ExpectedNumInserts)) { + for (auto ExpectedLane : reverse(seq(ExpectedNumInserts))) { if (InsertI == nullptr) return std::nullopt; if (InsertI->getParent() != BB0) return std::nullopt; // Check the lane. auto *LaneC = dyn_cast(InsertI->getOperand(2)); - unsigned ExpectedLane = ExpectedNumInserts - Cnt - 1; if (LaneC == nullptr || LaneC->getSExtValue() != ExpectedLane) return std::nullopt; - PackInstrs.push_back(InsertI); - PackOperands[ExpectedLane] = InsertI->getOperand(1); + Pack.Instrs.push_back(InsertI); + Pack.Operands[ExpectedLane] = InsertI->getOperand(1); Value *Op = InsertI->getOperand(0); - if (Cnt == ExpectedNumInserts - 1) { + if (ExpectedLane == 0) { + // Check the topmost insert. The operand should be a Poison. if (!isa(Op)) return std::nullopt; } else { InsertI = dyn_cast(Op); } } - // Check the topmost insert. 
The operand should be a Poison. - return std::make_pair(PackInstrs, PackOperands); + return Pack; } #ifndef NDEBUG diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PackReuse.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PackReuse.cpp index d427c6d824815..14a6de30bf398 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PackReuse.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PackReuse.cpp @@ -21,11 +21,11 @@ bool PackReuse::runOnRegion(Region &Rgn, const Analyses &A) { PacksMap; // Go over the region and look for pack patterns. for (auto *I : Rgn) { - auto PairOpt = VecUtils::matchPack(I); - if (PairOpt) { + auto PackOpt = VecUtils::matchPack(I); + if (PackOpt) { // TODO: For now limit pack reuse within a BB. - BasicBlock *BB = (*PairOpt->first.front()).getParent(); - PacksMap[{BB, PairOpt->second}].push_back(PairOpt->first); + BasicBlock *BB = (*PackOpt->Instrs.front()).getParent(); + PacksMap[{BB, PackOpt->Operands}].push_back(PackOpt->Instrs); } } for (auto &Pair : PacksMap) { diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp index 868a53df7d5c5..1751701967e6a 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp @@ -603,10 +603,10 @@ define void @foo(i8 %v0, i8 %v1) { EXPECT_FALSE(sandboxir::VecUtils::matchPack(Pack0)); EXPECT_FALSE(sandboxir::VecUtils::matchPack(Ret)); { - auto PairOpt = sandboxir::VecUtils::matchPack(Pack1); - EXPECT_TRUE(PairOpt); - EXPECT_THAT(PairOpt->first, testing::ElementsAre(Pack1, Pack0)); - EXPECT_THAT(PairOpt->second, testing::ElementsAre(Arg0, Arg1)); + auto PackOpt = sandboxir::VecUtils::matchPack(Pack1); + EXPECT_TRUE(PackOpt); + EXPECT_THAT(PackOpt->Instrs, testing::ElementsAre(Pack1, Pack0)); + EXPECT_THAT(PackOpt->Operands, testing::ElementsAre(Arg0, 
Arg1)); } { for (auto *NotPack : {NotPack0, NotPack1, NotPack2, NotPackBB})