Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
//===- SeedCollector.h ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file contains the mechanism for collecting the seed instructions that
// are used as starting points for forming the vectorization graph.
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SEEDCOLLECTOR_H
#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SEEDCOLLECTOR_H

#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/SandboxIR/Instruction.h"
#include "llvm/SandboxIR/Utils.h"
#include "llvm/SandboxIR/Value.h"
#include <iterator>
#include <memory>

namespace llvm::sandboxir {

/// A set of candidate Instructions for vectorizing together.
///
/// The bundle keeps its seeds in an ordered list and tracks, per lane (i.e.
/// per position in that list), whether the seed has already been consumed.
/// It also maintains the running total of bits that are still unused.
class SeedBundle {
public:
  using SeedList = SmallVector<Instruction *>;
  /// Initialize a bundle with \p I.
  explicit SeedBundle(Instruction *I) { insertAt(begin(), I); }
  /// Initialize a bundle by taking over the seeds in \p L. All of them start
  /// out unused, so every seed contributes to the unused-bit total.
  explicit SeedBundle(SeedList &&L) : Seeds(std::move(L)) {
    for (Instruction *S : Seeds)
      NumUnusedBits += Utils::getNumBits(S);
  }
  /// No need to allow copies.
  SeedBundle(const SeedBundle &) = delete;
  SeedBundle &operator=(const SeedBundle &) = delete;
  virtual ~SeedBundle() = default;

  using iterator = SeedList::iterator;
  using const_iterator = SeedList::const_iterator;
  iterator begin() { return Seeds.begin(); }
  iterator end() { return Seeds.end(); }
  const_iterator begin() const { return Seeds.begin(); }
  const_iterator end() const { return Seeds.end(); }

  /// \returns the seed stored in lane \p Idx. No bounds check is performed.
  Instruction *operator[](unsigned Idx) const { return Seeds[Idx]; }

  /// Insert \p I into position \p Pos. Clients should choose Pos
  /// by symbol, symbol-offset, and program order (which depends if scheduling
  /// bottom-up or top-down).
  void insertAt(iterator Pos, Instruction *I) {
#ifdef EXPENSIVE_CHECKS
    for (Instruction *S : Seeds) {
      (void)S; // Silences unused-variable warnings if asserts are disabled.
      assert(S != I && "Attempt to insert an instruction twice.");
    }
#endif
    // Record the lane index first: insert() may reallocate and invalidate Pos.
    const unsigned InsertIdx = static_cast<unsigned>(Pos - begin());
    Seeds.insert(Pos, I);
    // Used-ness is tracked by lane index, so every tracked lane at or after
    // the insertion point has to move up by one to stay attached to its seed.
    if (InsertIdx < UsedLanes.size()) {
      UsedLanes.resize(UsedLanes.size() + 1);
      for (unsigned Idx = UsedLanes.size() - 1; Idx > InsertIdx; --Idx) {
        if (UsedLanes.test(Idx - 1))
          UsedLanes.set(Idx);
        else
          UsedLanes.reset(Idx);
      }
      // The freshly inserted seed is unused.
      UsedLanes.reset(InsertIdx);
    }
    NumUnusedBits += Utils::getNumBits(I);
  }

  /// \returns the index of the first lane that is not marked as used, or the
  /// number of seeds if every lane is used.
  unsigned getFirstUnusedElementIdx() const {
    for (unsigned ElmIdx : seq<unsigned>(0, Seeds.size()))
      if (!isUsed(ElmIdx))
        return ElmIdx;
    return Seeds.size();
  }

  /// Marks instruction \p I "used" within the bundle. Clients
  /// use this property when assembling a vectorized instruction from
  /// the seeds in a bundle. Marking an instruction that is already used is
  /// a no-op. \p I must be a member of the bundle; it is located with a
  /// linear search.
  ///
  /// Note: a literal `0` argument is ambiguous between this overload and the
  /// index-based one; callers must spell the index as `unsigned(0)` or `0u`.
  void setUsed(Instruction *I) {
    auto It = std::find(begin(), end(), I);
    assert(It != end() && "Instruction not in the bundle!");
    const unsigned Idx = static_cast<unsigned>(It - begin());
    setUsed(Idx, 1, /*VerifyUnused=*/false);
  }

  /// Marks the \p Sz consecutive lanes starting at \p ElementIdx as used and
  /// removes their bits from the unused-bit total. If \p VerifyUnused is
  /// true, asserts that none of those lanes was used before. Lanes that are
  /// already used are never counted twice.
  void setUsed(unsigned ElementIdx, unsigned Sz = 1, bool VerifyUnused = true) {
    assert(ElementIdx + Sz <= Seeds.size() && "Lane range out of bounds!");
    if (ElementIdx + Sz > UsedLanes.size())
      UsedLanes.resize(ElementIdx + Sz);
    for (unsigned Idx : seq<unsigned>(ElementIdx, ElementIdx + Sz)) {
      const bool AlreadyUsed = UsedLanes.test(Idx);
      assert((!VerifyUnused || !AlreadyUsed) && "Already marked as used!");
      // Only account for lanes that actually change state, so that
      // UsedLaneCount and NumUnusedBits stay consistent with UsedLanes.
      if (AlreadyUsed)
        continue;
      UsedLanes.set(Idx);
      ++UsedLaneCount;
      NumUnusedBits -= Utils::getNumBits(Seeds[Idx]);
    }
  }

  /// \returns whether or not \p Element has been used. Lanes that have never
  /// been touched (beyond the tracked range) are reported as unused.
  bool isUsed(unsigned Element) const {
    return Element < UsedLanes.size() && UsedLanes.test(Element);
  }
  /// \returns true if every seed in the bundle has been marked as used.
  bool allUsed() const { return UsedLaneCount == Seeds.size(); }
  /// \returns the total number of bits of the seeds not yet marked as used.
  unsigned getNumUnusedBits() const { return NumUnusedBits; }

  /// \returns a slice of seed elements, starting at the element \p StartIdx,
  /// with a total size <= \p MaxVecRegBits, or an empty slice if the
  /// requirements cannot be met. If \p ForcePowOf2 is true, then the returned
  /// slice will have a total number of bits that is a power of 2. Slices of
  /// fewer than two seeds are reported as empty.
  ///
  /// NOTE(review): the element type of the returned range is SeedList, not
  /// Instruction *, which looks unintended -- confirm and change together
  /// with the out-of-line definition.
  MutableArrayRef<SeedList> getSlice(unsigned StartIdx, unsigned MaxVecRegBits,
                                     bool ForcePowOf2);

protected:
  /// The seeds, in bundle order. A seed's position is its lane index.
  SeedList Seeds;
  /// The lanes that we have already vectorized.
  BitVector UsedLanes;
  /// Tracks used lanes for constant-time accessor.
  unsigned UsedLaneCount = 0;
  /// Tracks the remaining bits available to vectorize.
  unsigned NumUnusedBits = 0;

public:
#ifndef NDEBUG
  /// Prints one line per lane to \p OS: the lane index followed by either
  /// "[USED]" or the seed instruction itself.
  void dump(raw_ostream &OS) const {
    for (auto [ElmIdx, I] : enumerate(*this)) {
      OS.indent(2) << ElmIdx << ". ";
      if (isUsed(ElmIdx))
        OS << "[USED]";
      else
        OS << *I;
      OS << "\n";
    }
  }
  /// Prints the bundle to the debug stream.
  LLVM_DUMP_METHOD void dump() const {
    dump(dbgs());
    dbgs() << "\n";
  }
#endif // NDEBUG
};
} // namespace llvm::sandboxir
#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SEEDCOLLECTOR_H
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ add_llvm_component_library(LLVMVectorize
SandboxVectorizer/DependencyGraph.cpp
SandboxVectorizer/Passes/BottomUpVec.cpp
SandboxVectorizer/SandboxVectorizer.cpp
SandboxVectorizer/SeedCollector.cpp
SLPVectorizer.cpp
Vectorize.cpp
VectorCombine.cpp
Expand Down
62 changes: 62 additions & 0 deletions llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
//===- SeedCollector.cpp --------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Type.h"
#include "llvm/SandboxIR/Instruction.h"
#include "llvm/SandboxIR/Utils.h"
#include "llvm/Support/Debug.h"
#include <span>

using namespace llvm;
namespace llvm::sandboxir {

/// Returns the run of consecutive unused seeds that begins at \p StartIdx and
/// whose summed bit-width does not exceed \p MaxVecRegBits. With
/// \p ForcePowerOf2 set, the run is shortened to its longest prefix whose
/// summed bit-width is a power of two. A run of fewer than two seeds is
/// reported as an empty slice.
MutableArrayRef<SeedBundle::SeedList>
SeedBundle::getSlice(unsigned StartIdx, unsigned MaxVecRegBits,
                     bool ForcePowerOf2) {
  assert(StartIdx <= Seeds.size() && "StartIdx is out of bounds");
  // Can't start a slice with a used instruction.
  assert(!isUsed(StartIdx) && "Expected unused at StartIdx");

  // Use uint32_t here for compatibility with isPowerOf2_32.
  // BitCount tracks the size of the working slice. From that we can tell
  // when the working slice's size is a power-of-two and when it exceeds
  // the legal size in MaxVecRegBits.
  uint32_t BitCount = 0;
  uint32_t NumElements = 0;
  // Length of the longest prefix seen so far whose BitCount was a power of
  // two. Tracking it here avoids a second pass that peels elements off the
  // end of the slice until the size becomes a power of two.
  uint32_t NumElementsPowerOfTwo = 0;
  for (Instruction *S : make_range(Seeds.begin() + StartIdx, Seeds.end())) {
    // Stop at the first used lane. This is checked before asking for the
    // seed's size so that a seed that was already consumed is never queried.
    if (isUsed(StartIdx + NumElements))
      break;
    const uint32_t InstBits = Utils::getNumBits(S);
    // Stop if adding this seed would put the slice over the limit.
    if (BitCount + InstBits > MaxVecRegBits)
      break;
    ++NumElements;
    BitCount += InstBits;
    if (isPowerOf2_32(BitCount))
      NumElementsPowerOfTwo = NumElements;
  }
  if (ForcePowerOf2)
    NumElements = NumElementsPowerOfTwo;

  // A slice needs at least two seeds to be worth returning.
  if (NumElements > 1) {
    // NOTE(review): `&Seeds + StartIdx` offsets a pointer to the SeedList
    // object itself, not to its elements, and the element type of the
    // returned range is SeedList rather than Instruction *. The intended
    // result is presumably a range over `Seeds.data() + StartIdx`; fixing it
    // requires changing the return type in the class declaration as well.
    return MutableArrayRef<SeedBundle::SeedList>(&Seeds + StartIdx,
                                                 NumElements);
  }
  return {};
}

} // namespace llvm::sandboxir
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ add_llvm_unittest(SandboxVectorizerTests
DependencyGraphTest.cpp
IntervalTest.cpp
LegalityTest.cpp
)
SeedCollectorTest.cpp
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
//===- SeedCollectorTest.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/SandboxIR/Function.h"
#include "llvm/SandboxIR/Instruction.h"
#include "llvm/Support/SourceMgr.h"
#include "gtest/gtest.h"

using namespace llvm;

/// Test fixture that owns the LLVM context and the module parsed from the
/// textual IR of each test.
struct SeedBundleTest : public testing::Test {
  LLVMContext C;
  std::unique_ptr<Module> M;

  /// Parses \p IR into M using context \p C. On failure M is left null and
  /// the parser diagnostic is printed to stderr.
  void parseIR(LLVMContext &C, const char *IR) {
    SMDiagnostic Err;
    M = parseAssemblyString(IR, Err, C);
    if (!M)
      Err.print("SeedCollectorTest", errs());
  }
};

// Exercises the SeedBundle API: construction, used-lane bookkeeping,
// insertion, and slicing.
TEST_F(SeedBundleTest, SeedBundle) {
  parseIR(C, R"IR(
define void @foo(float %v0, i32 %i0, i16 %i1, i8 %i2) {
bb:
%add0 = fadd float %v0, %v0
%add1 = fadd float %v0, %v0
%add2 = add i8 %i2, %i2
%add3 = add i16 %i1, %i1
%add4 = add i32 %i0, %i0
%add5 = add i16 %i1, %i1
%add6 = add i8 %i2, %i2
%add7 = add i8 %i2, %i2
ret void
}
)IR");
  // Bail out instead of dereferencing a null module if parsing failed.
  ASSERT_TRUE(M != nullptr);
  Function &LLVMF = *M->getFunction("foo");
  sandboxir::Context Ctx(C);
  auto &F = *Ctx.createFunction(&LLVMF);
  DataLayout DL(M->getDataLayout());
  auto *BB = &*F.begin();
  auto It = BB->begin();
  auto *I0 = &*It++;
  auto *I1 = &*It++;
  // Assume first two instructions are identical in the number of bits.
  const unsigned I0Bits = sandboxir::Utils::getNumBits(I0, DL);
  // Constructor
  sandboxir::SeedBundle SB0(I0);
  EXPECT_EQ(*SB0.begin(), I0);
  // getNumUnusedBits after constructor
  EXPECT_EQ(SB0.getNumUnusedBits(), I0Bits);
  // setUsed
  SB0.setUsed(I0);
  // allUsed
  EXPECT_TRUE(SB0.allUsed());
  // isUsed
  EXPECT_TRUE(SB0.isUsed(0));
  // getNumUnusedBits after setUsed
  EXPECT_EQ(SB0.getNumUnusedBits(), 0u);
  // insertAt
  SB0.insertAt(SB0.end(), I1);
  EXPECT_NE(*SB0.begin(), I1);
  // getNumUnusedBits after insertAt
  EXPECT_EQ(SB0.getNumUnusedBits(), I0Bits);
  // allUsed
  EXPECT_FALSE(SB0.allUsed());
  // getFirstUnusedElement
  EXPECT_EQ(SB0.getFirstUnusedElementIdx(), 1u);

  sandboxir::SeedBundle::SeedList Seeds;
  // add2 through add7: i8, i16, i32, i16, i8, i8
  Seeds.push_back(&*It++);
  Seeds.push_back(&*It++);
  Seeds.push_back(&*It++);
  Seeds.push_back(&*It++);
  Seeds.push_back(&*It++);
  Seeds.push_back(&*It++);
  unsigned BundleBits = 0;
  for (auto &S : Seeds)
    BundleBits += sandboxir::Utils::getNumBits(S);
  // Ensure the instructions are as expected.
  EXPECT_EQ(BundleBits, 88u);
  // Constructor
  sandboxir::SeedBundle SB1(std::move(Seeds));
  // getNumUnusedBits after constructor
  EXPECT_EQ(SB1.getNumUnusedBits(), BundleBits);
  // setUsed with index
  SB1.setUsed(1);
  // getFirstUnusedElementIdx
  EXPECT_EQ(SB1.getFirstUnusedElementIdx(), 0u);
  // The explicit unsigned keeps a literal 0 from being ambiguous with the
  // setUsed(Instruction *) overload.
  SB1.setUsed(unsigned(0));
  // getFirstUnusedElementIdx not at end
  EXPECT_EQ(SB1.getFirstUnusedElementIdx(), 2u);

  // getSlice: lanes 2..5 are i32, i16, i8, i8, i.e. 64 bits in total.
  auto Slice0 = SB1.getSlice(2, /* MaxVecRegBits */ 64,
                             /* ForcePowerOf2 */ true);
  EXPECT_EQ(Slice0.size(), 4u);
  // The same four lanes fit without the power-of-two requirement.
  EXPECT_EQ(SB1.getSlice(2, /* MaxVecRegBits */ 64,
                         /* ForcePowerOf2 */ false)
                .size(),
            4u);
  // A larger limit must not change the result: all four lanes still add up
  // to a power of two.
  EXPECT_EQ(SB1.getSlice(2, /* MaxVecRegBits */ 72,
                         /* ForcePowerOf2 */ true)
                .size(),
            4u);
  EXPECT_EQ(SB1.getSlice(2, /* MaxVecRegBits */ 80,
                         /* ForcePowerOf2 */ true)
                .size(),
            4u);
  // With a 63-bit limit only i32, i16, i8 (56 bits) fit. The longest
  // power-of-two prefix is the single i32, which is too short for a slice.
  EXPECT_EQ(SB1.getSlice(2, /* MaxVecRegBits */ 63,
                         /* ForcePowerOf2 */ true)
                .size(),
            0u);
  // Without the power-of-two requirement the 56-bit slice is returned.
  EXPECT_EQ(SB1.getSlice(2, /* MaxVecRegBits */ 63,
                         /* ForcePowerOf2 */ false)
                .size(),
            3u);
  // TODO: Also check the contents of the slices once getSlice() returns a
  // range of Instruction * rather than a range of SeedList.

  SB1.setUsed(2);
  auto Slice1 = SB1.getSlice(3, /* MaxVecRegBits */ 64,
                             /* ForcePowerOf2 */ false);
  EXPECT_EQ(Slice1.size(), 3u);
  // getSlice empty case
  SB1.setUsed(3);
  auto Slice2 = SB1.getSlice(4, /* MaxVecRegBits */ 8,
                             /* ForcePowerOf2 */ true);
  EXPECT_EQ(Slice2.size(), 0u);
}
Loading