diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h index fab456d925526..54cb8fa6ea848 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.h @@ -121,6 +121,7 @@ class DGNode { assert(UnscheduledSuccs > 0 && "Counting error!"); --UnscheduledSuccs; } + void incrUnscheduledSuccs() { ++UnscheduledSuccs; } /// \Returns true if all dependent successors have been scheduled. bool ready() const { return UnscheduledSuccs == 0; } /// \Returns true if this node has been scheduled. diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h index 0da1894c90613..0fa40e00d23fc 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h @@ -54,6 +54,22 @@ class ReadyListContainer { } bool empty() const { return List.empty(); } void clear() { List = {}; } + /// \Removes \p N if found in the ready list. + void remove(DGNode *N) { + // TODO: Use a more efficient data-structure for the ready list because the + // priority queue does not support fast removals. + SmallVector Keep; + Keep.reserve(List.size()); + while (!List.empty()) { + auto *Top = List.top(); + List.pop(); + if (Top == N) + break; + Keep.push_back(Top); + } + for (auto *KeepN : Keep) + List.push(KeepN); + } #ifndef NDEBUG void dump(raw_ostream &OS) const; LLVM_DUMP_METHOD void dump() const; @@ -116,6 +132,8 @@ class Scheduler { /// The dependency graph is used by the scheduler to determine the legal /// ordering of instructions. DependencyGraph DAG; + friend class SchedulerInternalsAttorney; // For DAG. + Context &Ctx; /// This is the top of the schedule, i.e. the location where the scheduler /// is about to place the scheduled instructions. It gets updated as we /// schedule. @@ -124,6 +142,12 @@ class Scheduler { DenseMap> Bndls; /// The BB that we are currently scheduling. BasicBlock *ScheduledBB = nullptr; + /// The ID of the callback we register with Sandbox IR. + std::optional CreateInstrCB; + /// Called by Sandbox IR's callback system, after \p I has been created. + /// NOTE: This should run after DAG's callback has run. + // TODO: Perhaps call DAG's notify function from within this one? + void notifyCreateInstr(Instruction *I); /// \Returns a scheduling bundle containing \p Instrs. SchedBundle *createBundle(ArrayRef Instrs); @@ -153,8 +177,16 @@ class Scheduler { Scheduler &operator=(const Scheduler &) = delete; public: - Scheduler(AAResults &AA, Context &Ctx) : DAG(AA, Ctx) {} - ~Scheduler() {} + Scheduler(AAResults &AA, Context &Ctx) : DAG(AA, Ctx), Ctx(Ctx) { + // NOTE: The scheduler's callback depends on the DAG's callback running + // before it and updating the DAG accordingly. + CreateInstrCB = Ctx.registerCreateInstrCallback( + [this](Instruction *I) { notifyCreateInstr(I); }); + } + ~Scheduler() { + if (CreateInstrCB) + Ctx.unregisterCreateInstrCallback(*CreateInstrCB); + } /// Tries to build a schedule that includes all of \p Instrs scheduled at the /// same scheduling cycle. This essentially checks that there are no /// dependencies among \p Instrs. This function may involve scheduling @@ -180,6 +212,13 @@ class Scheduler { #endif }; +/// A client-attorney class for accessing the Scheduler's internals (used for +/// unit tests). +class SchedulerInternalsAttorney { +public: + static DependencyGraph &getDAG(Scheduler &Sched) { return Sched.DAG; } +}; + } // namespace llvm::sandboxir #endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_SCHEDULER_H diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp index e54e74e1bbecd..dd24cc3d98cf8 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp @@ -86,6 +86,31 @@ void Scheduler::scheduleAndUpdateReadyList(SchedBundle &Bndl) { } } +void Scheduler::notifyCreateInstr(Instruction *I) { + // The DAG notifier should have run by now. + auto *N = DAG.getNode(I); + // If there is no DAG node for `I` it means that this is out of scope for the + // DAG and as such out of scope for the scheduler too, so nothing to do. + if (N == nullptr) + return; + // If the instruction is inserted below the top-of-schedule then we mark it as + // "scheduled". + bool IsScheduled = ScheduleTopItOpt && + *ScheduleTopItOpt != I->getParent()->end() && + (*ScheduleTopItOpt.value()).comesBefore(I); + if (IsScheduled) + N->setScheduled(true); + // If the new instruction is above the top of schedule we need to remove its + // dependency predecessors from the ready list and increment their + // `UnscheduledSuccs` counters. + if (!IsScheduled) { + for (auto *PredN : N->preds(DAG)) { + ReadyList.remove(PredN); + PredN->incrUnscheduledSuccs(); + } + } +} + SchedBundle *Scheduler::createBundle(ArrayRef Instrs) { SchedBundle::ContainerTy Nodes; Nodes.reserve(Instrs.size()); diff --git a/llvm/test/Transforms/SandboxVectorizer/scheduler.ll b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll new file mode 100644 index 0000000000000..847c978aa4912 --- /dev/null +++ b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s + +; This used to crash because the newly added pack instructions would not update +; the DAG and scheduler, leading to def-after-use. +define void @check_dag_scheduler_update(ptr noalias %p, ptr noalias %p1) { +; CHECK-LABEL: define void @check_dag_scheduler_update( +; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[P1:%.*]]) { +; CHECK-NEXT: [[I:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr i32, ptr [[P]], i64 32 +; CHECK-NEXT: [[I2:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr i32, ptr [[P]], i64 33 +; CHECK-NEXT: [[I4:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4 +; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr i32, ptr [[P]], i64 34 +; CHECK-NEXT: [[I6:%.*]] = load i32, ptr [[ARRAYIDX18]], align 4 +; CHECK-NEXT: [[PACK:%.*]] = insertelement <4 x i32> poison, i32 [[I]], i32 0 +; CHECK-NEXT: [[PACK1:%.*]] = insertelement <4 x i32> [[PACK]], i32 [[I2]], i32 1 +; CHECK-NEXT: [[PACK2:%.*]] = insertelement <4 x i32> [[PACK1]], i32 [[I4]], i32 2 +; CHECK-NEXT: [[PACK3:%.*]] = insertelement <4 x i32> [[PACK2]], i32 [[I6]], i32 3 +; CHECK-NEXT: [[VECL:%.*]] = load <4 x i32>, ptr [[P]], align 4 +; CHECK-NEXT: [[VEC:%.*]] = add nsw <4 x i32> [[PACK3]], [[VECL]] +; CHECK-NEXT: store <4 x i32> [[VEC]], ptr [[P1]], align 4 +; CHECK-NEXT: ret void +; + %i = load i32, ptr %p + %i1 = load i32, ptr %p + %add = add nsw i32 %i, %i1 + store i32 %add, ptr %p1 + %arrayidx4 = getelementptr i32, ptr %p, i64 32 + %i2 = load i32, ptr %arrayidx4 + %arrayidx6 = getelementptr i32, ptr %p, i64 1 + %i3 = load i32, ptr %arrayidx6 + %add7 = add nsw i32 %i2, %i3 + %arrayidx9 = getelementptr i32, ptr %p1, i64 1 + store i32 %add7, ptr %arrayidx9 + %arrayidx11 = getelementptr i32, ptr %p, i64 33 + %i4 = load i32, ptr %arrayidx11 + %arrayidx13 = getelementptr i32, ptr %p, i64 2 + %i5 = load i32, ptr %arrayidx13 + %add14 = add nsw i32 %i4, %i5 + %arrayidx16 = getelementptr i32, ptr %p1, i64 2 + store i32 %add14, ptr %arrayidx16 + %arrayidx18 = getelementptr i32, ptr %p, i64 34 + %i6 = load i32, ptr %arrayidx18 + %arrayidx19 = getelementptr i32, ptr %p, i64 3 + %i7 = load i32, ptr %arrayidx19 + %add21 = add nsw i32 %i6, %i7 + %arrayidx23 = getelementptr i32, ptr %p1, i64 3 + store i32 %add21, ptr %arrayidx23 + ret void +} diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp index 5a2b92ed24b03..373af27ffbff0 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp @@ -289,3 +289,106 @@ define void @foo(ptr noalias %ptr0, ptr noalias %ptr1, i8 %v0, i8 %v1) { EXPECT_FALSE(Sched.trySchedule({S0, S1})); } } + +TEST_F(SchedulerTest, NotifyCreateInst) { + parseIR(C, R"IR( +define void @foo(ptr noalias %ptr, ptr noalias %ptr1, ptr noalias %ptr2) { + %ld0 = load i8, ptr %ptr + store i8 %ld0, ptr %ptr + ret void +} +)IR"); + llvm::Function *LLVMF = &*M->getFunction("foo"); + sandboxir::Context Ctx(C); + auto *F = Ctx.createFunction(LLVMF); + auto *BB = &*F->begin(); + auto It = BB->begin(); + auto *L0 = cast(&*It++); + auto *S0 = cast(&*It++); + auto *Ret = cast(&*It++); + auto *Ptr1 = F->getArg(1); + auto *Ptr2 = F->getArg(2); + + sandboxir::Scheduler Sched(getAA(*LLVMF), Ctx); + // Schedule Ret and S0. The top of schedule should be at S0. + EXPECT_TRUE(Sched.trySchedule({Ret})); + EXPECT_TRUE(Sched.trySchedule({S0})); + auto &DAG = sandboxir::SchedulerInternalsAttorney::getDAG(Sched); + DAG.extend({L0}); + auto *L0N = DAG.getNode(L0); + EXPECT_EQ(L0N->getNumUnscheduledSuccs(), 0u); + // We should have DAG nodes for all instructions at this point + + // Now create a new instruction below S0. + sandboxir::StoreInst *NewS1 = + sandboxir::StoreInst::create(L0, Ptr1, Align(8), Ret->getIterator(), + /*IsVolatile=*/false, Ctx); + // Check that it is marked as "scheduled". + auto *NewS1N = DAG.getNode(NewS1); + EXPECT_TRUE(NewS1N->scheduled()); + // Check that L0's UnscheduledSuccs are still == 0 since NewS1 is "scheduled". + EXPECT_EQ(L0N->getNumUnscheduledSuccs(), 0u); + + // Now create a new instruction above S0. + sandboxir::StoreInst *NewS2 = + sandboxir::StoreInst::create(L0, Ptr2, Align(8), S0->getIterator(), + /*IsVolatile=*/false, Ctx); + // Check that it is not marked as "scheduled". + auto *NewS2N = DAG.getNode(NewS2); + EXPECT_FALSE(NewS2N->scheduled()); + // Check that L0's UnscheduledSuccs got updated because of NewS2. + EXPECT_EQ(L0N->getNumUnscheduledSuccs(), 1u); + + sandboxir::ReadyListContainer ReadyList; + // Check empty(). + EXPECT_TRUE(ReadyList.empty()); +} + +TEST_F(SchedulerTest, ReadyList) { + parseIR(C, R"IR( +define void @foo(ptr %ptr) { + %ld0 = load i8, ptr %ptr + store i8 %ld0, ptr %ptr + ret void +} +)IR"); + llvm::Function *LLVMF = &*M->getFunction("foo"); + sandboxir::Context Ctx(C); + auto *F = Ctx.createFunction(LLVMF); + auto *BB = &*F->begin(); + auto It = BB->begin(); + auto *L0 = cast(&*It++); + auto *S0 = cast(&*It++); + auto *Ret = cast(&*It++); + + sandboxir::DependencyGraph DAG(getAA(*LLVMF), Ctx); + DAG.extend({&*BB->begin(), BB->getTerminator()}); + auto *L0N = DAG.getNode(L0); + auto *S0N = DAG.getNode(S0); + auto *RetN = DAG.getNode(Ret); + + sandboxir::ReadyListContainer ReadyList; + // Check empty(). + EXPECT_TRUE(ReadyList.empty()); + // Check insert(), pop(). + ReadyList.insert(L0N); + EXPECT_FALSE(ReadyList.empty()); + EXPECT_EQ(ReadyList.pop(), L0N); + // Check clear(). + ReadyList.insert(L0N); + EXPECT_FALSE(ReadyList.empty()); + ReadyList.clear(); + EXPECT_TRUE(ReadyList.empty()); + // Check remove(). + EXPECT_TRUE(ReadyList.empty()); + ReadyList.remove(L0N); // Removing a non-existing node should be valid. + ReadyList.insert(L0N); + ReadyList.insert(S0N); + ReadyList.insert(RetN); + ReadyList.remove(S0N); + DenseSet Nodes; + Nodes.insert(ReadyList.pop()); + Nodes.insert(ReadyList.pop()); + EXPECT_TRUE(ReadyList.empty()); + EXPECT_THAT(Nodes, testing::UnorderedElementsAre(L0N, RetN)); +}