diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp index d65a04c0df6ee..f080111f08d45 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp @@ -232,6 +232,9 @@ void DependencyGraph::setDefUseUnscheduledSuccs( auto *OpI = dyn_cast(Op); if (OpI == nullptr) continue; + // TODO: For now don't cross BBs. + if (OpI->getParent() != I.getParent()) + continue; if (!NewInterval.contains(OpI)) continue; auto *OpN = getNode(OpI); diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp index 18c3b375c92a2..06a1769e535b1 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp @@ -169,14 +169,19 @@ Value *BottomUpVec::createVectorInstr(ArrayRef Bndl, } void BottomUpVec::tryEraseDeadInstrs() { - // Visiting the dead instructions bottom-to-top. - SmallVector SortedDeadInstrCandidates( - DeadInstrCandidates.begin(), DeadInstrCandidates.end()); - sort(SortedDeadInstrCandidates, - [](Instruction *I1, Instruction *I2) { return I1->comesBefore(I2); }); - for (Instruction *I : reverse(SortedDeadInstrCandidates)) { - if (I->hasNUses(0)) - I->eraseFromParent(); + DenseMap> SortedDeadInstrCandidates; + // The dead instrs could span BBs, so we need to collect and sort them per BB. + for (auto *DeadI : DeadInstrCandidates) + SortedDeadInstrCandidates[DeadI->getParent()].push_back(DeadI); + for (auto &Pair : SortedDeadInstrCandidates) + sort(Pair.second, + [](Instruction *I1, Instruction *I2) { return I1->comesBefore(I2); }); + for (const auto &Pair : SortedDeadInstrCandidates) { + for (Instruction *I : reverse(Pair.second)) { + if (I->hasNUses(0)) + // Erase the dead instructions bottom-to-top. + I->eraseFromParent(); + } } DeadInstrCandidates.clear(); } diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp index f9cdbe8aea170..496521b95a98e 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp @@ -206,6 +206,13 @@ bool Scheduler::trySchedule(ArrayRef Instrs) { // We start scheduling at the bottom instr of Instrs. ScheduleTopItOpt = std::next(VecUtils::getLowest(Instrs)->getIterator()); + // TODO: For now don't cross BBs. + if (!DAG.getInterval().empty()) { + auto *BB = DAG.getInterval().top()->getParent(); + if (any_of(Instrs, [BB](auto *I) { return I->getParent() != BB; })) + return false; + } + // Extend the DAG to include Instrs. Interval Extension = DAG.extend(Instrs); // Add nodes to ready list. diff --git a/llvm/test/Transforms/SandboxVectorizer/cross_bbs.ll b/llvm/test/Transforms/SandboxVectorizer/cross_bbs.ll new file mode 100644 index 0000000000000..e913fc5913ba7 --- /dev/null +++ b/llvm/test/Transforms/SandboxVectorizer/cross_bbs.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s + +define void @cross_bbs(ptr %ptr) { +; CHECK-LABEL: define void @cross_bbs( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0 +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr i8, ptr [[PTR]], i32 1 +; CHECK-NEXT: [[L0:%.*]] = load i8, ptr [[PTR0]], align 1 +; CHECK-NEXT: [[L1:%.*]] = load i8, ptr [[PTR1]], align 1 +; CHECK-NEXT: [[PACK:%.*]] = insertelement <2 x i8> poison, i8 [[L0]], i32 0 +; CHECK-NEXT: [[PACK1:%.*]] = insertelement <2 x i8> [[PACK]], i8 [[L1]], i32 1 +; CHECK-NEXT: br label %[[BB:.*]] +; CHECK: [[BB]]: +; CHECK-NEXT: store <2 x i8> [[PACK1]], ptr [[PTR0]], align 1 +; CHECK-NEXT: ret void +; + %ptr0 = getelementptr i8, ptr %ptr, i32 0 + %ptr1 = getelementptr i8, ptr %ptr, i32 1 + %l0 = load i8, ptr %ptr0 + %l1 = load i8, ptr %ptr1 + br label %bb + +bb: + store i8 %l0, ptr %ptr0 + store i8 %l1, ptr %ptr1 + ret void +}