@@ -3835,16 +3835,16 @@ class BoUpSLP {
38353835 continue;
38363836 auto It = ScalarToTreeEntries.find(V);
38373837 Instruction *I = dyn_cast<Instruction>(V);
3838- bool IsAltInst = (I) ? I->getOpcode() != Opcode : false;
3839- if (S.isAltOpCopy() && IsAltInst) {
3840- CopyableAltOp.insert(V);
3841- continue;
3842- }
38433838 assert(
38443839 (It == ScalarToTreeEntries.end() ||
38453840 (It->getSecond().size() == 1 && It->getSecond().front() == Last) ||
38463841 doesNotNeedToBeScheduled(V)) &&
38473842 "Scalar already in tree!");
3843+ bool IsAltInst = (I) ? I->getOpcode() != Opcode : false;
3844+ if (S.isAltOpCopy() && IsAltInst) {
3845+ CopyableAltOp[V] = Last;
3846+ continue;
3847+ }
38483848 if (It == ScalarToTreeEntries.end()) {
38493849 ScalarToTreeEntries.try_emplace(V).first->getSecond().push_back(Last);
38503850 (void)Processed.insert(V);
@@ -3954,8 +3954,8 @@ class BoUpSLP {
39543954 /// A list of scalars that we found that we need to keep as scalars.
39553955 ValueSet MustGather;
39563956
3957- /// A set op scalars that we are considoring as copyable operations .
3958- ValueSet CopyableAltOp;
3957+ /// Maps each copyable scalar to its tree entry.
3958+ SmallDenseMap<Value *, TreeEntry *> CopyableAltOp;
39593959
39603960 /// A set of first non-schedulable values.
39613961 ValueSet NonScheduledFirst;
@@ -4264,6 +4264,9 @@ class BoUpSLP {
42644264
42654265 /// True if this instruction is a copy.
42664266 bool IsCopy = false;
4267+
4268+ /// Points to the ScheduleData where the copyable instruction was introduced.
4269+ ScheduleData *CopyInst = nullptr;
42674270 };
42684271
42694272#ifndef NDEBUG
@@ -4413,6 +4416,23 @@ class BoUpSLP {
44134416 for (Use &U : BundleMember->Inst->operands())
44144417 if (auto *I = dyn_cast<Instruction>(U.get()))
44154418 DecrUnsched(I);
4419+ // Handle the dependencies of a copy instruction.
4420+ if (TE && TE->isAltOpCopy() && BundleMember->IsCopy) {
4421+ doForAllOpcodes(BundleMember->Inst, [BundleMember, &ReadyList](
4422+ ScheduleData *CopyUse) {
4423+ if (BundleMember != CopyUse && CopyUse->hasValidDependencies() &&
4424+ CopyUse->incrementUnscheduledDeps(-1) == 0) {
4425+ ScheduleData *DepBundle = CopyUse->FirstInBundle;
4426+ assert(!DepBundle->IsScheduled &&
4427+ "already scheduled bundle gets ready");
4428+ if (DepBundle->isReady()) {
4429+ ReadyList.insert(DepBundle);
4430+ LLVM_DEBUG(dbgs() << "SLP: gets ready (copyable): "
4431+ << *DepBundle << "\n");
4432+ }
4433+ }
4434+ });
4435+ }
44164436 }
44174437 // Handle the memory dependencies.
44184438 for (ScheduleData *MemoryDepSD : BundleMember->MemoryDependencies) {
@@ -4498,8 +4518,8 @@ class BoUpSLP {
44984518
44994519 /// Build a bundle from the ScheduleData nodes corresponding to the
45004520 /// scalar instruction for each lane.
4501- ScheduleData *buildBundle(ArrayRef<Value *> VL, const InstructionsState &S ,
4502- bool &ReSchedule);
4521+ ScheduleData *buildBundle(ArrayRef<Value *> VL, BoUpSLP *SLP ,
4522+ const InstructionsState &S, bool &ReSchedule);
45034523
45044524 /// Checks if a bundle of instructions can be scheduled, i.e. has no
45054525 /// cyclic dependencies. This is only a dry-run, no instructions are
@@ -17606,8 +17626,10 @@ void BoUpSLP::optimizeGatherSequence() {
1760617626 GatherShuffleExtractSeq.clear();
1760717627}
1760817628
17609- BoUpSLP::ScheduleData *BoUpSLP::BlockScheduling::buildBundle(
17610- ArrayRef<Value *> VL, const InstructionsState &S, bool &ReSchedule) {
17629+ BoUpSLP::ScheduleData *
17630+ BoUpSLP::BlockScheduling::buildBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
17631+ const InstructionsState &S,
17632+ bool &ReSchedule) {
1761117633 ScheduleData *Bundle = nullptr;
1761217634 ScheduleData *PrevInBundle = nullptr;
1761317635 unsigned Opcode = S.getOpcode();
@@ -17675,6 +17697,13 @@ BoUpSLP::ScheduleData *BoUpSLP::BlockScheduling::buildBundle(
1767517697 if (S.isAltOpCopy() && IsAltInst)
1767617698 BundleMember->IsCopy = true;
1767717699 PrevInBundle = BundleMember;
17700+ if (SLP->CopyableAltOp.contains(I)) {
17701+ TreeEntry *TE = SLP->CopyableAltOp[I];
17702+ assert(TE && "Incorrect state");
17703+ ScheduleData *SD = getScheduleData(I, TE);
17704+ assert(SD && SD->IsCopy && "ScheduleData incorrect state");
17705+ BundleMember->CopyInst = SD;
17706+ }
1767817707 }
1767917708 assert(Bundle && "Failed to find schedule bundle");
1768017709 return Bundle;
@@ -17772,7 +17801,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
1777217801 ReSchedule = true;
1777317802 }
1777417803
17775- auto *Bundle = buildBundle(VL, S, ReSchedule);
17804+ auto *Bundle = buildBundle(VL, SLP, S, ReSchedule);
1777617805 if (!Bundle)
1777717806 return std::nullopt;
1777817807 TryScheduleBundleImpl(ReSchedule, Bundle);
@@ -17820,6 +17849,7 @@ void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
1782017849 BundleMember->NextInBundle = nullptr;
1782117850 BundleMember->TE = nullptr;
1782217851 BundleMember->IsCopy = false;
17852+ BundleMember->CopyInst = nullptr;
1782317853 if (BundleMember->unscheduledDepsInBundle() == 0) {
1782417854 ReadyInsts.insert(BundleMember);
1782517855 }
@@ -18010,6 +18040,12 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
1801018040 BundleMember->Dependencies = 0;
1801118041 BundleMember->resetUnscheduledDeps();
1801218042
18043+ // Handle copy instruction dependencies.
18044+ if (BundleMember->CopyInst) {
18045+ BundleMember->Dependencies++;
18046+ BundleMember->incrementUnscheduledDeps(1);
18047+ }
18048+
1801318049 // Handle def-use chain dependencies.
1801418050 for (User *U : BundleMember->Inst->users()) {
1801518051 if (auto *I = dyn_cast<Instruction>(U)) {
@@ -18240,7 +18276,6 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
1824018276 BS->initialFillReadyList(ReadyInsts);
1824118277
1824218278 Instruction *LastScheduledInst = BS->ScheduleEnd;
18243- DenseMap<ScheduleData *, ScheduleData *> ReschedMap;
1824418279
1824518280 auto ReorderBundle = [this](ScheduleData *SD) {
1824618281 SmallVector<Instruction *, 2> Insts;
@@ -18273,16 +18308,6 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
1827318308 ScheduleData *Picked = *ReadyInsts.begin();
1827418309 ReadyInsts.erase(ReadyInsts.begin());
1827518310
18276- // Reorder copyable elements to emit after main operations.
18277- for (ScheduleData *BundleMember = Picked; BundleMember;
18278- BundleMember = BundleMember->NextInBundle) {
18279- if (CopyableAltOp.contains(BundleMember->Inst)) {
18280- ScheduleData *SD = CopyElementsMap[BundleMember->Inst];
18281- if (SD && SD->FirstInBundle != Picked)
18282- ReschedMap[SD] = Picked;
18283- }
18284- }
18285-
1828618311 // Move the scheduled instruction(s) to their dedicated places, if not
1828718312 // there yet.
1828818313 for (Instruction *PickedInst : ReorderBundle(Picked)) {
@@ -18291,15 +18316,6 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
1829118316 PickedInst->moveAfter(LastScheduledInst->getPrevNode());
1829218317 LastScheduledInst = PickedInst;
1829318318 }
18294- if (ReschedMap.contains(Picked)) {
18295- ScheduleData *Resched = ReschedMap[Picked];
18296- for (Instruction *PickedInst : ReorderBundle(Resched)) {
18297- if (PickedInst->getNextNonDebugInstruction() != LastScheduledInst &&
18298- LastScheduledInst->getPrevNode())
18299- PickedInst->moveAfter(LastScheduledInst->getPrevNode());
18300- LastScheduledInst = PickedInst;
18301- }
18302- }
1830318319 BS->schedule(Picked, ReadyInsts);
1830418320 }
1830518321
0 commit comments