@@ -10564,35 +10564,41 @@ class InstructionsCompatibilityAnalysis {
1056410564 unsigned MainOpcode = 0;
1056510565 Instruction *MainOp = nullptr;
1056610566
10567+ /// Checks if the opcode is supported as the main opcode for copyable
10568+ /// elements.
10569+ static bool isSupportedOpcode(const unsigned Opcode) {
10570+ return Opcode == Instruction::Add || Opcode == Instruction::LShr;
10571+ }
10572+
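/// Identifies the best candidate value, which represents the main opcode
/// operation.
/// Currently the best candidate is an instruction with a supported opcode
/// (Add or LShr) that is not an operand of another candidate, taken from the
/// parent block with the highest DFS incoming number (block, that dominates
/// other). Among the opcodes seen in that block, the one with the most
/// candidate instructions is preferred.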
1057110577 void findAndSetMainInstruction(ArrayRef<Value *> VL, const BoUpSLP &R) {
1057210578 BasicBlock *Parent = nullptr;
1057310579 // Checks if the instruction has supported opcode.
10574- auto IsSupportedOpcode = [&](Instruction *I) {
10575- return I && I->getOpcode() == Instruction::Add &&
10580+ auto IsSupportedInstruction = [&](Instruction *I) {
10581+ return I && isSupportedOpcode( I->getOpcode()) &&
1057610582 (!doesNotNeedToBeScheduled(I) || !R.isVectorized(I));
1057710583 };
1057810584 // Exclude operands instructions immediately to improve compile time, it
1057910585 // will be unable to schedule anyway.
1058010586 SmallDenseSet<Value *, 8> Operands;
10587+ SmallMapVector<unsigned, SmallVector<Instruction *>, 4> Candidates;
1058110588 for (Value *V : VL) {
1058210589 auto *I = dyn_cast<Instruction>(V);
1058310590 if (!I)
1058410591 continue;
1058510592 if (!DT.isReachableFromEntry(I->getParent()))
1058610593 continue;
10587- if (!MainOp ) {
10588- MainOp = I ;
10594+ if (Candidates.empty() ) {
10595+ Candidates.try_emplace(I->getOpcode()).first->second.push_back(I) ;
1058910596 Parent = I->getParent();
1059010597 Operands.insert(I->op_begin(), I->op_end());
1059110598 continue;
1059210599 }
1059310600 if (Parent == I->getParent()) {
10594- if (!IsSupportedOpcode(MainOp) && !Operands.contains(I))
10595- MainOp = I;
10601+ Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
1059610602 Operands.insert(I->op_begin(), I->op_end());
1059710603 continue;
1059810604 }
@@ -10604,24 +10610,35 @@ class InstructionsCompatibilityAnalysis {
1060410610 (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
1060510611 "Different nodes should have different DFS numbers");
1060610612 if (NodeA->getDFSNumIn() < NodeB->getDFSNumIn()) {
10607- MainOp = I;
10613+ Candidates.clear();
10614+ Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
1060810615 Parent = I->getParent();
1060910616 Operands.clear();
1061010617 Operands.insert(I->op_begin(), I->op_end());
1061110618 }
1061210619 }
10613- if (!IsSupportedOpcode(MainOp) || Operands.contains(MainOp)) {
10614- MainOp = nullptr;
10615- return;
10620+ unsigned BestOpcodeNum = 0;
10621+ MainOp = nullptr;
10622+ for (const auto &P : Candidates) {
10623+ if (P.second.size() < BestOpcodeNum)
10624+ continue;
10625+ for (Instruction *I : P.second) {
10626+ if (IsSupportedInstruction(I) && !Operands.contains(I)) {
10627+ MainOp = I;
10628+ BestOpcodeNum = P.second.size();
10629+ break;
10630+ }
10631+ }
1061610632 }
10617- MainOpcode = MainOp->getOpcode();
10633+ if (MainOp)
10634+ MainOpcode = MainOp->getOpcode();
1061810635 }
1061910636
1062010637 /// Returns the idempotent value for the \p MainOp with the detected \p
1062110638 /// MainOpcode. For Add, returns 0. For Or, it should choose between false and
1062210639 /// the operand itself, since V or V == V.
1062310640 Value *selectBestIdempotentValue() const {
10624- assert(MainOpcode == Instruction::Add && "Unsupported opcode");
10641+ assert(isSupportedOpcode( MainOpcode) && "Unsupported opcode");
1062510642 return ConstantExpr::getBinOpIdentity(MainOpcode, MainOp->getType(),
1062610643 !MainOp->isCommutative());
1062710644 }
@@ -10634,13 +10651,8 @@ class InstructionsCompatibilityAnalysis {
1063410651 return {V, V};
1063510652 if (!S.isCopyableElement(V))
1063610653 return convertTo(cast<Instruction>(V), S).second;
10637- switch (MainOpcode) {
10638- case Instruction::Add:
10639- return {V, selectBestIdempotentValue()};
10640- default:
10641- break;
10642- }
10643- llvm_unreachable("Unsupported opcode");
10654+ assert(isSupportedOpcode(MainOpcode) && "Unsupported opcode");
10655+ return {V, selectBestIdempotentValue()};
1064410656 }
1064510657
1064610658 /// Builds operands for the original instructions.
@@ -10853,6 +10865,21 @@ class InstructionsCompatibilityAnalysis {
1085310865 }
1085410866 if (!Res)
1085510867 return InstructionsState::invalid();
10868+ constexpr TTI::TargetCostKind Kind = TTI::TCK_RecipThroughput;
10869+ InstructionCost ScalarCost = TTI.getInstructionCost(S.getMainOp(), Kind);
10870+ InstructionCost VectorCost;
10871+ FixedVectorType *VecTy =
10872+ getWidenedType(S.getMainOp()->getType(), VL.size());
10873+ switch (MainOpcode) {
10874+ case Instruction::Add:
10875+ case Instruction::LShr:
10876+ VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind);
10877+ break;
10878+ default:
10879+ llvm_unreachable("Unexpected instruction.");
10880+ }
10881+ if (VectorCost > ScalarCost)
10882+ return InstructionsState::invalid();
1085610883 return S;
1085710884 }
1085810885 assert(Operands.size() == 2 && "Unexpected number of operands!");
@@ -21090,6 +21117,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
2109021117 ArrayRef<Value *> Op = EI.UserTE->getOperand(EI.EdgeIdx);
2109121118 const auto *It = find(Op, CD->getInst());
2109221119 assert(It != Op.end() && "Lane not set");
21120+ SmallPtrSet<Instruction *, 4> Visited;
2109321121 do {
2109421122 int Lane = std::distance(Op.begin(), It);
2109521123 assert(Lane >= 0 && "Lane not set");
@@ -21111,13 +21139,15 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
2111121139 (InsertInReadyList && UseSD->isReady()))
2111221140 WorkList.push_back(UseSD);
2111321141 }
21114- } else if (ScheduleData *UseSD = getScheduleData(In)) {
21115- CD->incDependencies();
21116- if (!UseSD->isScheduled())
21117- CD->incrementUnscheduledDeps(1);
21118- if (!UseSD->hasValidDependencies() ||
21119- (InsertInReadyList && UseSD->isReady()))
21120- WorkList.push_back(UseSD);
21142+ } else if (Visited.insert(In).second) {
21143+ if (ScheduleData *UseSD = getScheduleData(In)) {
21144+ CD->incDependencies();
21145+ if (!UseSD->isScheduled())
21146+ CD->incrementUnscheduledDeps(1);
21147+ if (!UseSD->hasValidDependencies() ||
21148+ (InsertInReadyList && UseSD->isReady()))
21149+ WorkList.push_back(UseSD);
21150+ }
2112121151 }
2112221152 It = find(make_range(std::next(It), Op.end()), CD->getInst());
2112321153 } while (It != Op.end());
@@ -21875,9 +21905,11 @@ bool BoUpSLP::collectValuesToDemote(
2187521905 return all_of(E.Scalars, [&](Value *V) {
2187621906 if (isa<PoisonValue>(V))
2187721907 return true;
21908+ APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
21909+ if (E.isCopyableElement(V))
21910+ return MaskedValueIsZero(V, ShiftedBits, SimplifyQuery(*DL));
2187821911 auto *I = cast<Instruction>(V);
2187921912 KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL);
21880- APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
2188121913 return AmtKnownBits.getMaxValue().ult(BitWidth) &&
2188221914 MaskedValueIsZero(I->getOperand(0), ShiftedBits,
2188321915 SimplifyQuery(*DL));
0 commit comments