@@ -10564,41 +10564,35 @@ class InstructionsCompatibilityAnalysis {
1056410564 unsigned MainOpcode = 0;
1056510565 Instruction *MainOp = nullptr;
1056610566
10567- /// Checks if the opcode is supported as the main opcode for copyable
10568- /// elements.
10569- static bool isSupportedOpcode(const unsigned Opcode) {
10570- return Opcode == Instruction::Add || Opcode == Instruction::LShr;
10571- }
10572-
1057310567 /// Identifies the best candidate value, which represents main opcode
1057410568 /// operation.
1057510569 /// Currently the best candidate is the Add instruction with the parent
1057610570 /// block with the highest DFS incoming number (block, that dominates other).
1057710571 void findAndSetMainInstruction(ArrayRef<Value *> VL, const BoUpSLP &R) {
1057810572 BasicBlock *Parent = nullptr;
1057910573 // Checks if the instruction has supported opcode.
10580- auto IsSupportedInstruction = [&](Instruction *I) {
10581- return I && isSupportedOpcode( I->getOpcode()) &&
10574+ auto IsSupportedOpcode = [&](Instruction *I) {
10575+ return I && I->getOpcode() == Instruction::Add &&
1058210576 (!doesNotNeedToBeScheduled(I) || !R.isVectorized(I));
1058310577 };
1058410578 // Exclude operands instructions immediately to improve compile time, it
1058510579 // will be unable to schedule anyway.
1058610580 SmallDenseSet<Value *, 8> Operands;
10587- SmallMapVector<unsigned, SmallVector<Instruction *>, 4> Candidates;
1058810581 for (Value *V : VL) {
1058910582 auto *I = dyn_cast<Instruction>(V);
1059010583 if (!I)
1059110584 continue;
1059210585 if (!DT.isReachableFromEntry(I->getParent()))
1059310586 continue;
10594- if (Candidates.empty() ) {
10595- Candidates.try_emplace(I->getOpcode()).first->second.push_back(I) ;
10587+ if (!MainOp ) {
10588+ MainOp = I ;
1059610589 Parent = I->getParent();
1059710590 Operands.insert(I->op_begin(), I->op_end());
1059810591 continue;
1059910592 }
1060010593 if (Parent == I->getParent()) {
10601- Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
10594+ if (!IsSupportedOpcode(MainOp) && !Operands.contains(I))
10595+ MainOp = I;
1060210596 Operands.insert(I->op_begin(), I->op_end());
1060310597 continue;
1060410598 }
@@ -10610,35 +10604,24 @@ class InstructionsCompatibilityAnalysis {
1061010604 (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
1061110605 "Different nodes should have different DFS numbers");
1061210606 if (NodeA->getDFSNumIn() < NodeB->getDFSNumIn()) {
10613- Candidates.clear();
10614- Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
10607+ MainOp = I;
1061510608 Parent = I->getParent();
1061610609 Operands.clear();
1061710610 Operands.insert(I->op_begin(), I->op_end());
1061810611 }
1061910612 }
10620- unsigned BestOpcodeNum = 0;
10621- MainOp = nullptr;
10622- for (const auto &P : Candidates) {
10623- if (P.second.size() < BestOpcodeNum)
10624- continue;
10625- for (Instruction *I : P.second) {
10626- if (IsSupportedInstruction(I) && !Operands.contains(I)) {
10627- MainOp = I;
10628- BestOpcodeNum = P.second.size();
10629- break;
10630- }
10631- }
10613+ if (!IsSupportedOpcode(MainOp) || Operands.contains(MainOp)) {
10614+ MainOp = nullptr;
10615+ return;
1063210616 }
10633- if (MainOp)
10634- MainOpcode = MainOp->getOpcode();
10617+ MainOpcode = MainOp->getOpcode();
1063510618 }
1063610619
1063710620 /// Returns the idempotent value for the \p MainOp with the detected \p
1063810621 /// MainOpcode. For Add, returns 0. For Or, it should choose between false and
1063910622 /// the operand itself, since V or V == V.
1064010623 Value *selectBestIdempotentValue() const {
10641- assert(isSupportedOpcode( MainOpcode) && "Unsupported opcode");
10624+ assert(MainOpcode == Instruction::Add && "Unsupported opcode");
1064210625 return ConstantExpr::getBinOpIdentity(MainOpcode, MainOp->getType(),
1064310626 !MainOp->isCommutative());
1064410627 }
@@ -10651,8 +10634,13 @@ class InstructionsCompatibilityAnalysis {
1065110634 return {V, V};
1065210635 if (!S.isCopyableElement(V))
1065310636 return convertTo(cast<Instruction>(V), S).second;
10654- assert(isSupportedOpcode(MainOpcode) && "Unsupported opcode");
10655- return {V, selectBestIdempotentValue()};
10637+ switch (MainOpcode) {
10638+ case Instruction::Add:
10639+ return {V, selectBestIdempotentValue()};
10640+ default:
10641+ break;
10642+ }
10643+ llvm_unreachable("Unsupported opcode");
1065610644 }
1065710645
1065810646 /// Builds operands for the original instructions.
@@ -10865,21 +10853,6 @@ class InstructionsCompatibilityAnalysis {
1086510853 }
1086610854 if (!Res)
1086710855 return InstructionsState::invalid();
10868- constexpr TTI::TargetCostKind Kind = TTI::TCK_RecipThroughput;
10869- InstructionCost ScalarCost = TTI.getInstructionCost(S.getMainOp(), Kind);
10870- InstructionCost VectorCost;
10871- FixedVectorType *VecTy =
10872- getWidenedType(S.getMainOp()->getType(), VL.size());
10873- switch (MainOpcode) {
10874- case Instruction::Add:
10875- case Instruction::LShr:
10876- VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind);
10877- break;
10878- default:
10879- llvm_unreachable("Unexpected instruction.");
10880- }
10881- if (VectorCost > ScalarCost)
10882- return InstructionsState::invalid();
1088310856 return S;
1088410857 }
1088510858 assert(Operands.size() == 2 && "Unexpected number of operands!");
@@ -21117,7 +21090,6 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
2111721090 ArrayRef<Value *> Op = EI.UserTE->getOperand(EI.EdgeIdx);
2111821091 const auto *It = find(Op, CD->getInst());
2111921092 assert(It != Op.end() && "Lane not set");
21120- SmallPtrSet<Instruction *, 4> Visited;
2112121093 do {
2112221094 int Lane = std::distance(Op.begin(), It);
2112321095 assert(Lane >= 0 && "Lane not set");
@@ -21139,15 +21111,13 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
2113921111 (InsertInReadyList && UseSD->isReady()))
2114021112 WorkList.push_back(UseSD);
2114121113 }
21142- } else if (Visited.insert(In).second) {
21143- if (ScheduleData *UseSD = getScheduleData(In)) {
21144- CD->incDependencies();
21145- if (!UseSD->isScheduled())
21146- CD->incrementUnscheduledDeps(1);
21147- if (!UseSD->hasValidDependencies() ||
21148- (InsertInReadyList && UseSD->isReady()))
21149- WorkList.push_back(UseSD);
21150- }
21114+ } else if (ScheduleData *UseSD = getScheduleData(In)) {
21115+ CD->incDependencies();
21116+ if (!UseSD->isScheduled())
21117+ CD->incrementUnscheduledDeps(1);
21118+ if (!UseSD->hasValidDependencies() ||
21119+ (InsertInReadyList && UseSD->isReady()))
21120+ WorkList.push_back(UseSD);
2115121121 }
2115221122 It = find(make_range(std::next(It), Op.end()), CD->getInst());
2115321123 } while (It != Op.end());
@@ -21905,11 +21875,9 @@ bool BoUpSLP::collectValuesToDemote(
2190521875 return all_of(E.Scalars, [&](Value *V) {
2190621876 if (isa<PoisonValue>(V))
2190721877 return true;
21908- APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
21909- if (E.isCopyableElement(V))
21910- return MaskedValueIsZero(V, ShiftedBits, SimplifyQuery(*DL));
2191121878 auto *I = cast<Instruction>(V);
2191221879 KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL);
21880+ APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
2191321881 return AmtKnownBits.getMaxValue().ult(BitWidth) &&
2191421882 MaskedValueIsZero(I->getOperand(0), ShiftedBits,
2191521883 SimplifyQuery(*DL));
0 commit comments