@@ -10564,41 +10564,35 @@ class InstructionsCompatibilityAnalysis {
10564
10564
unsigned MainOpcode = 0;
10565
10565
Instruction *MainOp = nullptr;
10566
10566
10567
- /// Checks if the opcode is supported as the main opcode for copyable
10568
- /// elements.
10569
- static bool isSupportedOpcode(const unsigned Opcode) {
10570
- return Opcode == Instruction::Add || Opcode == Instruction::LShr;
10571
- }
10572
-
10573
10567
/// Identifies the best candidate value, which represents main opcode
10574
10568
/// operation.
10575
10569
/// Currently the best candidate is the Add instruction with the parent
10576
10570
/// block with the highest DFS incoming number (block, that dominates other).
10577
10571
void findAndSetMainInstruction(ArrayRef<Value *> VL, const BoUpSLP &R) {
10578
10572
BasicBlock *Parent = nullptr;
10579
10573
// Checks if the instruction has supported opcode.
10580
- auto IsSupportedInstruction = [&](Instruction *I) {
10581
- return I && isSupportedOpcode( I->getOpcode()) &&
10574
+ auto IsSupportedOpcode = [&](Instruction *I) {
10575
+ return I && I->getOpcode() == Instruction::Add &&
10582
10576
(!doesNotNeedToBeScheduled(I) || !R.isVectorized(I));
10583
10577
};
10584
10578
// Exclude operands instructions immediately to improve compile time, it
10585
10579
// will be unable to schedule anyway.
10586
10580
SmallDenseSet<Value *, 8> Operands;
10587
- SmallMapVector<unsigned, SmallVector<Instruction *>, 4> Candidates;
10588
10581
for (Value *V : VL) {
10589
10582
auto *I = dyn_cast<Instruction>(V);
10590
10583
if (!I)
10591
10584
continue;
10592
10585
if (!DT.isReachableFromEntry(I->getParent()))
10593
10586
continue;
10594
- if (Candidates.empty() ) {
10595
- Candidates.try_emplace(I->getOpcode()).first->second.push_back(I) ;
10587
+ if (!MainOp ) {
10588
+ MainOp = I ;
10596
10589
Parent = I->getParent();
10597
10590
Operands.insert(I->op_begin(), I->op_end());
10598
10591
continue;
10599
10592
}
10600
10593
if (Parent == I->getParent()) {
10601
- Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
10594
+ if (!IsSupportedOpcode(MainOp) && !Operands.contains(I))
10595
+ MainOp = I;
10602
10596
Operands.insert(I->op_begin(), I->op_end());
10603
10597
continue;
10604
10598
}
@@ -10610,35 +10604,24 @@ class InstructionsCompatibilityAnalysis {
10610
10604
(NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
10611
10605
"Different nodes should have different DFS numbers");
10612
10606
if (NodeA->getDFSNumIn() < NodeB->getDFSNumIn()) {
10613
- Candidates.clear();
10614
- Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
10607
+ MainOp = I;
10615
10608
Parent = I->getParent();
10616
10609
Operands.clear();
10617
10610
Operands.insert(I->op_begin(), I->op_end());
10618
10611
}
10619
10612
}
10620
- unsigned BestOpcodeNum = 0;
10621
- MainOp = nullptr;
10622
- for (const auto &P : Candidates) {
10623
- if (P.second.size() < BestOpcodeNum)
10624
- continue;
10625
- for (Instruction *I : P.second) {
10626
- if (IsSupportedInstruction(I) && !Operands.contains(I)) {
10627
- MainOp = I;
10628
- BestOpcodeNum = P.second.size();
10629
- break;
10630
- }
10631
- }
10613
+ if (!IsSupportedOpcode(MainOp) || Operands.contains(MainOp)) {
10614
+ MainOp = nullptr;
10615
+ return;
10632
10616
}
10633
- if (MainOp)
10634
- MainOpcode = MainOp->getOpcode();
10617
+ MainOpcode = MainOp->getOpcode();
10635
10618
}
10636
10619
10637
10620
/// Returns the idempotent value for the \p MainOp with the detected \p
10638
10621
/// MainOpcode. For Add, returns 0. For Or, it should choose between false and
10639
10622
/// the operand itself, since V or V == V.
10640
10623
Value *selectBestIdempotentValue() const {
10641
- assert(isSupportedOpcode( MainOpcode) && "Unsupported opcode");
10624
+ assert(MainOpcode == Instruction::Add && "Unsupported opcode");
10642
10625
return ConstantExpr::getBinOpIdentity(MainOpcode, MainOp->getType(),
10643
10626
!MainOp->isCommutative());
10644
10627
}
@@ -10651,8 +10634,13 @@ class InstructionsCompatibilityAnalysis {
10651
10634
return {V, V};
10652
10635
if (!S.isCopyableElement(V))
10653
10636
return convertTo(cast<Instruction>(V), S).second;
10654
- assert(isSupportedOpcode(MainOpcode) && "Unsupported opcode");
10655
- return {V, selectBestIdempotentValue()};
10637
+ switch (MainOpcode) {
10638
+ case Instruction::Add:
10639
+ return {V, selectBestIdempotentValue()};
10640
+ default:
10641
+ break;
10642
+ }
10643
+ llvm_unreachable("Unsupported opcode");
10656
10644
}
10657
10645
10658
10646
/// Builds operands for the original instructions.
@@ -10865,21 +10853,6 @@ class InstructionsCompatibilityAnalysis {
10865
10853
}
10866
10854
if (!Res)
10867
10855
return InstructionsState::invalid();
10868
- constexpr TTI::TargetCostKind Kind = TTI::TCK_RecipThroughput;
10869
- InstructionCost ScalarCost = TTI.getInstructionCost(S.getMainOp(), Kind);
10870
- InstructionCost VectorCost;
10871
- FixedVectorType *VecTy =
10872
- getWidenedType(S.getMainOp()->getType(), VL.size());
10873
- switch (MainOpcode) {
10874
- case Instruction::Add:
10875
- case Instruction::LShr:
10876
- VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind);
10877
- break;
10878
- default:
10879
- llvm_unreachable("Unexpected instruction.");
10880
- }
10881
- if (VectorCost > ScalarCost)
10882
- return InstructionsState::invalid();
10883
10856
return S;
10884
10857
}
10885
10858
assert(Operands.size() == 2 && "Unexpected number of operands!");
@@ -21117,7 +21090,6 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
21117
21090
ArrayRef<Value *> Op = EI.UserTE->getOperand(EI.EdgeIdx);
21118
21091
const auto *It = find(Op, CD->getInst());
21119
21092
assert(It != Op.end() && "Lane not set");
21120
- SmallPtrSet<Instruction *, 4> Visited;
21121
21093
do {
21122
21094
int Lane = std::distance(Op.begin(), It);
21123
21095
assert(Lane >= 0 && "Lane not set");
@@ -21139,15 +21111,13 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
21139
21111
(InsertInReadyList && UseSD->isReady()))
21140
21112
WorkList.push_back(UseSD);
21141
21113
}
21142
- } else if (Visited.insert(In).second) {
21143
- if (ScheduleData *UseSD = getScheduleData(In)) {
21144
- CD->incDependencies();
21145
- if (!UseSD->isScheduled())
21146
- CD->incrementUnscheduledDeps(1);
21147
- if (!UseSD->hasValidDependencies() ||
21148
- (InsertInReadyList && UseSD->isReady()))
21149
- WorkList.push_back(UseSD);
21150
- }
21114
+ } else if (ScheduleData *UseSD = getScheduleData(In)) {
21115
+ CD->incDependencies();
21116
+ if (!UseSD->isScheduled())
21117
+ CD->incrementUnscheduledDeps(1);
21118
+ if (!UseSD->hasValidDependencies() ||
21119
+ (InsertInReadyList && UseSD->isReady()))
21120
+ WorkList.push_back(UseSD);
21151
21121
}
21152
21122
It = find(make_range(std::next(It), Op.end()), CD->getInst());
21153
21123
} while (It != Op.end());
@@ -21905,11 +21875,9 @@ bool BoUpSLP::collectValuesToDemote(
21905
21875
return all_of(E.Scalars, [&](Value *V) {
21906
21876
if (isa<PoisonValue>(V))
21907
21877
return true;
21908
- APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
21909
- if (E.isCopyableElement(V))
21910
- return MaskedValueIsZero(V, ShiftedBits, SimplifyQuery(*DL));
21911
21878
auto *I = cast<Instruction>(V);
21912
21879
KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL);
21880
+ APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
21913
21881
return AmtKnownBits.getMaxValue().ult(BitWidth) &&
21914
21882
MaskedValueIsZero(I->getOperand(0), ShiftedBits,
21915
21883
SimplifyQuery(*DL));
0 commit comments