@@ -10564,35 +10564,41 @@ class InstructionsCompatibilityAnalysis {
10564
10564
unsigned MainOpcode = 0;
10565
10565
Instruction *MainOp = nullptr;
10566
10566
10567
+ /// Returns true if \p Opcode can be used as the main opcode for copyable
10568
+ /// elements. Only Add and LShr are currently accepted.
10569
+ static bool isSupportedOpcode(const unsigned Opcode) {
10570
+ return Opcode == Instruction::Add || Opcode == Instruction::LShr;
10571
+ }
10572
+
10567
10573
/// Identifies the best candidate value, which represents main opcode
10568
10574
/// operation.
10569
10575
/// Currently the best candidate is an Add or LShr instruction with the parent
10570
10576
/// block with the highest DFS incoming number (block, that dominates other).
10571
10577
void findAndSetMainInstruction(ArrayRef<Value *> VL, const BoUpSLP &R) {
10572
10578
BasicBlock *Parent = nullptr;
10573
10579
// Checks if the instruction has a supported opcode.
10574
- auto IsSupportedOpcode = [&](Instruction *I) {
10575
- return I && I->getOpcode() == Instruction::Add &&
10580
+ auto IsSupportedInstruction = [&](Instruction *I) {
10581
+ return I && isSupportedOpcode( I->getOpcode()) &&
10576
10582
(!doesNotNeedToBeScheduled(I) || !R.isVectorized(I));
10577
10583
};
10578
10584
// Exclude operand instructions immediately to improve compile time; they
10579
10585
// cannot be scheduled anyway.
10580
10586
SmallDenseSet<Value *, 8> Operands;
10587
+ SmallMapVector<unsigned, SmallVector<Instruction *>, 4> Candidates;
10581
10588
for (Value *V : VL) {
10582
10589
auto *I = dyn_cast<Instruction>(V);
10583
10590
if (!I)
10584
10591
continue;
10585
10592
if (!DT.isReachableFromEntry(I->getParent()))
10586
10593
continue;
10587
- if (!MainOp ) {
10588
- MainOp = I ;
10594
+ if (Candidates.empty() ) {
10595
+ Candidates.try_emplace(I->getOpcode()).first->second.push_back(I) ;
10589
10596
Parent = I->getParent();
10590
10597
Operands.insert(I->op_begin(), I->op_end());
10591
10598
continue;
10592
10599
}
10593
10600
if (Parent == I->getParent()) {
10594
- if (!IsSupportedOpcode(MainOp) && !Operands.contains(I))
10595
- MainOp = I;
10601
+ Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
10596
10602
Operands.insert(I->op_begin(), I->op_end());
10597
10603
continue;
10598
10604
}
@@ -10604,24 +10610,35 @@ class InstructionsCompatibilityAnalysis {
10604
10610
(NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
10605
10611
"Different nodes should have different DFS numbers");
10606
10612
if (NodeA->getDFSNumIn() < NodeB->getDFSNumIn()) {
10607
- MainOp = I;
10613
+ Candidates.clear();
10614
+ Candidates.try_emplace(I->getOpcode()).first->second.push_back(I);
10608
10615
Parent = I->getParent();
10609
10616
Operands.clear();
10610
10617
Operands.insert(I->op_begin(), I->op_end());
10611
10618
}
10612
10619
}
10613
- if (!IsSupportedOpcode(MainOp) || Operands.contains(MainOp)) {
10614
- MainOp = nullptr;
10615
- return;
10620
+ unsigned BestOpcodeNum = 0;
10621
+ MainOp = nullptr;
10622
+ for (const auto &P : Candidates) {
10623
+ if (P.second.size() < BestOpcodeNum)
10624
+ continue;
10625
+ for (Instruction *I : P.second) {
10626
+ if (IsSupportedInstruction(I) && !Operands.contains(I)) {
10627
+ MainOp = I;
10628
+ BestOpcodeNum = P.second.size();
10629
+ break;
10630
+ }
10631
+ }
10616
10632
}
10617
- MainOpcode = MainOp->getOpcode();
10633
+ if (MainOp)
10634
+ MainOpcode = MainOp->getOpcode();
10618
10635
}
10619
10636
10620
10637
/// Returns the identity operand for \p MainOp with the detected \p MainOpcode,
10621
10638
/// as computed by ConstantExpr::getBinOpIdentity. For Add, returns 0. For Or,
10622
10639
/// it should choose between false and the operand itself, since V or V == V.
10623
10640
Value *selectBestIdempotentValue() const {
10624
- assert(MainOpcode == Instruction::Add && "Unsupported opcode");
10641
+ assert(isSupportedOpcode( MainOpcode) && "Unsupported opcode");
10625
10642
return ConstantExpr::getBinOpIdentity(MainOpcode, MainOp->getType(),
10626
10643
!MainOp->isCommutative());
10627
10644
}
@@ -10634,13 +10651,8 @@ class InstructionsCompatibilityAnalysis {
10634
10651
return {V, V};
10635
10652
if (!S.isCopyableElement(V))
10636
10653
return convertTo(cast<Instruction>(V), S).second;
10637
- switch (MainOpcode) {
10638
- case Instruction::Add:
10639
- return {V, selectBestIdempotentValue()};
10640
- default:
10641
- break;
10642
- }
10643
- llvm_unreachable("Unsupported opcode");
10654
+ assert(isSupportedOpcode(MainOpcode) && "Unsupported opcode");
10655
+ return {V, selectBestIdempotentValue()};
10644
10656
}
10645
10657
10646
10658
/// Builds operands for the original instructions.
@@ -10853,6 +10865,21 @@ class InstructionsCompatibilityAnalysis {
10853
10865
}
10854
10866
if (!Res)
10855
10867
return InstructionsState::invalid();
10868
+ constexpr TTI::TargetCostKind Kind = TTI::TCK_RecipThroughput;
10869
+ InstructionCost ScalarCost = TTI.getInstructionCost(S.getMainOp(), Kind);
10870
+ InstructionCost VectorCost;
10871
+ FixedVectorType *VecTy =
10872
+ getWidenedType(S.getMainOp()->getType(), VL.size());
10873
+ switch (MainOpcode) {
10874
+ case Instruction::Add:
10875
+ case Instruction::LShr:
10876
+ VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind);
10877
+ break;
10878
+ default:
10879
+ llvm_unreachable("Unexpected instruction.");
10880
+ }
10881
+ if (VectorCost > ScalarCost)
10882
+ return InstructionsState::invalid();
10856
10883
return S;
10857
10884
}
10858
10885
assert(Operands.size() == 2 && "Unexpected number of operands!");
@@ -21090,6 +21117,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
21090
21117
ArrayRef<Value *> Op = EI.UserTE->getOperand(EI.EdgeIdx);
21091
21118
const auto *It = find(Op, CD->getInst());
21092
21119
assert(It != Op.end() && "Lane not set");
21120
+ SmallPtrSet<Instruction *, 4> Visited;
21093
21121
do {
21094
21122
int Lane = std::distance(Op.begin(), It);
21095
21123
assert(Lane >= 0 && "Lane not set");
@@ -21111,13 +21139,15 @@ void BoUpSLP::BlockScheduling::calculateDependencies(
21111
21139
(InsertInReadyList && UseSD->isReady()))
21112
21140
WorkList.push_back(UseSD);
21113
21141
}
21114
- } else if (ScheduleData *UseSD = getScheduleData(In)) {
21115
- CD->incDependencies();
21116
- if (!UseSD->isScheduled())
21117
- CD->incrementUnscheduledDeps(1);
21118
- if (!UseSD->hasValidDependencies() ||
21119
- (InsertInReadyList && UseSD->isReady()))
21120
- WorkList.push_back(UseSD);
21142
+ } else if (Visited.insert(In).second) {
21143
+ if (ScheduleData *UseSD = getScheduleData(In)) {
21144
+ CD->incDependencies();
21145
+ if (!UseSD->isScheduled())
21146
+ CD->incrementUnscheduledDeps(1);
21147
+ if (!UseSD->hasValidDependencies() ||
21148
+ (InsertInReadyList && UseSD->isReady()))
21149
+ WorkList.push_back(UseSD);
21150
+ }
21121
21151
}
21122
21152
It = find(make_range(std::next(It), Op.end()), CD->getInst());
21123
21153
} while (It != Op.end());
@@ -21875,9 +21905,11 @@ bool BoUpSLP::collectValuesToDemote(
21875
21905
return all_of(E.Scalars, [&](Value *V) {
21876
21906
if (isa<PoisonValue>(V))
21877
21907
return true;
21908
+ APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
21909
+ if (E.isCopyableElement(V))
21910
+ return MaskedValueIsZero(V, ShiftedBits, SimplifyQuery(*DL));
21878
21911
auto *I = cast<Instruction>(V);
21879
21912
KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL);
21880
- APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
21881
21913
return AmtKnownBits.getMaxValue().ult(BitWidth) &&
21882
21914
MaskedValueIsZero(I->getOperand(0), ShiftedBits,
21883
21915
SimplifyQuery(*DL));
0 commit comments