@@ -24435,135 +24435,134 @@ class HorizontalReduction {
24435
24435
VectorizedTree = GetNewVectorizedTree(
24436
24436
VectorizedTree,
24437
24437
emitReduction(Builder, *TTI, ReductionRoot->getType()));
24438
- if (VectorizedTree) {
24439
- // Reorder operands of bool logical op in the natural order to avoid
24440
- // possible problem with poison propagation. If not possible to reorder
24441
- // (both operands are originally RHS), emit an extra freeze instruction
24442
- // for the LHS operand.
24443
- // I.e., if we have original code like this:
24444
- // RedOp1 = select i1 ?, i1 LHS, i1 false
24445
- // RedOp2 = select i1 RHS, i1 ?, i1 false
24446
-
24447
- // Then, we swap LHS/RHS to create a new op that matches the poison
24448
- // semantics of the original code.
24449
-
24450
- // If we have original code like this and both values could be poison:
24451
- // RedOp1 = select i1 ?, i1 LHS, i1 false
24452
- // RedOp2 = select i1 ?, i1 RHS, i1 false
24453
-
24454
- // Then, we must freeze LHS in the new op.
24455
- auto FixBoolLogicalOps = [&, VectorizedTree](Value *&LHS, Value *&RHS,
24456
- Instruction *RedOp1,
24457
- Instruction *RedOp2,
24458
- bool InitStep) {
24459
- if (!AnyBoolLogicOp)
24460
- return;
24461
- if (isBoolLogicOp(RedOp1) && ((!InitStep && LHS == VectorizedTree) ||
24462
- getRdxOperand(RedOp1, 0) == LHS ||
24463
- isGuaranteedNotToBePoison(LHS, AC)))
24464
- return;
24465
- if (isBoolLogicOp(RedOp2) && ((!InitStep && RHS == VectorizedTree) ||
24466
- getRdxOperand(RedOp2, 0) == RHS ||
24467
- isGuaranteedNotToBePoison(RHS, AC))) {
24468
- std::swap(LHS, RHS);
24469
- return;
24470
- }
24471
- if (LHS != VectorizedTree)
24472
- LHS = Builder.CreateFreeze(LHS);
24473
- };
24474
- // Finish the reduction.
24475
- // Need to add extra arguments and not vectorized possible reduction
24476
- // values.
24477
- // Try to avoid dependencies between the scalar remainders after
24478
- // reductions.
24479
- auto FinalGen =
24480
- [&](ArrayRef<std::pair<Instruction *, Value *>> InstVals,
24481
- bool InitStep) {
24482
- unsigned Sz = InstVals.size();
24483
- SmallVector<std::pair<Instruction *, Value *>> ExtraReds(Sz / 2 +
24484
- Sz % 2);
24485
- for (unsigned I = 0, E = (Sz / 2) * 2; I < E; I += 2) {
24486
- Instruction *RedOp = InstVals[I + 1].first;
24487
- Builder.SetCurrentDebugLocation(RedOp->getDebugLoc());
24488
- Value *RdxVal1 = InstVals[I].second;
24489
- Value *StableRdxVal1 = RdxVal1;
24490
- auto It1 = TrackedVals.find(RdxVal1);
24491
- if (It1 != TrackedVals.end())
24492
- StableRdxVal1 = It1->second;
24493
- Value *RdxVal2 = InstVals[I + 1].second;
24494
- Value *StableRdxVal2 = RdxVal2;
24495
- auto It2 = TrackedVals.find(RdxVal2);
24496
- if (It2 != TrackedVals.end())
24497
- StableRdxVal2 = It2->second;
24498
- // To prevent poison from leaking across what used to be
24499
- // sequential, safe, scalar boolean logic operations, the
24500
- // reduction operand must be frozen.
24501
- FixBoolLogicalOps(StableRdxVal1, StableRdxVal2, InstVals[I].first,
24502
- RedOp, InitStep);
24503
- Value *ExtraRed = createOp(Builder, RdxKind, StableRdxVal1,
24504
- StableRdxVal2, "op.rdx", ReductionOps);
24505
- ExtraReds[I / 2] = std::make_pair(InstVals[I].first, ExtraRed);
24506
- }
24507
- if (Sz % 2 == 1)
24508
- ExtraReds[Sz / 2] = InstVals.back();
24509
- return ExtraReds;
24510
- };
24511
- SmallVector<std::pair<Instruction *, Value *>> ExtraReductions;
24512
- ExtraReductions.emplace_back(cast<Instruction>(ReductionRoot),
24513
- VectorizedTree);
24514
- SmallPtrSet<Value *, 8> Visited;
24515
- for (ArrayRef<Value *> Candidates : ReducedVals) {
24516
- for (Value *RdxVal : Candidates) {
24517
- if (!Visited.insert(RdxVal).second)
24518
- continue;
24519
- unsigned NumOps = VectorizedVals.lookup(RdxVal);
24520
- for (Instruction *RedOp :
24521
- ArrayRef(ReducedValsToOps.at(RdxVal)).drop_back(NumOps))
24522
- ExtraReductions.emplace_back(RedOp, RdxVal);
24523
- }
24438
+
24439
+ if (!VectorizedTree) {
24440
+ if (!CheckForReusedReductionOps) {
24441
+ for (ReductionOpsType &RdxOps : ReductionOps)
24442
+ for (Value *RdxOp : RdxOps)
24443
+ V.analyzedReductionRoot(cast<Instruction>(RdxOp));
24524
24444
}
24525
- // Iterate through all not-vectorized reduction values/extra arguments.
24526
- bool InitStep = true;
24527
- while (ExtraReductions.size() > 1) {
24528
- SmallVector<std::pair<Instruction *, Value *>> NewReds =
24529
- FinalGen(ExtraReductions, InitStep);
24530
- ExtraReductions.swap(NewReds);
24531
- InitStep = false;
24445
+ return nullptr;
24446
+ }
24447
+
24448
+ // Reorder operands of bool logical op in the natural order to avoid
24449
+ // possible problem with poison propagation. If not possible to reorder
24450
+ // (both operands are originally RHS), emit an extra freeze instruction
24451
+ // for the LHS operand.
24452
+ // I.e., if we have original code like this:
24453
+ // RedOp1 = select i1 ?, i1 LHS, i1 false
24454
+ // RedOp2 = select i1 RHS, i1 ?, i1 false
24455
+
24456
+ // Then, we swap LHS/RHS to create a new op that matches the poison
24457
+ // semantics of the original code.
24458
+
24459
+ // If we have original code like this and both values could be poison:
24460
+ // RedOp1 = select i1 ?, i1 LHS, i1 false
24461
+ // RedOp2 = select i1 ?, i1 RHS, i1 false
24462
+
24463
+ // Then, we must freeze LHS in the new op.
24464
+ auto FixBoolLogicalOps =
24465
+ [&, VectorizedTree](Value *&LHS, Value *&RHS, Instruction *RedOp1,
24466
+ Instruction *RedOp2, bool InitStep) {
24467
+ if (!AnyBoolLogicOp)
24468
+ return;
24469
+ if (isBoolLogicOp(RedOp1) && ((!InitStep && LHS == VectorizedTree) ||
24470
+ getRdxOperand(RedOp1, 0) == LHS ||
24471
+ isGuaranteedNotToBePoison(LHS, AC)))
24472
+ return;
24473
+ if (isBoolLogicOp(RedOp2) && ((!InitStep && RHS == VectorizedTree) ||
24474
+ getRdxOperand(RedOp2, 0) == RHS ||
24475
+ isGuaranteedNotToBePoison(RHS, AC))) {
24476
+ std::swap(LHS, RHS);
24477
+ return;
24478
+ }
24479
+ if (LHS != VectorizedTree)
24480
+ LHS = Builder.CreateFreeze(LHS);
24481
+ };
24482
+ // Finish the reduction.
24483
+ // Need to add extra arguments and not vectorized possible reduction values.
24484
+ // Try to avoid dependencies between the scalar remainders after reductions.
24485
+ auto FinalGen = [&](ArrayRef<std::pair<Instruction *, Value *>> InstVals,
24486
+ bool InitStep) {
24487
+ unsigned Sz = InstVals.size();
24488
+ SmallVector<std::pair<Instruction *, Value *>> ExtraReds(Sz / 2 + Sz % 2);
24489
+ for (unsigned I = 0, E = (Sz / 2) * 2; I < E; I += 2) {
24490
+ Instruction *RedOp = InstVals[I + 1].first;
24491
+ Builder.SetCurrentDebugLocation(RedOp->getDebugLoc());
24492
+ Value *RdxVal1 = InstVals[I].second;
24493
+ Value *StableRdxVal1 = RdxVal1;
24494
+ auto It1 = TrackedVals.find(RdxVal1);
24495
+ if (It1 != TrackedVals.end())
24496
+ StableRdxVal1 = It1->second;
24497
+ Value *RdxVal2 = InstVals[I + 1].second;
24498
+ Value *StableRdxVal2 = RdxVal2;
24499
+ auto It2 = TrackedVals.find(RdxVal2);
24500
+ if (It2 != TrackedVals.end())
24501
+ StableRdxVal2 = It2->second;
24502
+ // To prevent poison from leaking across what used to be sequential,
24503
+ // safe, scalar boolean logic operations, the reduction operand must be
24504
+ // frozen.
24505
+ FixBoolLogicalOps(StableRdxVal1, StableRdxVal2, InstVals[I].first,
24506
+ RedOp, InitStep);
24507
+ Value *ExtraRed = createOp(Builder, RdxKind, StableRdxVal1,
24508
+ StableRdxVal2, "op.rdx", ReductionOps);
24509
+ ExtraReds[I / 2] = std::make_pair(InstVals[I].first, ExtraRed);
24510
+ }
24511
+ if (Sz % 2 == 1)
24512
+ ExtraReds[Sz / 2] = InstVals.back();
24513
+ return ExtraReds;
24514
+ };
24515
+ SmallVector<std::pair<Instruction *, Value *>> ExtraReductions;
24516
+ ExtraReductions.emplace_back(cast<Instruction>(ReductionRoot),
24517
+ VectorizedTree);
24518
+ SmallPtrSet<Value *, 8> Visited;
24519
+ for (ArrayRef<Value *> Candidates : ReducedVals) {
24520
+ for (Value *RdxVal : Candidates) {
24521
+ if (!Visited.insert(RdxVal).second)
24522
+ continue;
24523
+ unsigned NumOps = VectorizedVals.lookup(RdxVal);
24524
+ for (Instruction *RedOp :
24525
+ ArrayRef(ReducedValsToOps.at(RdxVal)).drop_back(NumOps))
24526
+ ExtraReductions.emplace_back(RedOp, RdxVal);
24532
24527
}
24533
- VectorizedTree = ExtraReductions.front().second;
24528
+ }
24529
+ // Iterate through all not-vectorized reduction values/extra arguments.
24530
+ bool InitStep = true;
24531
+ while (ExtraReductions.size() > 1) {
24532
+ SmallVector<std::pair<Instruction *, Value *>> NewReds =
24533
+ FinalGen(ExtraReductions, InitStep);
24534
+ ExtraReductions.swap(NewReds);
24535
+ InitStep = false;
24536
+ }
24537
+ VectorizedTree = ExtraReductions.front().second;
24534
24538
24535
- ReductionRoot->replaceAllUsesWith(VectorizedTree);
24539
+ ReductionRoot->replaceAllUsesWith(VectorizedTree);
24536
24540
24537
- // The original scalar reduction is expected to have no remaining
24538
- // uses outside the reduction tree itself. Assert that we got this
24539
- // correct, replace internal uses with undef, and mark for eventual
24540
- // deletion.
24541
+ // The original scalar reduction is expected to have no remaining
24542
+ // uses outside the reduction tree itself. Assert that we got this
24543
+ // correct, replace internal uses with poison, and mark for eventual
24544
+ // deletion.
24541
24545
#ifndef NDEBUG
24542
- SmallPtrSet<Value *, 4> IgnoreSet;
24543
- for (ArrayRef<Value *> RdxOps : ReductionOps)
24544
- IgnoreSet.insert_range(RdxOps);
24546
+ SmallPtrSet<Value *, 4> IgnoreSet;
24547
+ for (ArrayRef<Value *> RdxOps : ReductionOps)
24548
+ IgnoreSet.insert_range(RdxOps);
24545
24549
#endif
24546
- for (ArrayRef<Value *> RdxOps : ReductionOps) {
24547
- for (Value *Ignore : RdxOps) {
24548
- if (!Ignore)
24549
- continue;
24550
+ for (ArrayRef<Value *> RdxOps : ReductionOps) {
24551
+ for (Value *Ignore : RdxOps) {
24552
+ if (!Ignore)
24553
+ continue;
24550
24554
#ifndef NDEBUG
24551
- for (auto *U : Ignore->users()) {
24552
- assert(IgnoreSet.count(U) &&
24553
- "All users must be either in the reduction ops list.");
24554
- }
24555
+ for (auto *U : Ignore->users()) {
24556
+ assert(IgnoreSet.count(U) &&
24557
+ "All users must be either in the reduction ops list.");
24558
+ }
24555
24559
#endif
24556
- if (!Ignore->use_empty()) {
24557
- Value *P = PoisonValue::get(Ignore->getType());
24558
- Ignore->replaceAllUsesWith(P);
24559
- }
24560
+ if (!Ignore->use_empty()) {
24561
+ Value *P = PoisonValue::get(Ignore->getType());
24562
+ Ignore->replaceAllUsesWith(P);
24560
24563
}
24561
- V.removeInstructionsAndOperands(RdxOps, VectorValuesAndScales);
24562
24564
}
24563
- } else if (!CheckForReusedReductionOps) {
24564
- for (ReductionOpsType &RdxOps : ReductionOps)
24565
- for (Value *RdxOp : RdxOps)
24566
- V.analyzedReductionRoot(cast<Instruction>(RdxOp));
24565
+ V.removeInstructionsAndOperands(RdxOps, VectorValuesAndScales);
24567
24566
}
24568
24567
return VectorizedTree;
24569
24568
}
0 commit comments