
Commit 9c4a307

[LV] Fold isPredicatedInst into isMaskRequired
Fold LoopVectorizationCostModel::isPredicatedInst into LoopVectorizationLegality::isMaskRequired, fixing a pending TODO item. Note that we still need to pass whether or not we're tail-folding by masking from the cost-model into isMaskRequired.
Parent: 47ce75e
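For orientation before the per-file diffs: cost-model call sites change shape as sketched below. The identifiers are taken from the patch itself; the bodies are elided placeholders.

// Before: the cost model answered the predication question itself.
if (isPredicatedInst(I)) {
  // ... predicated handling ...
}

// After: legality answers it, given whether the tail is folded by masking.
if (Legal->isMaskRequired(I, foldTailByMasking())) {
  // ... predicated handling ...
}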

6 files changed: +282 −211 lines

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 1 addition & 3 deletions
@@ -420,9 +420,7 @@ class LoopVectorizationLegality {
 
   /// Returns true if vector representation of the instruction \p I
   /// requires mask.
-  bool isMaskRequired(const Instruction *I) const {
-    return MaskedOp.contains(I);
-  }
+  bool isMaskRequired(Instruction *I, bool FoldTailByMasking) const;
 
   /// Returns true if there is at least one function call in the loop which
   /// has a vectorized variant available.

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 51 additions & 0 deletions
@@ -1408,6 +1408,57 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
 }
 
+bool LoopVectorizationLegality::isMaskRequired(Instruction *I,
+                                               bool FoldTailByMasking) const {
+  if (isSafeToSpeculativelyExecute(I, TheLoop->getLatchCmpInst()) ||
+      (isa<LoadInst, StoreInst, CallInst>(I) && !MaskedOp.contains(I)) ||
+      isa<BranchInst, SwitchInst, PHINode, AllocaInst>(I))
+    return false;
+
+  // If the instruction was executed conditionally in the original scalar loop,
+  // predication is needed with a mask whose lanes are all possibly inactive.
+  if (blockNeedsPredication(I->getParent()))
+    return true;
+
+  // If we're not folding tail by masking, bail out now.
+  if (!FoldTailByMasking)
+    return false;
+
+  // All that remain are instructions with side-effects originally executed in
+  // the loop unconditionally, but now execute under a tail-fold mask (only)
+  // having at least one active lane (the first). If the side-effects of the
+  // instruction are invariant, executing it w/o (the tail-folding) mask is safe
+  // - it will cause the same side-effects as when masked.
+  switch (I->getOpcode()) {
+  default:
+    llvm_unreachable(
+        "instruction should have been considered by earlier checks");
+  case Instruction::Call:
+    // Side-effects of a Call are assumed to be non-invariant, needing a
+    // (fold-tail) mask.
+    assert(MaskedOp.contains(I) &&
+           "should have returned earlier for calls not needing a mask");
+    return true;
+  case Instruction::Load:
+    // If the address is loop invariant no predication is needed.
+    return !isInvariant(getLoadStorePointerOperand(I));
+  case Instruction::Store: {
+    // For stores, we need to prove both speculation safety (which follows from
+    // the same argument as loads), but also must prove the value being stored
+    // is correct. The easiest form of the later is to require that all values
+    // stored are the same.
+    return !(isInvariant(getLoadStorePointerOperand(I)) &&
+             TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
+  }
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::SRem:
+  case Instruction::URem:
+    // If the divisor is loop-invariant no predication is needed.
+    return !TheLoop->isLoopInvariant(I->getOperand(1));
+  }
+}
+
 bool LoopVectorizationLegality::blockCanBePredicated(
     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
     SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
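To illustrate the div/rem case in the new function with a hypothetical source loop (not part of this patch or its tests): the division below executes unconditionally and its divisor d is loop-invariant, so even when the tail is folded by masking it is expected not to need a mask, because running it on the inactive tail lanes causes the same side effects (including any trap on a zero divisor) as running it on the always-active first lane.

// Hypothetical example, not from the patch: the divisor `d` is loop-invariant,
// so isMaskRequired(<the udiv>, /*FoldTailByMasking=*/true) is expected to
// return false and the division can be widened without predication.
void scale_down(unsigned *a, unsigned d, unsigned n) {
  for (unsigned i = 0; i < n; ++i)
    a[i] = a[i] / d; // unconditional udiv with a loop-invariant divisor
}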

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 24 additions & 80 deletions
@@ -1292,11 +1292,6 @@ class LoopVectorizationCostModel {
   /// \p VF is the vectorization factor that will be used to vectorize \p I.
   bool isScalarWithPredication(Instruction *I, ElementCount VF) const;
 
-  /// Returns true if \p I is an instruction that needs to be predicated
-  /// at runtime. The result is independent of the predication mechanism.
-  /// Superset of instructions that return true for isScalarWithPredication.
-  bool isPredicatedInst(Instruction *I) const;
-
   /// Return the costs for our two available strategies for lowering a
   /// div/rem operation which requires speculating at least one lane.
   /// First result is for scalarization (will be invalid for scalable
@@ -3018,7 +3013,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
 
 bool LoopVectorizationCostModel::isScalarWithPredication(
     Instruction *I, ElementCount VF) const {
-  if (!isPredicatedInst(I))
+  if (!Legal->isMaskRequired(I, foldTailByMasking()))
     return false;
 
   // Do we have a non-scalar lowering for this predicated
@@ -3057,59 +3052,6 @@ bool LoopVectorizationCostModel::isScalarWithPredication(
   }
 }
 
-// TODO: Fold into LoopVectorizationLegality::isMaskRequired.
-bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
-  // TODO: We can use the loop-preheader as context point here and get
-  // context sensitive reasoning for isSafeToSpeculativelyExecute.
-  if (isSafeToSpeculativelyExecute(I) ||
-      (isa<LoadInst, StoreInst, CallInst>(I) && !Legal->isMaskRequired(I)) ||
-      isa<BranchInst, SwitchInst, PHINode, AllocaInst>(I))
-    return false;
-
-  // If the instruction was executed conditionally in the original scalar loop,
-  // predication is needed with a mask whose lanes are all possibly inactive.
-  if (Legal->blockNeedsPredication(I->getParent()))
-    return true;
-
-  // If we're not folding the tail by masking, predication is unnecessary.
-  if (!foldTailByMasking())
-    return false;
-
-  // All that remain are instructions with side-effects originally executed in
-  // the loop unconditionally, but now execute under a tail-fold mask (only)
-  // having at least one active lane (the first). If the side-effects of the
-  // instruction are invariant, executing it w/o (the tail-folding) mask is safe
-  // - it will cause the same side-effects as when masked.
-  switch(I->getOpcode()) {
-  default:
-    llvm_unreachable(
-        "instruction should have been considered by earlier checks");
-  case Instruction::Call:
-    // Side-effects of a Call are assumed to be non-invariant, needing a
-    // (fold-tail) mask.
-    assert(Legal->isMaskRequired(I) &&
-           "should have returned earlier for calls not needing a mask");
-    return true;
-  case Instruction::Load:
-    // If the address is loop invariant no predication is needed.
-    return !Legal->isInvariant(getLoadStorePointerOperand(I));
-  case Instruction::Store: {
-    // For stores, we need to prove both speculation safety (which follows from
-    // the same argument as loads), but also must prove the value being stored
-    // is correct. The easiest form of the later is to require that all values
-    // stored are the same.
-    return !(Legal->isInvariant(getLoadStorePointerOperand(I)) &&
-             TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
-  }
-  case Instruction::UDiv:
-  case Instruction::SDiv:
-  case Instruction::SRem:
-  case Instruction::URem:
-    // If the divisor is loop-invariant no predication is needed.
-    return !TheLoop->isLoopInvariant(I->getOperand(1));
-  }
-}
-
 std::pair<InstructionCost, InstructionCost>
 LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
                                                      ElementCount VF) const {
@@ -3222,7 +3164,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
   // load, or any gaps in a store-access).
   bool PredicatedAccessRequiresMasking =
       blockNeedsPredicationForAnyReason(I->getParent()) &&
-      Legal->isMaskRequired(I);
+      Legal->isMaskRequired(I, foldTailByMasking());
   bool LoadAccessWithGapsRequiresEpilogMasking =
       isa<LoadInst>(I) && Group->requiresScalarEpilogue() &&
       !isScalarEpilogueAllowed();
@@ -3312,7 +3254,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
                  << *I << "\n");
       return;
     }
-    if (isPredicatedInst(I)) {
+    if (Legal->isMaskRequired(I, foldTailByMasking())) {
       LLVM_DEBUG(
           dbgs() << "LV: Found not uniform due to requiring predication: " << *I
                  << "\n");
@@ -5450,7 +5392,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I,
   // from moving "masked load/store" check from legality to cost model.
   // Masked Load/Gather emulation was previously never allowed.
   // Limited number of Masked Store/Scatter emulation was allowed.
-  assert((isPredicatedInst(I)) &&
+  assert((Legal->isMaskRequired(I, foldTailByMasking())) &&
         "Expecting a scalar emulated instruction");
   return isa<LoadInst>(I) ||
          (isa<StoreInst>(I) &&
@@ -5752,7 +5694,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
   // If we have a predicated load/store, it will need extra i1 extracts and
   // conditional branches, but may not be executed for each vector lane. Scale
   // the cost by the probability of executing the predicated block.
-  if (isPredicatedInst(I)) {
+  if (Legal->isMaskRequired(I, foldTailByMasking())) {
     Cost /= getPredBlockCostDivisor(CostKind);
 
     // Add the cost of an i1 extract and a branch
@@ -5785,7 +5727,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
          "Stride should be 1 or -1 for consecutive memory access");
   const Align Alignment = getLoadStoreAlignment(I);
   InstructionCost Cost = 0;
-  if (Legal->isMaskRequired(I)) {
+  if (Legal->isMaskRequired(I, foldTailByMasking())) {
     Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
                                       CostKind);
   } else {
@@ -5838,9 +5780,10 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
   const Value *Ptr = getLoadStorePointerOperand(I);
 
   return TTI.getAddressComputationCost(VectorTy) +
-         TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
-                                    Legal->isMaskRequired(I), Alignment,
-                                    CostKind, I);
+         TTI.getGatherScatterOpCost(
+             I->getOpcode(), VectorTy, Ptr,
+             Legal->isMaskRequired(I, foldTailByMasking()), Alignment, CostKind,
+             I);
 }
 
 InstructionCost
@@ -5869,12 +5812,12 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
       (isa<StoreInst>(I) && (Group->getNumMembers() < Group->getFactor()));
   InstructionCost Cost = TTI.getInterleavedMemoryOpCost(
       InsertPos->getOpcode(), WideVecTy, Group->getFactor(), Indices,
-      Group->getAlign(), AS, CostKind, Legal->isMaskRequired(I),
-      UseMaskForGaps);
+      Group->getAlign(), AS, CostKind,
+      Legal->isMaskRequired(I, foldTailByMasking()), UseMaskForGaps);
 
   if (Group->isReverse()) {
     // TODO: Add support for reversed masked interleaved access.
-    assert(!Legal->isMaskRequired(I) &&
+    assert(!Legal->isMaskRequired(I, foldTailByMasking()) &&
            "Reverse masked interleaved access not supported.");
     Cost += Group->getNumMembers() *
             TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, {},
@@ -6367,7 +6310,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
         continue;
       }
 
-      bool MaskRequired = Legal->isMaskRequired(CI);
+      bool MaskRequired = Legal->isMaskRequired(CI, foldTailByMasking());
       // Compute corresponding vector type for return value and arguments.
       Type *RetTy = toVectorizedTy(ScalarRetTy, VF);
       for (Type *ScalarTy : ScalarTys)
@@ -6487,7 +6430,7 @@ bool LoopVectorizationCostModel::shouldConsiderInvariant(Value *Op) {
   // instruction in the loop. In that case, it is not trivially hoistable.
   auto *OpI = dyn_cast<Instruction>(Op);
   return !OpI || !TheLoop->contains(OpI) ||
-         (!isPredicatedInst(OpI) &&
+         (!Legal->isMaskRequired(OpI, foldTailByMasking()) &&
           (!isa<PHINode>(OpI) || OpI->getParent() != TheLoop->getHeader()) &&
           all_of(OpI->operands(),
                  [this](Value *Op) { return shouldConsiderInvariant(Op); }));
@@ -6675,7 +6618,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
   case Instruction::SDiv:
   case Instruction::URem:
   case Instruction::SRem:
-    if (VF.isVector() && isPredicatedInst(I)) {
+    if (VF.isVector() && Legal->isMaskRequired(I, foldTailByMasking())) {
       const auto [ScalarCost, SafeDivisorCost] = getDivRemSpeculationCost(I, VF);
       return isDivRemScalarWithPredication(ScalarCost, SafeDivisorCost) ?
         ScalarCost : SafeDivisorCost;
@@ -6859,8 +6802,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
       return TTI::CastContextHint::Interleave;
     case LoopVectorizationCostModel::CM_Scalarize:
     case LoopVectorizationCostModel::CM_Widen:
-      return Legal->isMaskRequired(I) ? TTI::CastContextHint::Masked
-                                      : TTI::CastContextHint::Normal;
+      return Legal->isMaskRequired(I, foldTailByMasking())
+                 ? TTI::CastContextHint::Masked
+                 : TTI::CastContextHint::Normal;
     case LoopVectorizationCostModel::CM_Widen_Reverse:
      return TTI::CastContextHint::Reversed;
    case LoopVectorizationCostModel::CM_Unknown:
@@ -8417,7 +8361,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
     return nullptr;
 
   VPValue *Mask = nullptr;
-  if (Legal->isMaskRequired(I))
+  if (Legal->isMaskRequired(I, CM.foldTailByMasking()))
     Mask = getBlockInMask(Builder.getInsertBlock());
 
   // Determine if the pointer operand of the access is either consecutive or
@@ -8644,7 +8588,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
       // vector variant at this VF requires a mask, so we synthesize an
       // all-true mask.
       VPValue *Mask = nullptr;
-      if (Legal->isMaskRequired(CI))
+      if (Legal->isMaskRequired(CI, CM.foldTailByMasking()))
         Mask = getBlockInMask(Builder.getInsertBlock());
       else
         Mask = Plan.getOrAddLiveIn(
@@ -8685,7 +8629,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
   case Instruction::URem: {
     // If not provably safe, use a select to form a safe divisor before widening the
    // div/rem operation itself.  Otherwise fall through to general handling below.
-    if (CM.isPredicatedInst(I)) {
+    if (Legal->isMaskRequired(I, CM.foldTailByMasking())) {
      SmallVector<VPValue *> Ops(Operands);
      VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
      VPValue *One =
@@ -8768,7 +8712,7 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
 
   // In case of predicated execution (due to tail-folding, or conditional
   // execution, or both), pass the relevant mask.
-  if (Legal->isMaskRequired(HI->Store))
+  if (Legal->isMaskRequired(HI->Store, CM.foldTailByMasking()))
     HGramOps.push_back(getBlockInMask(Builder.getInsertBlock()));
 
   return new VPHistogramRecipe(Opcode, HGramOps, HI->Store->getDebugLoc());
@@ -8781,7 +8725,7 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
       [&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); },
       Range);
 
-  bool IsPredicated = CM.isPredicatedInst(I);
+  bool IsPredicated = Legal->isMaskRequired(I, CM.foldTailByMasking());
 
   // Even if the instruction is not marked as uniform, there are certain
   // intrinsic calls that can be effectively treated as such, so we check for

llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll

Lines changed: 7 additions & 12 deletions
@@ -64,10 +64,9 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
 ; TFCOMMON-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
 ; TFCOMMON-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
 ; TFCOMMON-NEXT:    [[LD:%.*]] = load double, ptr [[P2:%.*]], align 8
-; TFCOMMON-NEXT:    [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR3:[0-9]+]]
-; TFCOMMON-NEXT:    [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR3]]
-; TFCOMMON-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
-; TFCOMMON-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP6]], i32 1
+; TFCOMMON-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[LD]], i64 0
+; TFCOMMON-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
+; TFCOMMON-NEXT:    [[TMP8:%.*]] = call <2 x double> @exp_fixed(<2 x double> [[BROADCAST_SPLAT]])
 ; TFCOMMON-NEXT:    [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP8]], zeroinitializer
 ; TFCOMMON-NEXT:    [[TMP11:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP10]], <2 x i1> zeroinitializer
 ; TFCOMMON-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP11]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer
@@ -107,14 +106,10 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
 ; TFA_INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE9]] ]
 ; TFA_INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK2:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT10:%.*]], [[PRED_STORE_CONTINUE9]] ]
 ; TFA_INTERLEAVE-NEXT:    [[TMP4:%.*]] = load double, ptr [[P2:%.*]], align 8
-; TFA_INTERLEAVE-NEXT:    [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3:[0-9]+]]
-; TFA_INTERLEAVE-NEXT:    [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3]]
-; TFA_INTERLEAVE-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
-; TFA_INTERLEAVE-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP6]], i32 1
-; TFA_INTERLEAVE-NEXT:    [[TMP9:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3]]
-; TFA_INTERLEAVE-NEXT:    [[TMP10:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3]]
-; TFA_INTERLEAVE-NEXT:    [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i32 0
-; TFA_INTERLEAVE-NEXT:    [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[TMP10]], i32 1
+; TFA_INTERLEAVE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i64 0
+; TFA_INTERLEAVE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
+; TFA_INTERLEAVE-NEXT:    [[TMP8:%.*]] = call <2 x double> @exp_fixed(<2 x double> [[BROADCAST_SPLAT]])
+; TFA_INTERLEAVE-NEXT:    [[TMP12:%.*]] = call <2 x double> @exp_fixed(<2 x double> [[BROADCAST_SPLAT]])
 ; TFA_INTERLEAVE-NEXT:    [[TMP15:%.*]] = fcmp ule <2 x double> [[TMP8]], zeroinitializer
 ; TFA_INTERLEAVE-NEXT:    [[TMP16:%.*]] = fcmp ule <2 x double> [[TMP12]], zeroinitializer
 ; TFA_INTERLEAVE-NEXT:    [[TMP17:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP15]], <2 x i1> zeroinitializer
