
Commit e6c7094

[LV] Fold isPredicatedInst into isMaskRequired
Fold LoopVectorizationCostModel::isPredicatedInst into LoopVectorizationLegality::isMaskRequired, fixing a pending TODO item. Note that we still need to pass whether or not we're tail-folding by masking from the cost-model into isMaskRequired.
1 parent: a1e041b
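Concretely, call sites in the cost model and recipe builder change along these lines (before/after taken from the handleReplication hunk in the LoopVectorize.cpp diff below; inside LoopVectorizationCostModel itself the second argument is simply foldTailByMasking()):

// Before this commit: the cost model answers the predication question itself.
bool IsPredicated = CM.isPredicatedInst(I);

// After this commit: legality answers it, with the cost model passing in
// whether the tail is folded by masking.
bool IsPredicated = Legal->isMaskRequired(I, CM.foldTailByMasking());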

6 files changed: +452 −366 lines

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 1 addition & 3 deletions
@@ -420,9 +420,7 @@ class LoopVectorizationLegality {
 
   /// Returns true if vector representation of the instruction \p I
   /// requires mask.
-  bool isMaskRequired(const Instruction *I) const {
-    return MaskedOp.contains(I);
-  }
+  bool isMaskRequired(Instruction *I, bool FoldTailByMasking) const;
 
   /// Returns true if there is at least one function call in the loop which
   /// has a vectorized variant available.

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 51 additions & 0 deletions
@@ -1408,6 +1408,57 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
 }
 
+bool LoopVectorizationLegality::isMaskRequired(Instruction *I,
+                                               bool FoldTailByMasking) const {
+  if (isSafeToSpeculativelyExecute(I, TheLoop->getLatchCmpInst()) ||
+      (isa<LoadInst, StoreInst, CallInst>(I) && !MaskedOp.contains(I)) ||
+      isa<BranchInst, SwitchInst, PHINode, AllocaInst>(I))
+    return false;
+
+  // If the instruction was executed conditionally in the original scalar loop,
+  // predication is needed with a mask whose lanes are all possibly inactive.
+  if (blockNeedsPredication(I->getParent()))
+    return true;
+
+  // If we're not folding tail by masking, bail out now.
+  if (!FoldTailByMasking)
+    return false;
+
+  // All that remain are instructions with side-effects originally executed in
+  // the loop unconditionally, but now execute under a tail-fold mask (only)
+  // having at least one active lane (the first). If the side-effects of the
+  // instruction are invariant, executing it w/o (the tail-folding) mask is safe
+  // - it will cause the same side-effects as when masked.
+  switch (I->getOpcode()) {
+  default:
+    llvm_unreachable(
+        "instruction should have been considered by earlier checks");
+  case Instruction::Call:
+    // Side-effects of a Call are assumed to be non-invariant, needing a
+    // (fold-tail) mask.
+    assert(MaskedOp.contains(I) &&
+           "should have returned earlier for calls not needing a mask");
+    return true;
+  case Instruction::Load:
+    // If the address is loop invariant no predication is needed.
+    return !isInvariant(getLoadStorePointerOperand(I));
+  case Instruction::Store: {
+    // For stores, we need to prove both speculation safety (which follows from
+    // the same argument as loads), but also must prove the value being stored
+    // is correct. The easiest form of the later is to require that all values
+    // stored are the same.
+    return !(isInvariant(getLoadStorePointerOperand(I)) &&
+             TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
+  }
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::SRem:
+  case Instruction::URem:
+    // If the divisor is loop-invariant no predication is needed.
+    return !TheLoop->isLoopInvariant(I->getOperand(1));
+  }
+}
+
 bool LoopVectorizationLegality::blockCanBePredicated(
     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
     SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
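To make the tail-folding cases above concrete, consider a small hypothetical input loop (not taken from the patch or its tests). Assuming the vectorizer folds its tail by masking, the udiv below was executed unconditionally in the scalar loop and its divisor is loop-invariant, so evaluating it for the masked-off tail lanes introduces no new side effects; the Instruction::UDiv case above therefore returns false (no mask required for the division itself).

// Hypothetical C++ source loop, for illustration only.
void scaleDown(unsigned *A, unsigned D, int N) {
  for (int I = 0; I < N; ++I)
    A[I] = A[I] / D; // divisor D is loop-invariant: the udiv needs no
                     // predication mask even when the tail is folded
}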

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 24 additions & 78 deletions
@@ -1313,11 +1313,6 @@ class LoopVectorizationCostModel {
   /// \p VF is the vectorization factor that will be used to vectorize \p I.
   bool isScalarWithPredication(Instruction *I, ElementCount VF) const;
 
-  /// Returns true if \p I is an instruction that needs to be predicated
-  /// at runtime. The result is independent of the predication mechanism.
-  /// Superset of instructions that return true for isScalarWithPredication.
-  bool isPredicatedInst(Instruction *I) const;
-
   /// Return the costs for our two available strategies for lowering a
   /// div/rem operation which requires speculating at least one lane.
   /// First result is for scalarization (will be invalid for scalable

@@ -3203,7 +3198,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
 
 bool LoopVectorizationCostModel::isScalarWithPredication(
     Instruction *I, ElementCount VF) const {
-  if (!isPredicatedInst(I))
+  if (!Legal->isMaskRequired(I, foldTailByMasking()))
     return false;
 
   // Do we have a non-scalar lowering for this predicated

@@ -3242,57 +3237,6 @@ bool LoopVectorizationCostModel::isScalarWithPredication(
   }
 }
 
-// TODO: Fold into LoopVectorizationLegality::isMaskRequired.
-bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
-  // If predication is not needed, avoid it.
-  // TODO: We can use the loop-preheader as context point here and get
-  // context sensitive reasoning for isSafeToSpeculativelyExecute.
-  if (!blockNeedsPredicationForAnyReason(I->getParent()) ||
-      isSafeToSpeculativelyExecute(I) ||
-      (isa<LoadInst, StoreInst, CallInst>(I) && !Legal->isMaskRequired(I)) ||
-      isa<BranchInst, SwitchInst, PHINode, AllocaInst>(I))
-    return false;
-
-  // If the instruction was executed conditionally in the original scalar loop,
-  // predication is needed with a mask whose lanes are all possibly inactive.
-  if (Legal->blockNeedsPredication(I->getParent()))
-    return true;
-
-  // All that remain are instructions with side-effects originally executed in
-  // the loop unconditionally, but now execute under a tail-fold mask (only)
-  // having at least one active lane (the first). If the side-effects of the
-  // instruction are invariant, executing it w/o (the tail-folding) mask is safe
-  // - it will cause the same side-effects as when masked.
-  switch(I->getOpcode()) {
-  default:
-    llvm_unreachable(
-        "instruction should have been considered by earlier checks");
-  case Instruction::Call:
-    // Side-effects of a Call are assumed to be non-invariant, needing a
-    // (fold-tail) mask.
-    assert(Legal->isMaskRequired(I) &&
-           "should have returned earlier for calls not needing a mask");
-    return true;
-  case Instruction::Load:
-    // If the address is loop invariant no predication is needed.
-    return !Legal->isInvariant(getLoadStorePointerOperand(I));
-  case Instruction::Store: {
-    // For stores, we need to prove both speculation safety (which follows from
-    // the same argument as loads), but also must prove the value being stored
-    // is correct. The easiest form of the later is to require that all values
-    // stored are the same.
-    return !(Legal->isInvariant(getLoadStorePointerOperand(I)) &&
-             TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
-  }
-  case Instruction::UDiv:
-  case Instruction::SDiv:
-  case Instruction::SRem:
-  case Instruction::URem:
-    // If the divisor is loop-invariant no predication is needed.
-    return !TheLoop->isLoopInvariant(I->getOperand(1));
-  }
-}
-
 std::pair<InstructionCost, InstructionCost>
 LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
                                                      ElementCount VF) const {

@@ -3405,7 +3349,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
   // load, or any gaps in a store-access).
   bool PredicatedAccessRequiresMasking =
       blockNeedsPredicationForAnyReason(I->getParent()) &&
-      Legal->isMaskRequired(I);
+      Legal->isMaskRequired(I, foldTailByMasking());
   bool LoadAccessWithGapsRequiresEpilogMasking =
       isa<LoadInst>(I) && Group->requiresScalarEpilogue() &&
       !isScalarEpilogueAllowed();

@@ -3494,7 +3438,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
                         << *I << "\n");
       return;
     }
-    if (isPredicatedInst(I)) {
+    if (Legal->isMaskRequired(I, foldTailByMasking())) {
       LLVM_DEBUG(
           dbgs() << "LV: Found not uniform due to requiring predication: " << *I
                  << "\n");

@@ -5379,7 +5323,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I,
   // from moving "masked load/store" check from legality to cost model.
   // Masked Load/Gather emulation was previously never allowed.
   // Limited number of Masked Store/Scatter emulation was allowed.
-  assert((isPredicatedInst(I)) &&
+  assert((Legal->isMaskRequired(I, foldTailByMasking())) &&
         "Expecting a scalar emulated instruction");
  return isa<LoadInst>(I) ||
         (isa<StoreInst>(I) &&

@@ -5677,7 +5621,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
   // If we have a predicated load/store, it will need extra i1 extracts and
   // conditional branches, but may not be executed for each vector lane. Scale
   // the cost by the probability of executing the predicated block.
-  if (isPredicatedInst(I)) {
+  if (Legal->isMaskRequired(I, foldTailByMasking())) {
     Cost /= getPredBlockCostDivisor(CostKind);
 
     // Add the cost of an i1 extract and a branch

@@ -5710,7 +5654,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
          "Stride should be 1 or -1 for consecutive memory access");
   const Align Alignment = getLoadStoreAlignment(I);
   InstructionCost Cost = 0;
-  if (Legal->isMaskRequired(I)) {
+  if (Legal->isMaskRequired(I, foldTailByMasking())) {
     Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
                                       CostKind);
   } else {

@@ -5763,9 +5707,10 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
   const Value *Ptr = getLoadStorePointerOperand(I);
 
   return TTI.getAddressComputationCost(VectorTy) +
-         TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
-                                    Legal->isMaskRequired(I), Alignment,
-                                    CostKind, I);
+         TTI.getGatherScatterOpCost(
+             I->getOpcode(), VectorTy, Ptr,
+             Legal->isMaskRequired(I, foldTailByMasking()), Alignment, CostKind,
+             I);
 }
 
 InstructionCost

@@ -5794,12 +5739,12 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
       (isa<StoreInst>(I) && (Group->getNumMembers() < Group->getFactor()));
   InstructionCost Cost = TTI.getInterleavedMemoryOpCost(
       InsertPos->getOpcode(), WideVecTy, Group->getFactor(), Indices,
-      Group->getAlign(), AS, CostKind, Legal->isMaskRequired(I),
-      UseMaskForGaps);
+      Group->getAlign(), AS, CostKind,
+      Legal->isMaskRequired(I, foldTailByMasking()), UseMaskForGaps);
 
   if (Group->isReverse()) {
     // TODO: Add support for reversed masked interleaved access.
-    assert(!Legal->isMaskRequired(I) &&
+    assert(!Legal->isMaskRequired(I, foldTailByMasking()) &&
            "Reverse masked interleaved access not supported.");
     Cost += Group->getNumMembers() *
             TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, {},

@@ -6287,7 +6232,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
       continue;
     }
 
-    bool MaskRequired = Legal->isMaskRequired(CI);
+    bool MaskRequired = Legal->isMaskRequired(CI, foldTailByMasking());
     // Compute corresponding vector type for return value and arguments.
     Type *RetTy = toVectorizedTy(ScalarRetTy, VF);
     for (Type *ScalarTy : ScalarTys)

@@ -6407,7 +6352,7 @@ bool LoopVectorizationCostModel::shouldConsiderInvariant(Value *Op) {
   // instruction in the loop. In that case, it is not trivially hoistable.
   auto *OpI = dyn_cast<Instruction>(Op);
   return !OpI || !TheLoop->contains(OpI) ||
-         (!isPredicatedInst(OpI) &&
+         (!Legal->isMaskRequired(OpI, foldTailByMasking()) &&
           (!isa<PHINode>(OpI) || OpI->getParent() != TheLoop->getHeader()) &&
           all_of(OpI->operands(),
                  [this](Value *Op) { return shouldConsiderInvariant(Op); }));

@@ -6595,7 +6540,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
   case Instruction::SDiv:
   case Instruction::URem:
   case Instruction::SRem:
-    if (VF.isVector() && isPredicatedInst(I)) {
+    if (VF.isVector() && Legal->isMaskRequired(I, foldTailByMasking())) {
       const auto [ScalarCost, SafeDivisorCost] = getDivRemSpeculationCost(I, VF);
       return isDivRemScalarWithPredication(ScalarCost, SafeDivisorCost) ?
         ScalarCost : SafeDivisorCost;

@@ -6779,8 +6724,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
       return TTI::CastContextHint::Interleave;
     case LoopVectorizationCostModel::CM_Scalarize:
     case LoopVectorizationCostModel::CM_Widen:
-      return Legal->isMaskRequired(I) ? TTI::CastContextHint::Masked
-                                      : TTI::CastContextHint::Normal;
+      return Legal->isMaskRequired(I, foldTailByMasking())
+                 ? TTI::CastContextHint::Masked
+                 : TTI::CastContextHint::Normal;
     case LoopVectorizationCostModel::CM_Widen_Reverse:
       return TTI::CastContextHint::Reversed;
     case LoopVectorizationCostModel::CM_Unknown:

@@ -8317,7 +8263,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
     return nullptr;
 
   VPValue *Mask = nullptr;
-  if (Legal->isMaskRequired(I))
+  if (Legal->isMaskRequired(I, CM.foldTailByMasking()))
     Mask = getBlockInMask(I->getParent());
 
   // Determine if the pointer operand of the access is either consecutive or

@@ -8543,7 +8489,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
   // vector variant at this VF requires a mask, so we synthesize an
   // all-true mask.
   VPValue *Mask = nullptr;
-  if (Legal->isMaskRequired(CI))
+  if (Legal->isMaskRequired(CI, CM.foldTailByMasking()))
     Mask = getBlockInMask(CI->getParent());
   else
     Mask = Plan.getOrAddLiveIn(

@@ -8584,7 +8530,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
   case Instruction::URem: {
     // If not provably safe, use a select to form a safe divisor before widening the
     // div/rem operation itself. Otherwise fall through to general handling below.
-    if (CM.isPredicatedInst(I)) {
+    if (Legal->isMaskRequired(I, CM.foldTailByMasking())) {
       SmallVector<VPValue *> Ops(Operands);
       VPValue *Mask = getBlockInMask(I->getParent());
       VPValue *One =

@@ -8667,7 +8613,7 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
 
   // In case of predicated execution (due to tail-folding, or conditional
   // execution, or both), pass the relevant mask.
-  if (Legal->isMaskRequired(HI->Store))
+  if (Legal->isMaskRequired(HI->Store, CM.foldTailByMasking()))
     HGramOps.push_back(getBlockInMask(HI->Store->getParent()));
 
   return new VPHistogramRecipe(Opcode,

@@ -8682,7 +8628,7 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
       [&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); },
       Range);
 
-  bool IsPredicated = CM.isPredicatedInst(I);
+  bool IsPredicated = Legal->isMaskRequired(I, CM.foldTailByMasking());
 
   // Even if the instruction is not marked as uniform, there are certain
   // intrinsic calls that can be effectively treated as such, so we check for
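For context on the tryToWiden hunk above: when the div/rem is predicated, the recipe selects a harmless divisor for inactive lanes before widening. A per-lane scalar model of that idea (an illustrative sketch only, not the VPlan code itself):

// Each lane whose mask bit is off divides by 1 instead of the real divisor,
// so the widened division can execute unconditionally without introducing a
// divide-by-zero the original scalar loop would not have performed; results
// for inactive lanes are simply ignored.
unsigned safeDivLane(unsigned Numerator, unsigned Divisor, bool LaneActive) {
  unsigned SafeDivisor = LaneActive ? Divisor : 1u; // select(mask, divisor, 1)
  return Numerator / SafeDivisor;
}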

llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll

Lines changed: 7 additions & 12 deletions
@@ -67,10 +67,9 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
 ; TFCOMMON-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
 ; TFCOMMON-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
 ; TFCOMMON-NEXT:    [[LD:%.*]] = load double, ptr [[P2:%.*]], align 8
-; TFCOMMON-NEXT:    [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR3:[0-9]+]]
-; TFCOMMON-NEXT:    [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR3]]
-; TFCOMMON-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
-; TFCOMMON-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP6]], i32 1
+; TFCOMMON-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[LD]], i64 0
+; TFCOMMON-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
+; TFCOMMON-NEXT:    [[TMP8:%.*]] = call <2 x double> @exp_fixed(<2 x double> [[BROADCAST_SPLAT]])
 ; TFCOMMON-NEXT:    [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP8]], zeroinitializer
 ; TFCOMMON-NEXT:    [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true)
 ; TFCOMMON-NEXT:    [[TMP11:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP10]], <2 x i1> zeroinitializer

@@ -114,14 +113,10 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
 ; TFA_INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE9]] ]
 ; TFA_INTERLEAVE-NEXT:    [[ACTIVE_LANE_MASK2:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT10:%.*]], [[PRED_STORE_CONTINUE9]] ]
 ; TFA_INTERLEAVE-NEXT:    [[TMP4:%.*]] = load double, ptr [[P2:%.*]], align 8
-; TFA_INTERLEAVE-NEXT:    [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3:[0-9]+]]
-; TFA_INTERLEAVE-NEXT:    [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3]]
-; TFA_INTERLEAVE-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
-; TFA_INTERLEAVE-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP6]], i32 1
-; TFA_INTERLEAVE-NEXT:    [[TMP9:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3]]
-; TFA_INTERLEAVE-NEXT:    [[TMP10:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3]]
-; TFA_INTERLEAVE-NEXT:    [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i32 0
-; TFA_INTERLEAVE-NEXT:    [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[TMP10]], i32 1
+; TFA_INTERLEAVE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i64 0
+; TFA_INTERLEAVE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
+; TFA_INTERLEAVE-NEXT:    [[TMP8:%.*]] = call <2 x double> @exp_fixed(<2 x double> [[BROADCAST_SPLAT]])
+; TFA_INTERLEAVE-NEXT:    [[TMP12:%.*]] = call <2 x double> @exp_fixed(<2 x double> [[BROADCAST_SPLAT]])
 ; TFA_INTERLEAVE-NEXT:    [[TMP13:%.*]] = fcmp ogt <2 x double> [[TMP8]], zeroinitializer
 ; TFA_INTERLEAVE-NEXT:    [[TMP14:%.*]] = fcmp ogt <2 x double> [[TMP12]], zeroinitializer
 ; TFA_INTERLEAVE-NEXT:    [[TMP15:%.*]] = xor <2 x i1> [[TMP13]], splat (i1 true)