@@ -268,13 +268,16 @@ static Value *getMaskOperand(IntrinsicInst *II) {
268
268
}
269
269
}
270
270
271
- // Return the corresponded deinterleaved mask, or nullptr if there is no valid
272
- // mask.
273
- static Value *getMask (Value *WideMask, unsigned Factor,
274
- ElementCount LeafValueEC);
275
-
276
- static Value *getMask (Value *WideMask, unsigned Factor,
277
- VectorType *LeafValueTy) {
271
+ // Return a pair of
272
+ // (1) The corresponding deinterleaved mask, or nullptr if there is no valid
273
+ // mask.
274
+ // (2) A gap mask: some masks effectively skip certain fields, and this
275
+ // APInt has a cleared bit for every field that is skipped (i.e. a "gap").
276
+ static std::pair<Value *, APInt> getMask (Value *WideMask, unsigned Factor,
277
+ ElementCount LeafValueEC);
278
+
279
+ static std::pair<Value *, APInt> getMask (Value *WideMask, unsigned Factor,
280
+ VectorType *LeafValueTy) {
278
281
return getMask (WideMask, Factor, LeafValueTy->getElementCount ());
279
282
}
280
283
@@ -379,22 +382,25 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
379
382
replaceBinOpShuffles (BinOpShuffles.getArrayRef (), Shuffles, Load);
380
383
381
384
Value *Mask = nullptr ;
385
+ auto GapMask = APInt::getAllOnes (Factor);
382
386
if (LI) {
383
387
LLVM_DEBUG (dbgs () << " IA: Found an interleaved load: " << *Load << " \n " );
384
388
} else {
385
389
// Check mask operand. Handle both all-true/false and interleaved mask.
386
- Mask = getMask (getMaskOperand (II), Factor, VecTy);
390
+ std::tie ( Mask, GapMask) = getMask (getMaskOperand (II), Factor, VecTy);
387
391
if (!Mask)
388
392
return false ;
389
393
390
394
LLVM_DEBUG (dbgs () << " IA: Found an interleaved vp.load or masked.load: "
391
395
<< *Load << " \n " );
396
+ LLVM_DEBUG (dbgs () << " IA: With nominal factor " << Factor
397
+ << " and actual factor " << GapMask.popcount () << " \n " );
392
398
}
393
399
394
400
// Try to create target specific intrinsics to replace the load and
395
401
// shuffles.
396
402
if (!TLI->lowerInterleavedLoad (cast<Instruction>(Load), Mask, Shuffles,
397
- Indices, Factor))
403
+ Indices, Factor, GapMask ))
398
404
// If Extracts is not empty, tryReplaceExtracts made changes earlier.
399
405
return !Extracts.empty () || BinOpShuffleChanged;
400
406
@@ -536,10 +542,15 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
536
542
} else {
537
543
// Check mask operand. Handle both all-true/false and interleaved mask.
538
544
unsigned LaneMaskLen = NumStoredElements / Factor;
539
- Mask = getMask (getMaskOperand (II), Factor,
540
- ElementCount::getFixed (LaneMaskLen));
545
+ APInt GapMask (Factor, 0 );
546
+ std::tie (Mask, GapMask) = getMask (getMaskOperand (II), Factor,
547
+ ElementCount::getFixed (LaneMaskLen));
541
548
if (!Mask)
542
549
return false ;
550
+ // Gap masks are not supported for stores yet. However, it is possible that we
551
+ // have already changed the IR, hence returning true here.
552
+ if (GapMask.popcount () != Factor)
553
+ return true ;
543
554
544
555
LLVM_DEBUG (dbgs () << " IA: Found an interleaved vp.store or masked.store: "
545
556
<< *Store << " \n " );
@@ -556,34 +567,64 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
556
567
return true ;
557
568
}
558
569
559
- static Value *getMask (Value *WideMask, unsigned Factor,
560
- ElementCount LeafValueEC) {
570
+ // A wide mask <1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0> could be used to skip the
571
+ // last field in a factor-of-three interleaved store or deinterleaved load (in
572
+ // which case LeafMaskLen is 4). Such a (wide) mask is also known as a gap mask.
573
+ // This helper function detects this pattern and clears the GapMask bit of every
574
+ // skipped field, leaving 2 set bits (the actual factor) in this example.
575
+ static void getGapMask (const Constant &MaskConst, unsigned Factor,
576
+ unsigned LeafMaskLen, APInt &GapMask) {
577
+ assert (GapMask.getBitWidth () == Factor);
578
+ for (unsigned F = 0U ; F < Factor; ++F) {
579
+ bool AllZero = true ;
580
+ for (unsigned Idx = 0U ; Idx < LeafMaskLen; ++Idx) {
581
+ Constant *C = MaskConst.getAggregateElement (F + Idx * Factor);
582
+ if (!C->isZeroValue ()) {
583
+ AllZero = false ;
584
+ break ;
585
+ }
586
+ }
587
+ // All mask bits on this field are zero, skipping it.
588
+ if (AllZero)
589
+ GapMask.clearBit (F);
590
+ }
591
+ }
592
+
593
+ static std::pair<Value *, APInt> getMask (Value *WideMask, unsigned Factor,
594
+ ElementCount LeafValueEC) {
595
+ auto GapMask = APInt::getAllOnes (Factor);
596
+
561
597
if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
562
598
if (unsigned F = getInterleaveIntrinsicFactor (IMI->getIntrinsicID ());
563
599
F && F == Factor && llvm::all_equal (IMI->args ())) {
564
- return IMI->getArgOperand (0 );
600
+ return { IMI->getArgOperand (0 ), GapMask} ;
565
601
}
566
602
}
567
603
568
604
if (auto *ConstMask = dyn_cast<Constant>(WideMask)) {
569
605
if (auto *Splat = ConstMask->getSplatValue ())
570
606
// All-ones or all-zeros mask.
571
- return ConstantVector::getSplat (LeafValueEC, Splat);
607
+ return { ConstantVector::getSplat (LeafValueEC, Splat), GapMask} ;
572
608
573
609
if (LeafValueEC.isFixed ()) {
574
610
unsigned LeafMaskLen = LeafValueEC.getFixedValue ();
611
+ // First, check if we use a gap mask to skip some of the factors / fields.
612
+ getGapMask (*ConstMask, Factor, LeafMaskLen, GapMask);
613
+
575
614
SmallVector<Constant *, 8 > LeafMask (LeafMaskLen, nullptr );
576
615
// If this is a fixed-length constant mask, each lane / leaf has to
577
616
// use the same mask. This is done by checking if every group with Factor
578
617
// number of elements in the interleaved mask has homogeneous values.
579
618
for (unsigned Idx = 0U ; Idx < LeafMaskLen * Factor; ++Idx) {
619
+ if (!GapMask[Idx % Factor])
620
+ continue ;
580
621
Constant *C = ConstMask->getAggregateElement (Idx);
581
622
if (LeafMask[Idx / Factor] && LeafMask[Idx / Factor] != C)
582
- return nullptr ;
623
+ return { nullptr , GapMask} ;
583
624
LeafMask[Idx / Factor] = C;
584
625
}
585
626
586
- return ConstantVector::get (LeafMask);
627
+ return { ConstantVector::get (LeafMask), GapMask} ;
587
628
}
588
629
}
589
630
@@ -603,12 +644,13 @@ static Value *getMask(Value *WideMask, unsigned Factor,
603
644
auto *LeafMaskTy =
604
645
VectorType::get (Type::getInt1Ty (SVI->getContext ()), LeafValueEC);
605
646
IRBuilder<> Builder (SVI);
606
- return Builder.CreateExtractVector (LeafMaskTy, SVI->getOperand (0 ),
607
- uint64_t (0 ));
647
+ return {Builder.CreateExtractVector (LeafMaskTy, SVI->getOperand (0 ),
648
+ uint64_t (0 )),
649
+ GapMask};
608
650
}
609
651
}
610
652
611
- return nullptr ;
653
+ return { nullptr , GapMask} ;
612
654
}
613
655
614
656
bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic (
@@ -639,9 +681,16 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
639
681
return false ;
640
682
641
683
// Check mask operand. Handle both all-true/false and interleaved mask.
642
- Mask = getMask (getMaskOperand (II), Factor, getDeinterleavedVectorType (DI));
684
+ APInt GapMask (Factor, 0 );
685
+ std::tie (Mask, GapMask) =
686
+ getMask (getMaskOperand (II), Factor, getDeinterleavedVectorType (DI));
643
687
if (!Mask)
644
688
return false ;
689
+ // Gap masks are not supported yet when deinterleaving with intrinsics.
690
+ // However, it is possible that we have already changed the IR, hence returning
691
+ // true here.
692
+ if (GapMask.popcount () != Factor)
693
+ return true ;
645
694
646
695
LLVM_DEBUG (dbgs () << " IA: Found a vp.load or masked.load with deinterleave"
647
696
<< " intrinsic " << *DI << " and factor = "
@@ -680,10 +729,16 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
680
729
II->getIntrinsicID () != Intrinsic::vp_store)
681
730
return false ;
682
731
// Check mask operand. Handle both all-true/false and interleaved mask.
683
- Mask = getMask (getMaskOperand (II), Factor,
684
- cast<VectorType>(InterleaveValues[0 ]->getType ()));
732
+ APInt GapMask (Factor, 0 );
733
+ std::tie (Mask, GapMask) =
734
+ getMask (getMaskOperand (II), Factor,
735
+ cast<VectorType>(InterleaveValues[0 ]->getType ()));
685
736
if (!Mask)
686
737
return false ;
738
+ // Gap masks are not supported yet when interleaving with intrinsics. However,
739
+ // it is possible that we have already changed the IR, hence returning true here.
740
+ if (GapMask.popcount () != Factor)
741
+ return true ;
687
742
688
743
LLVM_DEBUG (dbgs () << " IA: Found a vp.store or masked.store with interleave"
689
744
<< " intrinsic " << *IntII << " and factor = "
0 commit comments