@@ -268,13 +268,16 @@ static Value *getMaskOperand(IntrinsicInst *II) {
268268 }
269269}
270270
271- // Return the corresponded deinterleaved mask, or nullptr if there is no valid
272- // mask.
273- static Value *getMask (Value *WideMask, unsigned Factor,
274- ElementCount LeafValueEC);
275-
276- static Value *getMask (Value *WideMask, unsigned Factor,
277- VectorType *LeafValueTy) {
271+ // Return a pair of
272+ // (1) The corresponding deinterleaved mask, or nullptr if there is no valid
273+ // mask.
274+ // (2) A gap mask with one bit per field: a cleared bit marks a field that the
275+ // wide mask skips entirely (i.e. a "gap"); a set bit marks an accessed field.
276+ static std::pair<Value *, APInt> getMask (Value *WideMask, unsigned Factor,
277+ ElementCount LeafValueEC);
278+
279+ static std::pair<Value *, APInt> getMask (Value *WideMask, unsigned Factor,
280+ VectorType *LeafValueTy) {
278281 return getMask (WideMask, Factor, LeafValueTy->getElementCount ());
279282}
280283
@@ -379,22 +382,25 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
379382 replaceBinOpShuffles (BinOpShuffles.getArrayRef (), Shuffles, Load);
380383
381384 Value *Mask = nullptr ;
385+ auto GapMask = APInt::getAllOnes (Factor);
382386 if (LI) {
383387 LLVM_DEBUG (dbgs () << " IA: Found an interleaved load: " << *Load << " \n " );
384388 } else {
385389 // Check mask operand. Handle both all-true/false and interleaved mask.
386- Mask = getMask (getMaskOperand (II), Factor, VecTy);
390+ std::tie ( Mask, GapMask) = getMask (getMaskOperand (II), Factor, VecTy);
387391 if (!Mask)
388392 return false ;
389393
390394 LLVM_DEBUG (dbgs () << " IA: Found an interleaved vp.load or masked.load: "
391395 << *Load << " \n " );
396+ LLVM_DEBUG (dbgs () << " IA: With nominal factor " << Factor
397+ << " and actual factor " << GapMask.popcount () << " \n " );
392398 }
393399
394400 // Try to create target specific intrinsics to replace the load and
395401 // shuffles.
396402 if (!TLI->lowerInterleavedLoad (cast<Instruction>(Load), Mask, Shuffles,
397- Indices, Factor))
403+ Indices, Factor, GapMask ))
398404 // If Extracts is not empty, tryReplaceExtracts made changes earlier.
399405 return !Extracts.empty () || BinOpShuffleChanged;
400406
@@ -536,10 +542,15 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
536542 } else {
537543 // Check mask operand. Handle both all-true/false and interleaved mask.
538544 unsigned LaneMaskLen = NumStoredElements / Factor;
539- Mask = getMask (getMaskOperand (II), Factor,
540- ElementCount::getFixed (LaneMaskLen));
545+ APInt GapMask (Factor, 0 );
546+ std::tie (Mask, GapMask) = getMask (getMaskOperand (II), Factor,
547+ ElementCount::getFixed (LaneMaskLen));
541548 if (!Mask)
542549 return false ;
550+ // We haven't supported gap mask for stores. Yet it is possible that we
551+ // already changed the IR, hence returning true here.
552+ if (GapMask.popcount () != Factor)
553+ return true ;
543554
544555 LLVM_DEBUG (dbgs () << " IA: Found an interleaved vp.store or masked.store: "
545556 << *Store << " \n " );
@@ -556,34 +567,64 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
556567 return true ;
557568}
558569
559- static Value *getMask (Value *WideMask, unsigned Factor,
560- ElementCount LeafValueEC) {
570+ // A wide mask <1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0> could be used to skip the
571+ // last field in a factor-of-three interleaved store or deinterleaved load (in
572+ // which case LeafMaskLen is 4). Such a (wide) mask is also known as a gap mask.
573+ // This helper clears bit F of GapMask (Factor bits wide) for each field F whose
574+ // wide-mask elements are all zero; in this example only bit 2 is cleared.
575+ static void getGapMask (const Constant &MaskConst, unsigned Factor,
576+ unsigned LeafMaskLen, APInt &GapMask) {
577+ assert (GapMask.getBitWidth () == Factor);
578+ for (unsigned F = 0U ; F < Factor; ++F) {
579+ bool AllZero = true ;
580+ for (unsigned Idx = 0U ; Idx < LeafMaskLen; ++Idx) {
581+ Constant *C = MaskConst.getAggregateElement (F + Idx * Factor); // lane Idx of field F
582+ if (!C->isZeroValue ()) { // NOTE(review): assumes C is non-null - confirm
583+ AllZero = false ;
584+ break ;
585+ }
586+ }
587+ // Every mask element of field F is zero: the field is a gap, clear its bit.
588+ if (AllZero)
589+ GapMask.clearBit (F);
590+ }
591+ }
592+
593+ static std::pair<Value *, APInt> getMask (Value *WideMask, unsigned Factor,
594+ ElementCount LeafValueEC) {
595+ auto GapMask = APInt::getAllOnes (Factor);
596+
561597 if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
562598 if (unsigned F = getInterleaveIntrinsicFactor (IMI->getIntrinsicID ());
563599 F && F == Factor && llvm::all_equal (IMI->args ())) {
564- return IMI->getArgOperand (0 );
600+ return { IMI->getArgOperand (0 ), GapMask} ;
565601 }
566602 }
567603
568604 if (auto *ConstMask = dyn_cast<Constant>(WideMask)) {
569605 if (auto *Splat = ConstMask->getSplatValue ())
570606 // All-ones or all-zeros mask.
571- return ConstantVector::getSplat (LeafValueEC, Splat);
607+ return { ConstantVector::getSplat (LeafValueEC, Splat), GapMask} ;
572608
573609 if (LeafValueEC.isFixed ()) {
574610 unsigned LeafMaskLen = LeafValueEC.getFixedValue ();
611+ // First, check if we use a gap mask to skip some of the factors / fields.
612+ getGapMask (*ConstMask, Factor, LeafMaskLen, GapMask);
613+
575614 SmallVector<Constant *, 8 > LeafMask (LeafMaskLen, nullptr );
576615 // If this is a fixed-length constant mask, each lane / leaf has to
577616 // use the same mask. This is done by checking if every group with Factor
578617 // number of elements in the interleaved mask has homogeneous values.
579618 for (unsigned Idx = 0U ; Idx < LeafMaskLen * Factor; ++Idx) {
619+ if (!GapMask[Idx % Factor])
620+ continue ;
580621 Constant *C = ConstMask->getAggregateElement (Idx);
581622 if (LeafMask[Idx / Factor] && LeafMask[Idx / Factor] != C)
582- return nullptr ;
623+ return { nullptr , GapMask} ;
583624 LeafMask[Idx / Factor] = C;
584625 }
585626
586- return ConstantVector::get (LeafMask);
627+ return { ConstantVector::get (LeafMask), GapMask} ;
587628 }
588629 }
589630
@@ -603,12 +644,13 @@ static Value *getMask(Value *WideMask, unsigned Factor,
603644 auto *LeafMaskTy =
604645 VectorType::get (Type::getInt1Ty (SVI->getContext ()), LeafValueEC);
605646 IRBuilder<> Builder (SVI);
606- return Builder.CreateExtractVector (LeafMaskTy, SVI->getOperand (0 ),
607- uint64_t (0 ));
647+ return {Builder.CreateExtractVector (LeafMaskTy, SVI->getOperand (0 ),
648+ uint64_t (0 )),
649+ GapMask};
608650 }
609651 }
610652
611- return nullptr ;
653+ return { nullptr , GapMask} ;
612654}
613655
614656bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic (
@@ -639,9 +681,16 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
639681 return false ;
640682
641683 // Check mask operand. Handle both all-true/false and interleaved mask.
642- Mask = getMask (getMaskOperand (II), Factor, getDeinterleavedVectorType (DI));
684+ APInt GapMask (Factor, 0 );
685+ std::tie (Mask, GapMask) =
686+ getMask (getMaskOperand (II), Factor, getDeinterleavedVectorType (DI));
643687 if (!Mask)
644688 return false ;
689+ // We haven't supported gap mask if it's deinterleaving using intrinsics.
690+ // Yet it is possible that we already changed the IR, hence returning true
691+ // here.
692+ if (GapMask.popcount () != Factor)
693+ return true ;
645694
646695 LLVM_DEBUG (dbgs () << " IA: Found a vp.load or masked.load with deinterleave"
647696 << " intrinsic " << *DI << " and factor = "
@@ -680,10 +729,16 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
680729 II->getIntrinsicID () != Intrinsic::vp_store)
681730 return false ;
682731 // Check mask operand. Handle both all-true/false and interleaved mask.
683- Mask = getMask (getMaskOperand (II), Factor,
684- cast<VectorType>(InterleaveValues[0 ]->getType ()));
732+ APInt GapMask (Factor, 0 );
733+ std::tie (Mask, GapMask) =
734+ getMask (getMaskOperand (II), Factor,
735+ cast<VectorType>(InterleaveValues[0 ]->getType ()));
685736 if (!Mask)
686737 return false ;
738+ // We haven't supported gap mask if it's interleaving using intrinsics. Yet
739+ // it is possible that we already changed the IR, hence returning true here.
740+ if (GapMask.popcount () != Factor)
741+ return true ;
687742
688743 LLVM_DEBUG (dbgs () << " IA: Found a vp.store or masked.store with interleave"
689744 << " intrinsic " << *IntII << " and factor = "
0 commit comments