@@ -630,11 +630,37 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
630630 return true ;
631631}
632632
633+ // Return the corresponded deinterleaved mask, or nullptr if there is no valid
634+ // mask.
635+ static Value *getMask (Value *WideMask, unsigned Factor,
636+ VectorType *LeafValueTy) {
637+ using namespace llvm ::PatternMatch;
638+ if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
639+ SmallVector<Value *, 8 > Operands;
640+ SmallVector<Instruction *, 8 > DeadInsts;
641+ if (getVectorInterleaveFactor (IMI, Operands, DeadInsts)) {
642+ assert (!Operands.empty ());
643+ if (Operands.size () == Factor && llvm::all_equal (Operands))
644+ return Operands[0 ];
645+ }
646+ }
647+
648+ if (match (WideMask, m_AllOnes ())) {
649+ // Scale the vector length of all-ones mask.
650+ ElementCount OrigEC =
651+ cast<VectorType>(WideMask->getType ())->getElementCount ();
652+ assert (OrigEC.getKnownMinValue () % Factor == 0 );
653+ return ConstantVector::getSplat (OrigEC.divideCoefficientBy (Factor),
654+ cast<Constant>(WideMask)->getSplatValue ());
655+ }
656+
657+ return nullptr ;
658+ }
659+
633660bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic (
634661 IntrinsicInst *DI, SmallSetVector<Instruction *, 32 > &DeadInsts) {
635- LoadInst *LI = dyn_cast<LoadInst>(DI->getOperand (0 ));
636-
637- if (!LI || !LI->hasOneUse () || !LI->isSimple ())
662+ Value *LoadedVal = DI->getOperand (0 );
663+ if (!LoadedVal->hasOneUse () || !isa<LoadInst, VPIntrinsic>(LoadedVal))
638664 return false ;
639665
640666 SmallVector<Value *, 8 > DeinterleaveValues;
@@ -643,43 +669,94 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
643669 DeinterleaveDeadInsts))
644670 return false ;
645671
646- LLVM_DEBUG (dbgs () << " IA: Found a deinterleave intrinsic: " << *DI
647- << " with factor = " << DeinterleaveValues.size () << " \n " );
672+ const unsigned Factor = DeinterleaveValues.size ();
648673
649- // Try and match this with target specific intrinsics.
650- if (!TLI->lowerDeinterleaveIntrinsicToLoad (LI, DeinterleaveValues))
651- return false ;
674+ if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) {
675+ if (VPLoad->getIntrinsicID () != Intrinsic::vp_load)
676+ return false ;
677+ // Check mask operand. Handle both all-true and interleaved mask.
678+ Value *WideMask = VPLoad->getOperand (1 );
679+ Value *Mask = getMask (WideMask, Factor,
680+ cast<VectorType>(DeinterleaveValues[0 ]->getType ()));
681+ if (!Mask)
682+ return false ;
683+
684+ LLVM_DEBUG (dbgs () << " IA: Found a vp.load with deinterleave intrinsic "
685+ << *DI << " and factor = " << Factor << " \n " );
686+
687+ // Since lowerInterleaveLoad expects Shuffles and LoadInst, use special
688+ // TLI function to emit target-specific interleaved instruction.
689+ if (!TLI->lowerDeinterleavedIntrinsicToVPLoad (VPLoad, Mask,
690+ DeinterleaveValues))
691+ return false ;
692+
693+ } else {
694+ auto *LI = cast<LoadInst>(LoadedVal);
695+ if (!LI->isSimple ())
696+ return false ;
697+
698+ LLVM_DEBUG (dbgs () << " IA: Found a load with deinterleave intrinsic " << *DI
699+ << " and factor = " << Factor << " \n " );
700+
701+ // Try and match this with target specific intrinsics.
702+ if (!TLI->lowerDeinterleaveIntrinsicToLoad (LI, DeinterleaveValues))
703+ return false ;
704+ }
652705
653706 DeadInsts.insert (DeinterleaveDeadInsts.begin (), DeinterleaveDeadInsts.end ());
654707 // We now have a target-specific load, so delete the old one.
655- DeadInsts.insert (LI );
708+ DeadInsts.insert (cast<Instruction>(LoadedVal) );
656709 return true ;
657710}
658711
659712bool InterleavedAccessImpl::lowerInterleaveIntrinsic (
660713 IntrinsicInst *II, SmallSetVector<Instruction *, 32 > &DeadInsts) {
661714 if (!II->hasOneUse ())
662715 return false ;
663-
664- StoreInst *SI = dyn_cast<StoreInst>(*(II->users ().begin ()));
665-
666- if (!SI || !SI->isSimple ())
716+ Value *StoredBy = II->user_back ();
717+ if (!isa<StoreInst, VPIntrinsic>(StoredBy))
667718 return false ;
668719
669720 SmallVector<Value *, 8 > InterleaveValues;
670721 SmallVector<Instruction *, 8 > InterleaveDeadInsts;
671722 if (!getVectorInterleaveFactor (II, InterleaveValues, InterleaveDeadInsts))
672723 return false ;
673724
674- LLVM_DEBUG (dbgs () << " IA: Found an interleave intrinsic: " << *II
675- << " with factor = " << InterleaveValues.size () << " \n " );
725+ const unsigned Factor = InterleaveValues.size ();
676726
677- // Try and match this with target specific intrinsics.
678- if (!TLI->lowerInterleaveIntrinsicToStore (SI, InterleaveValues))
679- return false ;
727+ if (auto *VPStore = dyn_cast<VPIntrinsic>(StoredBy)) {
728+ if (VPStore->getIntrinsicID () != Intrinsic::vp_store)
729+ return false ;
730+
731+ Value *WideMask = VPStore->getOperand (2 );
732+ Value *Mask = getMask (WideMask, Factor,
733+ cast<VectorType>(InterleaveValues[0 ]->getType ()));
734+ if (!Mask)
735+ return false ;
736+
737+ LLVM_DEBUG (dbgs () << " IA: Found a vp.store with interleave intrinsic "
738+ << *II << " and factor = " << Factor << " \n " );
739+
740+ // Since lowerInterleavedStore expects Shuffle and StoreInst, use special
741+ // TLI function to emit target-specific interleaved instruction.
742+ if (!TLI->lowerInterleavedIntrinsicToVPStore (VPStore, Mask,
743+ InterleaveValues))
744+ return false ;
745+ } else {
746+ auto *SI = cast<StoreInst>(StoredBy);
747+ if (!SI->isSimple ())
748+ return false ;
749+
750+ LLVM_DEBUG (dbgs () << " IA: Found a store with interleave intrinsic " << *II
751+ << " and factor = " << Factor << " \n " );
752+
753+ // Try and match this with target specific intrinsics.
754+ if (!TLI->lowerInterleaveIntrinsicToStore (SI, InterleaveValues))
755+ return false ;
756+ }
680757
681758 // We now have a target-specific store, so delete the old one.
682- DeadInsts.insert (SI );
759+ DeadInsts.insert (cast<Instruction>(StoredBy) );
683760 DeadInsts.insert (InterleaveDeadInsts.begin (), InterleaveDeadInsts.end ());
684761 return true ;
685762}
0 commit comments