@@ -630,11 +630,34 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
630630 return true ;
631631}
632632
633+ // / Check the interleaved mask
634+ // /
635+ // / - if a value within the optional is non-nullptr, the value corresponds to
636+ // / deinterleaved mask
637+ // / - if a value within the option is nullptr, the value corresponds to all-true
638+ // / mask
639+ // / - return nullopt if mask cannot be deinterleaved
640+ static std::optional<Value *> getMask (Value *WideMask, unsigned Factor) {
641+ using namespace llvm ::PatternMatch;
642+ if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
643+ SmallVector<Value *, 8 > Operands;
644+ SmallVector<Instruction *, 8 > DeadInsts;
645+ if (getVectorInterleaveFactor (IMI, Operands, DeadInsts)) {
646+ assert (!Operands.empty ());
647+ if (Operands.size () == Factor &&
648+ std::equal (Operands.begin (), Operands.end (), Operands.begin ()))
649+ return Operands.front ();
650+ }
651+ }
652+ if (match (WideMask, m_AllOnes ()))
653+ return nullptr ;
654+ return std::nullopt ;
655+ }
656+
/// Lower a deinterleave intrinsic whose operand is a load or a vp.load by
/// handing it to the target through TLI. Returns true after queueing the
/// replaced instructions in DeadInsts; returns false when the pattern is not
/// matched or the target hook declines.
// NOTE(review): this text is a diff rendering. Lines tagged `-` are the
// pre-change code, lines tagged `+` the post-change code, and part of the
// body is elided at the `@@` hunk header below.
633657bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic (
634658 IntrinsicInst *DI, SmallSetVector<Instruction *, 32 > &DeadInsts) {
635- LoadInst *LI = dyn_cast<LoadInst>(DI->getOperand (0 ));
636-
637- if (!LI || !LI->hasOneUse () || !LI->isSimple ())
// The deinterleaved value must have exactly one use and must be produced by
// either a LoadInst or a VPIntrinsic; anything else is rejected.
659+ Value *LoadedVal = DI->getOperand (0 );
660+ if (!LoadedVal->hasOneUse () || !isa<LoadInst, VPIntrinsic>(LoadedVal))
638661 return false ;
639662
640663 SmallVector<Value *, 8 > DeinterleaveValues;
@@ -643,43 +666,92 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
643666 DeinterleaveDeadInsts))
644667 return false ;
645668
646- LLVM_DEBUG (dbgs () << " IA: Found a deinterleave intrinsic: " << *DI
647- << " with factor = " << DeinterleaveValues.size () << " \n " );
// The factor is the number of deinterleaved values that were collected.
669+ const unsigned Factor = DeinterleaveValues.size ();
648670
649- // Try and match this with target specific intrinsics.
650- if (!TLI->lowerDeinterleaveIntrinsicToLoad (LI, DeinterleaveValues))
651- return false ;
// VP path: only Intrinsic::vp_load is handled. Operand 1 is read as the mask
// and has to be accepted by getMask for this Factor.
671+ if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) {
672+ if (VPLoad->getIntrinsicID () != Intrinsic::vp_load)
673+ return false ;
674+ // Check mask operand. Handle both all-true and interleaved mask.
675+ Value *WideMask = VPLoad->getOperand (1 );
676+ std::optional<Value *> Mask = getMask (WideMask, Factor);
677+ if (!Mask)
678+ return false ;
679+
680+ LLVM_DEBUG (dbgs () << " IA: Found a vp.load with deinterleave intrinsic "
681+ << *DI << " and factor = " << Factor << " \n " );
682+
683+ // Since lowerInterleaveLoad expects Shuffles and LoadInst, use special
684+ // TLI function to emit target-specific interleaved instruction.
685+ if (!TLI->lowerInterleavedScalableLoad (VPLoad, *Mask, DI,
686+ DeinterleaveValues))
687+ return false ;
688+
689+ } else {
// Plain load path: the cast cannot fail because the isa<> check above only
// lets LoadInst and VPIntrinsic through. Non-simple loads are rejected.
690+ auto *LI = cast<LoadInst>(LoadedVal);
691+ if (!LI->isSimple ())
692+ return false ;
693+
694+ LLVM_DEBUG (dbgs () << " IA: Found a load with deinterleave intrinsic " << *DI
695+ << " and factor = " << Factor << " \n " );
696+
697+ // Try and match this with target specific intrinsics.
698+ if (!TLI->lowerDeinterleaveIntrinsicToLoad (LI, DeinterleaveValues))
699+ return false ;
700+ }
652701
// The target hook succeeded: queue the collected deinterleave instructions and
// the original load / vp.load for deletion.
653702 DeadInsts.insert (DeinterleaveDeadInsts.begin (), DeinterleaveDeadInsts.end ());
654703 // We now have a target-specific load, so delete the old one.
655- DeadInsts.insert (LI );
704+ DeadInsts.insert (cast<Instruction>(LoadedVal) );
656705 return true ;
657706}
658707
659708bool InterleavedAccessImpl::lowerInterleaveIntrinsic (
660709 IntrinsicInst *II, SmallSetVector<Instruction *, 32 > &DeadInsts) {
661710 if (!II->hasOneUse ())
662711 return false ;
663-
664- StoreInst *SI = dyn_cast<StoreInst>(*(II->users ().begin ()));
665-
666- if (!SI || !SI->isSimple ())
712+ Value *StoredBy = II->user_back ();
713+ if (!isa<StoreInst, VPIntrinsic>(StoredBy))
667714 return false ;
668715
669716 SmallVector<Value *, 8 > InterleaveValues;
670717 SmallVector<Instruction *, 8 > InterleaveDeadInsts;
671718 if (!getVectorInterleaveFactor (II, InterleaveValues, InterleaveDeadInsts))
672719 return false ;
673720
674- LLVM_DEBUG (dbgs () << " IA: Found an interleave intrinsic: " << *II
675- << " with factor = " << InterleaveValues.size () << " \n " );
721+ const unsigned Factor = InterleaveValues.size ();
676722
677- // Try and match this with target specific intrinsics.
678- if (!TLI->lowerInterleaveIntrinsicToStore (SI, InterleaveValues))
679- return false ;
723+ if (auto *VPStore = dyn_cast<VPIntrinsic>(StoredBy)) {
724+ if (VPStore->getIntrinsicID () != Intrinsic::vp_store)
725+ return false ;
726+
727+ Value *WideMask = VPStore->getOperand (2 );
728+ std::optional<Value *> Mask = getMask (WideMask, Factor);
729+ if (!Mask)
730+ return false ;
731+
732+ LLVM_DEBUG (dbgs () << " IA: Found a vp.store with interleave intrinsic "
733+ << *II << " and factor = " << Factor << " \n " );
734+
735+ // Since lowerInterleavedStore expects Shuffle and StoreInst, use special
736+ // TLI function to emit target-specific interleaved instruction.
737+ if (!TLI->lowerInterleavedScalableStore (VPStore, *Mask, II,
738+ InterleaveValues))
739+ return false ;
740+ } else {
741+ auto *SI = cast<StoreInst>(StoredBy);
742+ if (!SI->isSimple ())
743+ return false ;
744+
745+ LLVM_DEBUG (dbgs () << " IA: Found a store with interleave intrinsic " << *II
746+ << " and factor = " << Factor << " \n " );
747+
748+ // Try and match this with target specific intrinsics.
749+ if (!TLI->lowerInterleaveIntrinsicToStore (SI, InterleaveValues))
750+ return false ;
751+ }
680752
681753 // We now have a target-specific store, so delete the old one.
682- DeadInsts.insert (SI );
754+ DeadInsts.insert (cast<Instruction>(StoredBy) );
683755 DeadInsts.insert (InterleaveDeadInsts.begin (), InterleaveDeadInsts.end ());
684756 return true ;
685757}
0 commit comments