@@ -249,195 +249,9 @@ static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
249249 return false ;
250250}
251251
252- // For an (de)interleave tree like this:
253- //
254- // A C B D
255- // |___| |___|
256- // |_____|
257- // |
258- // A B C D
259- //
260- // We will get ABCD at the end while the leaf operands/results
261- // are ACBD, which are also what we initially collected in
262- // getVectorInterleaveFactor / getVectorDeinterleaveFactor. But TLI
263- // hooks (e.g. lowerDeinterleaveIntrinsicToLoad) expect ABCD, so we need
264- // to reorder them by interleaving these values.
265- static void interleaveLeafValues (MutableArrayRef<Value *> SubLeaves) {
266- unsigned NumLeaves = SubLeaves.size ();
267- if (NumLeaves == 2 )
268- return ;
269-
270- assert (isPowerOf2_32 (NumLeaves) && NumLeaves > 1 );
271-
272- const unsigned HalfLeaves = NumLeaves / 2 ;
273- // Visit the sub-trees.
274- interleaveLeafValues (SubLeaves.take_front (HalfLeaves));
275- interleaveLeafValues (SubLeaves.drop_front (HalfLeaves));
276-
277- SmallVector<Value *, 8 > Buffer;
278- // a0 a1 a2 a3 b0 b1 b2 b3
279- // -> a0 b0 a1 b1 a2 b2 a3 b3
280- for (unsigned i = 0U ; i < NumLeaves; ++i)
281- Buffer.push_back (SubLeaves[i / 2 + (i % 2 ? HalfLeaves : 0 )]);
282-
283- llvm::copy (Buffer, SubLeaves.begin ());
284- }
285-
286- static bool
287- getVectorInterleaveFactor (IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
288- SmallVectorImpl<Instruction *> &DeadInsts) {
289- assert (II->getIntrinsicID () == Intrinsic::vector_interleave2);
290-
291- // Visit with BFS
292- SmallVector<IntrinsicInst *, 8 > Queue;
293- Queue.push_back (II);
294- while (!Queue.empty ()) {
295- IntrinsicInst *Current = Queue.front ();
296- Queue.erase (Queue.begin ());
297-
298- // All the intermediate intrinsics will be deleted.
299- DeadInsts.push_back (Current);
300-
301- for (unsigned I = 0 ; I < 2 ; ++I) {
302- Value *Op = Current->getOperand (I);
303- if (auto *OpII = dyn_cast<IntrinsicInst>(Op))
304- if (OpII->getIntrinsicID () == Intrinsic::vector_interleave2) {
305- Queue.push_back (OpII);
306- continue ;
307- }
308-
309- // If this is not a perfectly balanced tree, the leaf
310- // result types would be different.
311- if (!Operands.empty () && Op->getType () != Operands.back ()->getType ())
312- return false ;
313-
314- Operands.push_back (Op);
315- }
316- }
317-
318- const unsigned Factor = Operands.size ();
319- // Currently we only recognize power-of-two factors.
320- // FIXME: should we assert here instead?
321- if (Factor <= 1 || !isPowerOf2_32 (Factor))
322- return false ;
323-
324- interleaveLeafValues (Operands);
325- return true ;
326- }
327-
328- static bool
329- getVectorDeinterleaveFactor (IntrinsicInst *II,
330- SmallVectorImpl<Value *> &Results,
331- SmallVectorImpl<Instruction *> &DeadInsts) {
332- assert (II->getIntrinsicID () == Intrinsic::vector_deinterleave2);
333- using namespace PatternMatch ;
334- if (!II->hasNUses (2 ))
335- return false ;
336-
337- // Visit with BFS
338- SmallVector<IntrinsicInst *, 8 > Queue;
339- Queue.push_back (II);
340- while (!Queue.empty ()) {
341- IntrinsicInst *Current = Queue.front ();
342- Queue.erase (Queue.begin ());
343- assert (Current->hasNUses (2 ));
344-
345- // All the intermediate intrinsics will be deleted from the bottom-up.
346- DeadInsts.insert (DeadInsts.begin (), Current);
347-
348- ExtractValueInst *LHS = nullptr , *RHS = nullptr ;
349- for (User *Usr : Current->users ()) {
350- if (!isa<ExtractValueInst>(Usr))
351- return 0 ;
352-
353- auto *EV = cast<ExtractValueInst>(Usr);
354- // Intermediate ExtractValue instructions will also be deleted.
355- DeadInsts.insert (DeadInsts.begin (), EV);
356- ArrayRef<unsigned > Indices = EV->getIndices ();
357- if (Indices.size () != 1 )
358- return false ;
359-
360- if (Indices[0 ] == 0 && !LHS)
361- LHS = EV;
362- else if (Indices[0 ] == 1 && !RHS)
363- RHS = EV;
364- else
365- return false ;
366- }
367-
368- // We have legal indices. At this point we're either going
369- // to continue the traversal or push the leaf values into Results.
370- for (ExtractValueInst *EV : {LHS, RHS}) {
371- // Continue the traversal. We're playing safe here and matching only the
372- // expression consisting of a perfectly balanced binary tree in which all
373- // intermediate values are only used once.
374- if (EV->hasOneUse () &&
375- match (EV->user_back (),
376- m_Intrinsic<Intrinsic::vector_deinterleave2>()) &&
377- EV->user_back ()->hasNUses (2 )) {
378- auto *EVUsr = cast<IntrinsicInst>(EV->user_back ());
379- Queue.push_back (EVUsr);
380- continue ;
381- }
382-
383- // If this is not a perfectly balanced tree, the leaf
384- // result types would be different.
385- if (!Results.empty () && EV->getType () != Results.back ()->getType ())
386- return false ;
387-
388- // Save the leaf value.
389- Results.push_back (EV);
390- }
391- }
392-
393- const unsigned Factor = Results.size ();
394- // Currently we only recognize power-of-two factors.
395- // FIXME: should we assert here instead?
396- if (Factor <= 1 || !isPowerOf2_32 (Factor))
397- return 0 ;
398-
399- interleaveLeafValues (Results);
400- return true ;
401- }
402-
403- // Return the corresponded deinterleaved mask, or nullptr if there is no valid
404- // mask.
405252static Value *getMask (Value *WideMask, unsigned Factor,
406- ElementCount LeafValueEC) {
407- using namespace llvm ::PatternMatch;
408- if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
409- SmallVector<Value *, 8 > Operands;
410- SmallVector<Instruction *, 8 > DeadInsts;
411- if (getVectorInterleaveFactor (IMI, Operands, DeadInsts)) {
412- assert (!Operands.empty ());
413- if (Operands.size () == Factor && llvm::all_equal (Operands))
414- return Operands[0 ];
415- }
416- }
417-
418- if (auto *ConstMask = dyn_cast<Constant>(WideMask)) {
419- if (auto *Splat = ConstMask->getSplatValue ()) {
420- // All-ones or all-zeros mask.
421- return ConstantVector::getSplat (LeafValueEC, Splat);
422- } else if (LeafValueEC.isFixed ()) {
423- unsigned LeafMaskLen = LeafValueEC.getFixedValue ();
424- SmallVector<Constant *, 8 > LeafMask (LeafMaskLen, nullptr );
425- // If this is a fixed-length constant mask, each lane / leaf has to
426- // use the same mask. This is done by checking if every group with Factor
427- // number of elements in the interleaved mask has homogeneous values.
428- for (unsigned Idx = 0U , N = LeafMaskLen * Factor; Idx < N; ++Idx) {
429- Constant *Ref = ConstMask->getAggregateElement (alignDown (Idx, Factor));
430- if (Ref != ConstMask->getAggregateElement (Idx))
431- return nullptr ;
432- LeafMask[Idx / Factor] = Ref;
433- }
253+ ElementCount LeafValueEC);
434254
435- return ConstantVector::get (LeafMask);
436- }
437- }
438-
439- return nullptr ;
440- }
441255static Value *getMask (Value *WideMask, unsigned Factor,
442256 VectorType *LeafValueTy) {
443257 return getMask (WideMask, Factor, LeafValueTy->getElementCount ());
@@ -761,6 +575,195 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
761575 return true ;
762576}
763577
578+ // For an (de)interleave tree like this:
579+ //
580+ // A C B D
581+ // |___| |___|
582+ // |_____|
583+ // |
584+ // A B C D
585+ //
586+ // We will get ABCD at the end while the leaf operands/results
587+ // are ACBD, which are also what we initially collected in
588+ // getVectorInterleaveFactor / getVectorDeinterleaveFactor. But TLI
589+ // hooks (e.g. lowerDeinterleaveIntrinsicToLoad) expect ABCD, so we need
590+ // to reorder them by interleaving these values.
591+ static void interleaveLeafValues (MutableArrayRef<Value *> SubLeaves) {
592+ unsigned NumLeaves = SubLeaves.size ();
593+ if (NumLeaves == 2 )
594+ return ;
595+
596+ assert (isPowerOf2_32 (NumLeaves) && NumLeaves > 1 );
597+
598+ const unsigned HalfLeaves = NumLeaves / 2 ;
599+ // Visit the sub-trees.
600+ interleaveLeafValues (SubLeaves.take_front (HalfLeaves));
601+ interleaveLeafValues (SubLeaves.drop_front (HalfLeaves));
602+
603+ SmallVector<Value *, 8 > Buffer;
604+ // a0 a1 a2 a3 b0 b1 b2 b3
605+ // -> a0 b0 a1 b1 a2 b2 a3 b3
606+ for (unsigned i = 0U ; i < NumLeaves; ++i)
607+ Buffer.push_back (SubLeaves[i / 2 + (i % 2 ? HalfLeaves : 0 )]);
608+
609+ llvm::copy (Buffer, SubLeaves.begin ());
610+ }
611+
612+ static bool
613+ getVectorInterleaveFactor (IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
614+ SmallVectorImpl<Instruction *> &DeadInsts) {
615+ assert (II->getIntrinsicID () == Intrinsic::vector_interleave2);
616+
617+ // Visit with BFS
618+ SmallVector<IntrinsicInst *, 8 > Queue;
619+ Queue.push_back (II);
620+ while (!Queue.empty ()) {
621+ IntrinsicInst *Current = Queue.front ();
622+ Queue.erase (Queue.begin ());
623+
624+ // All the intermediate intrinsics will be deleted.
625+ DeadInsts.push_back (Current);
626+
627+ for (unsigned I = 0 ; I < 2 ; ++I) {
628+ Value *Op = Current->getOperand (I);
629+ if (auto *OpII = dyn_cast<IntrinsicInst>(Op))
630+ if (OpII->getIntrinsicID () == Intrinsic::vector_interleave2) {
631+ Queue.push_back (OpII);
632+ continue ;
633+ }
634+
635+ // If this is not a perfectly balanced tree, the leaf
636+ // result types would be different.
637+ if (!Operands.empty () && Op->getType () != Operands.back ()->getType ())
638+ return false ;
639+
640+ Operands.push_back (Op);
641+ }
642+ }
643+
644+ const unsigned Factor = Operands.size ();
645+ // Currently we only recognize power-of-two factors.
646+ // FIXME: should we assert here instead?
647+ if (Factor <= 1 || !isPowerOf2_32 (Factor))
648+ return false ;
649+
650+ interleaveLeafValues (Operands);
651+ return true ;
652+ }
653+
654+ static bool
655+ getVectorDeinterleaveFactor (IntrinsicInst *II,
656+ SmallVectorImpl<Value *> &Results,
657+ SmallVectorImpl<Instruction *> &DeadInsts) {
658+ assert (II->getIntrinsicID () == Intrinsic::vector_deinterleave2);
659+ using namespace PatternMatch ;
660+ if (!II->hasNUses (2 ))
661+ return false ;
662+
663+ // Visit with BFS
664+ SmallVector<IntrinsicInst *, 8 > Queue;
665+ Queue.push_back (II);
666+ while (!Queue.empty ()) {
667+ IntrinsicInst *Current = Queue.front ();
668+ Queue.erase (Queue.begin ());
669+ assert (Current->hasNUses (2 ));
670+
671+ // All the intermediate intrinsics will be deleted from the bottom-up.
672+ DeadInsts.insert (DeadInsts.begin (), Current);
673+
674+ ExtractValueInst *LHS = nullptr , *RHS = nullptr ;
675+ for (User *Usr : Current->users ()) {
676+ if (!isa<ExtractValueInst>(Usr))
677+ return 0 ;
678+
679+ auto *EV = cast<ExtractValueInst>(Usr);
680+ // Intermediate ExtractValue instructions will also be deleted.
681+ DeadInsts.insert (DeadInsts.begin (), EV);
682+ ArrayRef<unsigned > Indices = EV->getIndices ();
683+ if (Indices.size () != 1 )
684+ return false ;
685+
686+ if (Indices[0 ] == 0 && !LHS)
687+ LHS = EV;
688+ else if (Indices[0 ] == 1 && !RHS)
689+ RHS = EV;
690+ else
691+ return false ;
692+ }
693+
694+ // We have legal indices. At this point we're either going
695+ // to continue the traversal or push the leaf values into Results.
696+ for (ExtractValueInst *EV : {LHS, RHS}) {
697+ // Continue the traversal. We're playing safe here and matching only the
698+ // expression consisting of a perfectly balanced binary tree in which all
699+ // intermediate values are only used once.
700+ if (EV->hasOneUse () &&
701+ match (EV->user_back (),
702+ m_Intrinsic<Intrinsic::vector_deinterleave2>()) &&
703+ EV->user_back ()->hasNUses (2 )) {
704+ auto *EVUsr = cast<IntrinsicInst>(EV->user_back ());
705+ Queue.push_back (EVUsr);
706+ continue ;
707+ }
708+
709+ // If this is not a perfectly balanced tree, the leaf
710+ // result types would be different.
711+ if (!Results.empty () && EV->getType () != Results.back ()->getType ())
712+ return false ;
713+
714+ // Save the leaf value.
715+ Results.push_back (EV);
716+ }
717+ }
718+
719+ const unsigned Factor = Results.size ();
720+ // Currently we only recognize power-of-two factors.
721+ // FIXME: should we assert here instead?
722+ if (Factor <= 1 || !isPowerOf2_32 (Factor))
723+ return 0 ;
724+
725+ interleaveLeafValues (Results);
726+ return true ;
727+ }
728+
729+ // Return the corresponded deinterleaved mask, or nullptr if there is no valid
730+ // mask.
731+ static Value *getMask (Value *WideMask, unsigned Factor,
732+ ElementCount LeafValueEC) {
733+ if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
734+ SmallVector<Value *, 8 > Operands;
735+ SmallVector<Instruction *, 8 > DeadInsts;
736+ if (getVectorInterleaveFactor (IMI, Operands, DeadInsts)) {
737+ assert (!Operands.empty ());
738+ if (Operands.size () == Factor && llvm::all_equal (Operands))
739+ return Operands[0 ];
740+ }
741+ }
742+
743+ if (auto *ConstMask = dyn_cast<Constant>(WideMask)) {
744+ if (auto *Splat = ConstMask->getSplatValue ()) {
745+ // All-ones or all-zeros mask.
746+ return ConstantVector::getSplat (LeafValueEC, Splat);
747+ } else if (LeafValueEC.isFixed ()) {
748+ unsigned LeafMaskLen = LeafValueEC.getFixedValue ();
749+ SmallVector<Constant *, 8 > LeafMask (LeafMaskLen, nullptr );
750+ // If this is a fixed-length constant mask, each lane / leaf has to
751+ // use the same mask. This is done by checking if every group with Factor
752+ // number of elements in the interleaved mask has homogeneous values.
753+ for (unsigned Idx = 0U ; Idx < LeafMaskLen * Factor; ++Idx) {
754+ Constant *Ref = ConstMask->getAggregateElement (alignDown (Idx, Factor));
755+ if (Ref != ConstMask->getAggregateElement (Idx))
756+ return nullptr ;
757+ LeafMask[Idx / Factor] = Ref;
758+ }
759+
760+ return ConstantVector::get (LeafMask);
761+ }
762+ }
763+
764+ return nullptr ;
765+ }
766+
764767bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic (
765768 IntrinsicInst *DI, SmallSetVector<Instruction *, 32 > &DeadInsts) {
766769 Value *LoadedVal = DI->getOperand (0 );
0 commit comments