Skip to content

Commit e46d311

Browse files
committed
fixup! Address review comments
1 parent a41bfb1 commit e46d311

File tree

1 file changed

+190
-187
lines changed

1 file changed

+190
-187
lines changed

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 190 additions & 187 deletions
Original file line numberDiff line numberDiff line change
@@ -249,195 +249,9 @@ static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
249249
return false;
250250
}
251251

252-
// For a (de)interleave tree like this:
253-
//
254-
// A C B D
255-
// |___| |___|
256-
// |_____|
257-
// |
258-
// A B C D
259-
//
260-
// We will get ABCD at the end while the leaf operands/results
261-
// are ACBD, which are also what we initially collected in
262-
// getVectorInterleaveFactor / getVectorDeinterleaveFactor. But TLI
263-
// hooks (e.g. lowerDeinterleaveIntrinsicToLoad) expect ABCD, so we need
264-
// to reorder them by interleaving these values.
265-
static void interleaveLeafValues(MutableArrayRef<Value *> SubLeaves) {
266-
unsigned NumLeaves = SubLeaves.size();
267-
if (NumLeaves == 2)
268-
return;
269-
270-
assert(isPowerOf2_32(NumLeaves) && NumLeaves > 1);
271-
272-
const unsigned HalfLeaves = NumLeaves / 2;
273-
// Visit the sub-trees.
274-
interleaveLeafValues(SubLeaves.take_front(HalfLeaves));
275-
interleaveLeafValues(SubLeaves.drop_front(HalfLeaves));
276-
277-
SmallVector<Value *, 8> Buffer;
278-
// a0 a1 a2 a3 b0 b1 b2 b3
279-
// -> a0 b0 a1 b1 a2 b2 a3 b3
280-
for (unsigned i = 0U; i < NumLeaves; ++i)
281-
Buffer.push_back(SubLeaves[i / 2 + (i % 2 ? HalfLeaves : 0)]);
282-
283-
llvm::copy(Buffer, SubLeaves.begin());
284-
}
285-
286-
static bool
287-
getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
288-
SmallVectorImpl<Instruction *> &DeadInsts) {
289-
assert(II->getIntrinsicID() == Intrinsic::vector_interleave2);
290-
291-
// Visit with BFS
292-
SmallVector<IntrinsicInst *, 8> Queue;
293-
Queue.push_back(II);
294-
while (!Queue.empty()) {
295-
IntrinsicInst *Current = Queue.front();
296-
Queue.erase(Queue.begin());
297-
298-
// All the intermediate intrinsics will be deleted.
299-
DeadInsts.push_back(Current);
300-
301-
for (unsigned I = 0; I < 2; ++I) {
302-
Value *Op = Current->getOperand(I);
303-
if (auto *OpII = dyn_cast<IntrinsicInst>(Op))
304-
if (OpII->getIntrinsicID() == Intrinsic::vector_interleave2) {
305-
Queue.push_back(OpII);
306-
continue;
307-
}
308-
309-
// If this is not a perfectly balanced tree, the leaf
310-
// result types would be different.
311-
if (!Operands.empty() && Op->getType() != Operands.back()->getType())
312-
return false;
313-
314-
Operands.push_back(Op);
315-
}
316-
}
317-
318-
const unsigned Factor = Operands.size();
319-
// Currently we only recognize power-of-two factors.
320-
// FIXME: should we assert here instead?
321-
if (Factor <= 1 || !isPowerOf2_32(Factor))
322-
return false;
323-
324-
interleaveLeafValues(Operands);
325-
return true;
326-
}
327-
328-
static bool
329-
getVectorDeinterleaveFactor(IntrinsicInst *II,
330-
SmallVectorImpl<Value *> &Results,
331-
SmallVectorImpl<Instruction *> &DeadInsts) {
332-
assert(II->getIntrinsicID() == Intrinsic::vector_deinterleave2);
333-
using namespace PatternMatch;
334-
if (!II->hasNUses(2))
335-
return false;
336-
337-
// Visit with BFS
338-
SmallVector<IntrinsicInst *, 8> Queue;
339-
Queue.push_back(II);
340-
while (!Queue.empty()) {
341-
IntrinsicInst *Current = Queue.front();
342-
Queue.erase(Queue.begin());
343-
assert(Current->hasNUses(2));
344-
345-
// All the intermediate intrinsics will be deleted from the bottom-up.
346-
DeadInsts.insert(DeadInsts.begin(), Current);
347-
348-
ExtractValueInst *LHS = nullptr, *RHS = nullptr;
349-
for (User *Usr : Current->users()) {
350-
if (!isa<ExtractValueInst>(Usr))
351-
return 0;
352-
353-
auto *EV = cast<ExtractValueInst>(Usr);
354-
// Intermediate ExtractValue instructions will also be deleted.
355-
DeadInsts.insert(DeadInsts.begin(), EV);
356-
ArrayRef<unsigned> Indices = EV->getIndices();
357-
if (Indices.size() != 1)
358-
return false;
359-
360-
if (Indices[0] == 0 && !LHS)
361-
LHS = EV;
362-
else if (Indices[0] == 1 && !RHS)
363-
RHS = EV;
364-
else
365-
return false;
366-
}
367-
368-
// We have legal indices. At this point we're either going
369-
// to continue the traversal or push the leaf values into Results.
370-
for (ExtractValueInst *EV : {LHS, RHS}) {
371-
// Continue the traversal. We're playing safe here and matching only the
372-
// expression consisting of a perfectly balanced binary tree in which all
373-
// intermediate values are only used once.
374-
if (EV->hasOneUse() &&
375-
match(EV->user_back(),
376-
m_Intrinsic<Intrinsic::vector_deinterleave2>()) &&
377-
EV->user_back()->hasNUses(2)) {
378-
auto *EVUsr = cast<IntrinsicInst>(EV->user_back());
379-
Queue.push_back(EVUsr);
380-
continue;
381-
}
382-
383-
// If this is not a perfectly balanced tree, the leaf
384-
// result types would be different.
385-
if (!Results.empty() && EV->getType() != Results.back()->getType())
386-
return false;
387-
388-
// Save the leaf value.
389-
Results.push_back(EV);
390-
}
391-
}
392-
393-
const unsigned Factor = Results.size();
394-
// Currently we only recognize power-of-two factors.
395-
// FIXME: should we assert here instead?
396-
if (Factor <= 1 || !isPowerOf2_32(Factor))
397-
return 0;
398-
399-
interleaveLeafValues(Results);
400-
return true;
401-
}
402-
403-
// Return the corresponding deinterleaved mask, or nullptr if there is no valid
404-
// mask.
405252
static Value *getMask(Value *WideMask, unsigned Factor,
406-
ElementCount LeafValueEC) {
407-
using namespace llvm::PatternMatch;
408-
if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
409-
SmallVector<Value *, 8> Operands;
410-
SmallVector<Instruction *, 8> DeadInsts;
411-
if (getVectorInterleaveFactor(IMI, Operands, DeadInsts)) {
412-
assert(!Operands.empty());
413-
if (Operands.size() == Factor && llvm::all_equal(Operands))
414-
return Operands[0];
415-
}
416-
}
417-
418-
if (auto *ConstMask = dyn_cast<Constant>(WideMask)) {
419-
if (auto *Splat = ConstMask->getSplatValue()) {
420-
// All-ones or all-zeros mask.
421-
return ConstantVector::getSplat(LeafValueEC, Splat);
422-
} else if (LeafValueEC.isFixed()) {
423-
unsigned LeafMaskLen = LeafValueEC.getFixedValue();
424-
SmallVector<Constant *, 8> LeafMask(LeafMaskLen, nullptr);
425-
// If this is a fixed-length constant mask, each lane / leaf has to
426-
// use the same mask. This is done by checking if every group with Factor
427-
// number of elements in the interleaved mask has homogeneous values.
428-
for (unsigned Idx = 0U, N = LeafMaskLen * Factor; Idx < N; ++Idx) {
429-
Constant *Ref = ConstMask->getAggregateElement(alignDown(Idx, Factor));
430-
if (Ref != ConstMask->getAggregateElement(Idx))
431-
return nullptr;
432-
LeafMask[Idx / Factor] = Ref;
433-
}
253+
ElementCount LeafValueEC);
434254

435-
return ConstantVector::get(LeafMask);
436-
}
437-
}
438-
439-
return nullptr;
440-
}
441255
static Value *getMask(Value *WideMask, unsigned Factor,
442256
VectorType *LeafValueTy) {
443257
return getMask(WideMask, Factor, LeafValueTy->getElementCount());
@@ -761,6 +575,195 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
761575
return true;
762576
}
763577

578+
// For an (de)interleave tree like this:
579+
//
580+
// A C B D
581+
// |___| |___|
582+
// |_____|
583+
// |
584+
// A B C D
585+
//
586+
// We will get ABCD at the end while the leaf operands/results
587+
// are ACBD, which are also what we initially collected in
588+
// getVectorInterleaveFactor / getVectorDeinterleaveFactor. But TLI
589+
// hooks (e.g. lowerDeinterleaveIntrinsicToLoad) expect ABCD, so we need
590+
// to reorder them by interleaving these values.
591+
static void interleaveLeafValues(MutableArrayRef<Value *> SubLeaves) {
592+
unsigned NumLeaves = SubLeaves.size();
593+
if (NumLeaves == 2)
594+
return;
595+
596+
assert(isPowerOf2_32(NumLeaves) && NumLeaves > 1);
597+
598+
const unsigned HalfLeaves = NumLeaves / 2;
599+
// Visit the sub-trees.
600+
interleaveLeafValues(SubLeaves.take_front(HalfLeaves));
601+
interleaveLeafValues(SubLeaves.drop_front(HalfLeaves));
602+
603+
SmallVector<Value *, 8> Buffer;
604+
// a0 a1 a2 a3 b0 b1 b2 b3
605+
// -> a0 b0 a1 b1 a2 b2 a3 b3
606+
for (unsigned i = 0U; i < NumLeaves; ++i)
607+
Buffer.push_back(SubLeaves[i / 2 + (i % 2 ? HalfLeaves : 0)]);
608+
609+
llvm::copy(Buffer, SubLeaves.begin());
610+
}
611+
612+
static bool
613+
getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
614+
SmallVectorImpl<Instruction *> &DeadInsts) {
615+
assert(II->getIntrinsicID() == Intrinsic::vector_interleave2);
616+
617+
// Visit with BFS
618+
SmallVector<IntrinsicInst *, 8> Queue;
619+
Queue.push_back(II);
620+
while (!Queue.empty()) {
621+
IntrinsicInst *Current = Queue.front();
622+
Queue.erase(Queue.begin());
623+
624+
// All the intermediate intrinsics will be deleted.
625+
DeadInsts.push_back(Current);
626+
627+
for (unsigned I = 0; I < 2; ++I) {
628+
Value *Op = Current->getOperand(I);
629+
if (auto *OpII = dyn_cast<IntrinsicInst>(Op))
630+
if (OpII->getIntrinsicID() == Intrinsic::vector_interleave2) {
631+
Queue.push_back(OpII);
632+
continue;
633+
}
634+
635+
// If this is not a perfectly balanced tree, the leaf
636+
// result types would be different.
637+
if (!Operands.empty() && Op->getType() != Operands.back()->getType())
638+
return false;
639+
640+
Operands.push_back(Op);
641+
}
642+
}
643+
644+
const unsigned Factor = Operands.size();
645+
// Currently we only recognize power-of-two factors.
646+
// FIXME: should we assert here instead?
647+
if (Factor <= 1 || !isPowerOf2_32(Factor))
648+
return false;
649+
650+
interleaveLeafValues(Operands);
651+
return true;
652+
}
653+
654+
static bool
655+
getVectorDeinterleaveFactor(IntrinsicInst *II,
656+
SmallVectorImpl<Value *> &Results,
657+
SmallVectorImpl<Instruction *> &DeadInsts) {
658+
assert(II->getIntrinsicID() == Intrinsic::vector_deinterleave2);
659+
using namespace PatternMatch;
660+
if (!II->hasNUses(2))
661+
return false;
662+
663+
// Visit with BFS
664+
SmallVector<IntrinsicInst *, 8> Queue;
665+
Queue.push_back(II);
666+
while (!Queue.empty()) {
667+
IntrinsicInst *Current = Queue.front();
668+
Queue.erase(Queue.begin());
669+
assert(Current->hasNUses(2));
670+
671+
// All the intermediate intrinsics will be deleted from the bottom-up.
672+
DeadInsts.insert(DeadInsts.begin(), Current);
673+
674+
ExtractValueInst *LHS = nullptr, *RHS = nullptr;
675+
for (User *Usr : Current->users()) {
676+
if (!isa<ExtractValueInst>(Usr))
677+
return 0;
678+
679+
auto *EV = cast<ExtractValueInst>(Usr);
680+
// Intermediate ExtractValue instructions will also be deleted.
681+
DeadInsts.insert(DeadInsts.begin(), EV);
682+
ArrayRef<unsigned> Indices = EV->getIndices();
683+
if (Indices.size() != 1)
684+
return false;
685+
686+
if (Indices[0] == 0 && !LHS)
687+
LHS = EV;
688+
else if (Indices[0] == 1 && !RHS)
689+
RHS = EV;
690+
else
691+
return false;
692+
}
693+
694+
// We have legal indices. At this point we're either going
695+
// to continue the traversal or push the leaf values into Results.
696+
for (ExtractValueInst *EV : {LHS, RHS}) {
697+
// Continue the traversal. We're playing safe here and matching only the
698+
// expression consisting of a perfectly balanced binary tree in which all
699+
// intermediate values are only used once.
700+
if (EV->hasOneUse() &&
701+
match(EV->user_back(),
702+
m_Intrinsic<Intrinsic::vector_deinterleave2>()) &&
703+
EV->user_back()->hasNUses(2)) {
704+
auto *EVUsr = cast<IntrinsicInst>(EV->user_back());
705+
Queue.push_back(EVUsr);
706+
continue;
707+
}
708+
709+
// If this is not a perfectly balanced tree, the leaf
710+
// result types would be different.
711+
if (!Results.empty() && EV->getType() != Results.back()->getType())
712+
return false;
713+
714+
// Save the leaf value.
715+
Results.push_back(EV);
716+
}
717+
}
718+
719+
const unsigned Factor = Results.size();
720+
// Currently we only recognize power-of-two factors.
721+
// FIXME: should we assert here instead?
722+
if (Factor <= 1 || !isPowerOf2_32(Factor))
723+
return 0;
724+
725+
interleaveLeafValues(Results);
726+
return true;
727+
}
728+
729+
// Return the corresponded deinterleaved mask, or nullptr if there is no valid
730+
// mask.
731+
static Value *getMask(Value *WideMask, unsigned Factor,
732+
ElementCount LeafValueEC) {
733+
if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
734+
SmallVector<Value *, 8> Operands;
735+
SmallVector<Instruction *, 8> DeadInsts;
736+
if (getVectorInterleaveFactor(IMI, Operands, DeadInsts)) {
737+
assert(!Operands.empty());
738+
if (Operands.size() == Factor && llvm::all_equal(Operands))
739+
return Operands[0];
740+
}
741+
}
742+
743+
if (auto *ConstMask = dyn_cast<Constant>(WideMask)) {
744+
if (auto *Splat = ConstMask->getSplatValue()) {
745+
// All-ones or all-zeros mask.
746+
return ConstantVector::getSplat(LeafValueEC, Splat);
747+
} else if (LeafValueEC.isFixed()) {
748+
unsigned LeafMaskLen = LeafValueEC.getFixedValue();
749+
SmallVector<Constant *, 8> LeafMask(LeafMaskLen, nullptr);
750+
// If this is a fixed-length constant mask, each lane / leaf has to
751+
// use the same mask. This is done by checking if every group with Factor
752+
// number of elements in the interleaved mask has homogeneous values.
753+
for (unsigned Idx = 0U; Idx < LeafMaskLen * Factor; ++Idx) {
754+
Constant *Ref = ConstMask->getAggregateElement(alignDown(Idx, Factor));
755+
if (Ref != ConstMask->getAggregateElement(Idx))
756+
return nullptr;
757+
LeafMask[Idx / Factor] = Ref;
758+
}
759+
760+
return ConstantVector::get(LeafMask);
761+
}
762+
}
763+
764+
return nullptr;
765+
}
766+
764767
bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
765768
IntrinsicInst *DI, SmallSetVector<Instruction *, 32> &DeadInsts) {
766769
Value *LoadedVal = DI->getOperand(0);

0 commit comments

Comments
 (0)