Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -3234,7 +3234,7 @@ class TargetLoweringBase {

/// Lower a deinterleave intrinsic to a target specific load intrinsic.
/// Return true on success. Currently only supports
/// llvm.vector.deinterleave2
/// llvm.vector.deinterleave{2,3,5,7}
///
/// \p LI is the accompanying load instruction.
/// \p DeinterleaveValues contains the deinterleaved values.
Expand All @@ -3246,7 +3246,7 @@ class TargetLoweringBase {

/// Lower an interleave intrinsic to a target specific store intrinsic.
/// Return true on success. Currently only supports
/// llvm.vector.interleave2
/// llvm.vector.interleave{2,3,5,7}
///
/// \p SI is the accompanying store instruction
/// \p InterleaveValues contains the interleaved values.
Expand Down
75 changes: 56 additions & 19 deletions llvm/lib/CodeGen/InterleavedAccessPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,25 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
return true;
}

/// Return the (de)interleave factor encoded in the intrinsic ID of \p II.
///
/// llvm.vector.interleaveN and llvm.vector.deinterleaveN both map to N for
/// N in {2, 3, 5, 7}. Any other intrinsic reaches llvm_unreachable.
static unsigned getIntrinsicFactor(const IntrinsicInst *II) {
  const auto ID = II->getIntrinsicID();

  if (ID == Intrinsic::vector_deinterleave2 ||
      ID == Intrinsic::vector_interleave2)
    return 2;

  if (ID == Intrinsic::vector_deinterleave3 ||
      ID == Intrinsic::vector_interleave3)
    return 3;

  if (ID == Intrinsic::vector_deinterleave5 ||
      ID == Intrinsic::vector_interleave5)
    return 5;

  if (ID == Intrinsic::vector_deinterleave7 ||
      ID == Intrinsic::vector_interleave7)
    return 7;

  llvm_unreachable("Unexpected intrinsic");
}

// For an (de)interleave tree like this:
//
// A C B D
Expand All @@ -586,7 +605,7 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
// to reorder them by interleaving these values.
static void interleaveLeafValues(MutableArrayRef<Value *> SubLeaves) {
unsigned NumLeaves = SubLeaves.size();
if (NumLeaves == 2)
if (NumLeaves == 2 || !isPowerOf2_64(NumLeaves))
return;

assert(isPowerOf2_32(NumLeaves) && NumLeaves > 1);
Expand All @@ -608,7 +627,10 @@ static void interleaveLeafValues(MutableArrayRef<Value *> SubLeaves) {
static bool
getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
SmallVectorImpl<Instruction *> &DeadInsts) {
assert(II->getIntrinsicID() == Intrinsic::vector_interleave2);
assert(II->getIntrinsicID() == Intrinsic::vector_interleave2 ||
II->getIntrinsicID() == Intrinsic::vector_interleave3 ||
II->getIntrinsicID() == Intrinsic::vector_interleave5 ||
II->getIntrinsicID() == Intrinsic::vector_interleave7);

// Visit with BFS
SmallVector<IntrinsicInst *, 8> Queue;
Expand All @@ -620,7 +642,7 @@ getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
// All the intermediate intrinsics will be deleted.
DeadInsts.push_back(Current);

for (unsigned I = 0; I < 2; ++I) {
for (unsigned I = 0; I < getIntrinsicFactor(Current); ++I) {
Value *Op = Current->getOperand(I);
if (auto *OpII = dyn_cast<IntrinsicInst>(Op))
if (OpII->getIntrinsicID() == Intrinsic::vector_interleave2) {
Expand All @@ -638,9 +660,10 @@ getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
}

const unsigned Factor = Operands.size();
// Currently we only recognize power-of-two factors.
// Currently we only recognize factors of 2, 3, 5 and 7.
// FIXME: should we assert here instead?
if (Factor <= 1 || !isPowerOf2_32(Factor))
if (Factor <= 1 ||
(!isPowerOf2_32(Factor) && Factor != getIntrinsicFactor(II)))
return false;

interleaveLeafValues(Operands);
Expand All @@ -651,9 +674,12 @@ static bool
getVectorDeinterleaveFactor(IntrinsicInst *II,
SmallVectorImpl<Value *> &Results,
SmallVectorImpl<Instruction *> &DeadInsts) {
assert(II->getIntrinsicID() == Intrinsic::vector_deinterleave2);
assert(II->getIntrinsicID() == Intrinsic::vector_deinterleave2 ||
II->getIntrinsicID() == Intrinsic::vector_deinterleave3 ||
II->getIntrinsicID() == Intrinsic::vector_deinterleave5 ||
II->getIntrinsicID() == Intrinsic::vector_deinterleave7);
using namespace PatternMatch;
if (!II->hasNUses(2))
if (!II->hasNUses(getIntrinsicFactor(II)))
return false;

// Visit with BFS
Expand All @@ -662,12 +688,12 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
while (!Queue.empty()) {
IntrinsicInst *Current = Queue.front();
Queue.erase(Queue.begin());
assert(Current->hasNUses(2));
assert(Current->hasNUses(getIntrinsicFactor(Current)));

// All the intermediate intrinsics will be deleted from the bottom-up.
DeadInsts.insert(DeadInsts.begin(), Current);

ExtractValueInst *LHS = nullptr, *RHS = nullptr;
SmallVector<ExtractValueInst *> EVs(getIntrinsicFactor(Current), nullptr);
for (User *Usr : Current->users()) {
if (!isa<ExtractValueInst>(Usr))
return 0;
Expand All @@ -679,17 +705,15 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
if (Indices.size() != 1)
return false;

if (Indices[0] == 0 && !LHS)
LHS = EV;
else if (Indices[0] == 1 && !RHS)
RHS = EV;
if (!EVs[Indices[0]])
EVs[Indices[0]] = EV;
else
return false;
}

// We have legal indices. At this point we're either going
// to continue the traversal or push the leaf values into Results.
for (ExtractValueInst *EV : {LHS, RHS}) {
for (ExtractValueInst *EV : EVs) {
// Continue the traversal. We're playing safe here and matching only the
// expression consisting of a perfectly balanced binary tree in which all
// intermediate values are only used once.
Expand All @@ -713,9 +737,10 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
}

const unsigned Factor = Results.size();
// Currently we only recognize power-of-two factors.
// Currently we only recognize factors of 2, 3, 5 and 7.
// FIXME: should we assert here instead?
if (Factor <= 1 || !isPowerOf2_32(Factor))
if (Factor <= 1 ||
(!isPowerOf2_32(Factor) && Factor != getIntrinsicFactor(II)))
return 0;

interleaveLeafValues(Results);
Expand Down Expand Up @@ -878,11 +903,23 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {

if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
// At present, we only have intrinsics to represent (de)interleaving
// with a factor of 2.
if (II->getIntrinsicID() == Intrinsic::vector_deinterleave2)
// with a factor of 2,3,5 and 7.
switch (II->getIntrinsicID()) {
case Intrinsic::vector_deinterleave2:
case Intrinsic::vector_deinterleave3:
case Intrinsic::vector_deinterleave5:
case Intrinsic::vector_deinterleave7:
Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
else if (II->getIntrinsicID() == Intrinsic::vector_interleave2)
break;
case Intrinsic::vector_interleave2:
case Intrinsic::vector_interleave3:
case Intrinsic::vector_interleave5:
case Intrinsic::vector_interleave7:
Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
break;
default:
break;
}
}
}

Expand Down
63 changes: 63 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,23 @@ define {<2 x double>, <2 x double>} @vector_deinterleave_load_v2f64_v4f64(ptr %p
ret {<2 x double>, <2 x double>} %res1
}

; Factor-3 deinterleave of a fixed-length load. Each of the three fields of
; the deinterleave3 result is extracted and placed in the matching slot
; (0, 1, 2) of the returned aggregate, so all three segment results are live.
; NOTE(review): the expected destination register is v8 by analogy with the
; factor-5 and factor-7 tests, where every field is returned; regenerate the
; assertions with update_llc_test_checks.py to confirm.
define { <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor3(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor3:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vlseg3e8.v v8, (a0)
; CHECK-NEXT: ret
  %vec = load <24 x i8>, ptr %p
  %d0 = call {<8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave3(<24 x i8> %vec)
  %t0 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 0
  %t1 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 1
  %t2 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 2
  %res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0
  %res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1
  %res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %res2
}

define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor4(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor4:
; CHECK: # %bb.0:
Expand All @@ -281,6 +298,52 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_fact
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3
}

; Factor-5 deinterleave of a fixed-length load. Each of the five fields is
; extracted and re-packed into the matching slot of the returned aggregate.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor5(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor5:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vlseg5e8.v v8, (a0)
; CHECK-NEXT: ret
  %wide = load <40 x i8>, ptr %p
  %deint = call {<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave5(<40 x i8> %wide)
  %f0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %deint, 0
  %f1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %deint, 1
  %f2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %deint, 2
  %f3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %deint, 3
  %f4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %deint, 4
  %agg0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %f0, 0
  %agg1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %agg0, <8 x i8> %f1, 1
  %agg2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %agg1, <8 x i8> %f2, 2
  %agg3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %agg2, <8 x i8> %f3, 3
  %agg4 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %agg3, <8 x i8> %f4, 4
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %agg4
}

; Factor-7 deinterleave of a fixed-length load. Each of the seven fields is
; extracted and inserted into the matching slot of the returned aggregate.
; The insertvalue chain is strictly linear (%res0 -> %res1 -> ... -> %res6)
; so that every extracted field reaches the returned value.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor7(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor7:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vlseg7e8.v v8, (a0)
; CHECK-NEXT: ret
  %vec = load <56 x i8>, ptr %p
  %d0 = call {<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave7(<56 x i8> %vec)
  %t0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 0
  %t1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 1
  %t2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 2
  %t3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 3
  %t4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 4
  %t5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 5
  %t6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 6
  %res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0
  %res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1
  %res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2
  %res3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2, <8 x i8> %t3, 3
  %res4 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t4, 4
  %res5 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res4, <8 x i8> %t5, 5
  %res6 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res5, <8 x i8> %t6, 6
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res6
}

define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave_load_factor8(ptr %ptr) {
; CHECK-LABEL: vector_deinterleave_load_factor8:
; CHECK: # %bb.0:
Expand Down
33 changes: 33 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,17 @@ define void @vector_interleave_store_v4f64_v2f64(<2 x double> %a, <2 x double> %
ret void
}

; Factor-3 interleave feeding a fixed-length store: the three <4 x i32>
; operands are interleaved into one <12 x i32> value that is stored to %p.
define void @vector_interleave_store_factor3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, ptr %p) {
; CHECK-LABEL: vector_interleave_store_factor3:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg3e32.v v8, (a0)
; CHECK-NEXT: ret
  %wide = call <12 x i32> @llvm.vector.interleave3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
  store <12 x i32> %wide, ptr %p
  ret void
}

define void @vector_interleave_store_factor4(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, ptr %p) {
; CHECK-LABEL: vector_interleave_store_factor4:
; CHECK: # %bb.0:
Expand All @@ -194,6 +205,28 @@ define void @vector_interleave_store_factor4(<4 x i32> %a, <4 x i32> %b, <4 x i3
ret void
}

; Factor-5 interleave feeding a fixed-length store: the five <4 x i32>
; operands are interleaved into one <20 x i32> value that is stored to %p.
define void @vector_interleave_store_factor5(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, ptr %p) {
; CHECK-LABEL: vector_interleave_store_factor5:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg5e32.v v8, (a0)
; CHECK-NEXT: ret
  %wide = call <20 x i32> @llvm.vector.interleave5(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e)
  store <20 x i32> %wide, ptr %p
  ret void
}

; Factor-7 interleave feeding a fixed-length store: the seven <4 x i32>
; operands are interleaved into one <28 x i32> value that is stored to %p.
define void @vector_interleave_store_factor7(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, ptr %p) {
; CHECK-LABEL: vector_interleave_store_factor7:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg7e32.v v8, (a0)
; CHECK-NEXT: ret
  %wide = call <28 x i32> @llvm.vector.interleave7(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g)
  store <28 x i32> %wide, ptr %p
  ret void
}

define void @vector_interleave_store_factor8(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, <4 x i32> %h, ptr %p) {
; CHECK-LABEL: vector_interleave_store_factor8:
; CHECK: # %bb.0:
Expand Down
Loading