Skip to content

Commit 6824bcf

Browse files
authored
[IA] Relax the requirement of having ExtractValue users on deinterleave intrinsic (#148716)
There are cases where InstCombine / InstSimplify might sink the extractvalue instructions that use a deinterleave intrinsic into successor blocks, which prevents InterleavedAccess from kicking in because the current pattern requires the deinterleave intrinsic to be used only by extractvalue instructions. However, this requirement is a bit too strict, since we can simply replace the users of the deinterleave intrinsic with whatever is generated by the target TLI hooks.
1 parent ececa87 commit 6824bcf

14 files changed

+263
-104
lines changed

llvm/include/llvm/Analysis/VectorUtils.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
namespace llvm {
2626
class TargetLibraryInfo;
27+
class IntrinsicInst;
2728

2829
/// The Vector Function Database.
2930
///
@@ -188,6 +189,10 @@ LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID);
188189
/// Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
189190
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID);
190191

192+
/// Given a deinterleaveN intrinsic, return the (narrow) vector type of each
193+
/// factor.
194+
LLVM_ABI VectorType *getDeinterleavedVectorType(IntrinsicInst *DI);
195+
191196
/// Given a vector and an element number, see if the scalar value is
192197
/// already around as a register, for example if it were inserted then extracted
193198
/// from the vector.

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3251,10 +3251,9 @@ class LLVM_ABI TargetLoweringBase {
32513251
///
32523252
/// \p Load is the accompanying load instruction. Can be either a plain load
32533253
/// instruction or a vp.load intrinsic.
3254-
/// \p DeinterleaveValues contains the deinterleaved values.
3255-
virtual bool
3256-
lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask,
3257-
ArrayRef<Value *> DeinterleaveValues) const {
3254+
/// \p DI represents the deinterleaveN intrinsic.
3255+
virtual bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask,
3256+
IntrinsicInst *DI) const {
32583257
return false;
32593258
}
32603259

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,15 @@ unsigned llvm::getDeinterleaveIntrinsicFactor(Intrinsic::ID ID) {
306306
}
307307
}
308308

309+
VectorType *llvm::getDeinterleavedVectorType(IntrinsicInst *DI) {
310+
[[maybe_unused]] unsigned Factor =
311+
getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
312+
ArrayRef<Type *> DISubtypes = DI->getType()->subtypes();
313+
assert(Factor && Factor == DISubtypes.size() &&
314+
"unexpected deinterleave factor or result type");
315+
return cast<VectorType>(DISubtypes[0]);
316+
}
317+
309318
/// Given a vector and an element number, see if the scalar value is
310319
/// already around as a register, for example if it were inserted then extracted
311320
/// from the vector.

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -618,29 +618,13 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
618618
const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
619619
assert(Factor && "unexpected deinterleave intrinsic");
620620

621-
SmallVector<Value *, 8> DeinterleaveValues(Factor, nullptr);
622-
Value *LastFactor = nullptr;
623-
for (auto *User : DI->users()) {
624-
auto *Extract = dyn_cast<ExtractValueInst>(User);
625-
if (!Extract || Extract->getNumIndices() != 1)
626-
return false;
627-
unsigned Idx = Extract->getIndices()[0];
628-
if (DeinterleaveValues[Idx])
629-
return false;
630-
DeinterleaveValues[Idx] = Extract;
631-
LastFactor = Extract;
632-
}
633-
634-
if (!LastFactor)
635-
return false;
636-
637621
Value *Mask = nullptr;
638622
if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) {
639623
if (VPLoad->getIntrinsicID() != Intrinsic::vp_load)
640624
return false;
641625
// Check mask operand. Handle both all-true/false and interleaved mask.
642626
Value *WideMask = VPLoad->getOperand(1);
643-
Mask = getMask(WideMask, Factor, cast<VectorType>(LastFactor->getType()));
627+
Mask = getMask(WideMask, Factor, getDeinterleavedVectorType(DI));
644628
if (!Mask)
645629
return false;
646630

@@ -657,12 +641,9 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
657641

658642
// Try and match this with target specific intrinsics.
659643
if (!TLI->lowerDeinterleaveIntrinsicToLoad(cast<Instruction>(LoadedVal), Mask,
660-
DeinterleaveValues))
644+
DI))
661645
return false;
662646

663-
for (Value *V : DeinterleaveValues)
664-
if (V)
665-
DeadInsts.insert(cast<Instruction>(V));
666647
DeadInsts.insert(DI);
667648
// We now have a target-specific load, so delete the old one.
668649
DeadInsts.insert(cast<Instruction>(LoadedVal));

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 12 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -17486,9 +17486,8 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
1748617486
}
1748717487

1748817488
bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
17489-
Instruction *Load, Value *Mask,
17490-
ArrayRef<Value *> DeinterleavedValues) const {
17491-
unsigned Factor = DeinterleavedValues.size();
17489+
Instruction *Load, Value *Mask, IntrinsicInst *DI) const {
17490+
const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
1749217491
if (Factor != 2 && Factor != 4) {
1749317492
LLVM_DEBUG(dbgs() << "Matching ld2 and ld4 patterns failed\n");
1749417493
return false;
@@ -17498,9 +17497,7 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
1749817497
return false;
1749917498
assert(!Mask && "Unexpected mask on a load\n");
1750017499

17501-
Value *FirstActive = *llvm::find_if(DeinterleavedValues,
17502-
[](Value *V) { return V != nullptr; });
17503-
VectorType *VTy = cast<VectorType>(FirstActive->getType());
17500+
VectorType *VTy = getDeinterleavedVectorType(DI);
1750417501

1750517502
const DataLayout &DL = LI->getModule()->getDataLayout();
1750617503
bool UseScalable;
@@ -17528,6 +17525,7 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
1752817525
Builder.CreateVectorSplat(LdTy->getElementCount(), Builder.getTrue());
1752917526

1753017527
Value *BaseAddr = LI->getPointerOperand();
17528+
Value *Result = nullptr;
1753117529
if (NumLoads > 1) {
1753217530
// Create multiple legal small ldN.
1753317531
SmallVector<Value *, 4> ExtractedLdValues(Factor, PoisonValue::get(VTy));
@@ -17548,25 +17546,20 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
1754817546
}
1754917547
LLVM_DEBUG(dbgs() << "LdN4 res: "; LdN->dump());
1755017548
}
17551-
// Replace output of deinterleave2 intrinsic by output of ldN2/ldN4
17552-
for (unsigned J = 0; J < Factor; ++J) {
17553-
if (DeinterleavedValues[J])
17554-
DeinterleavedValues[J]->replaceAllUsesWith(ExtractedLdValues[J]);
17555-
}
17549+
17550+
// Merge the values from different factors.
17551+
Result = PoisonValue::get(DI->getType());
17552+
for (unsigned J = 0; J < Factor; ++J)
17553+
Result = Builder.CreateInsertValue(Result, ExtractedLdValues[J], J);
1755617554
} else {
17557-
Value *Result;
1755817555
if (UseScalable)
1755917556
Result = Builder.CreateCall(LdNFunc, {Pred, BaseAddr}, "ldN");
1756017557
else
1756117558
Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
17562-
// Replace output of deinterleave2 intrinsic by output of ldN2/ldN4
17563-
for (unsigned I = 0; I < Factor; I++) {
17564-
if (DeinterleavedValues[I]) {
17565-
Value *NewExtract = Builder.CreateExtractValue(Result, I);
17566-
DeinterleavedValues[I]->replaceAllUsesWith(NewExtract);
17567-
}
17568-
}
1756917559
}
17560+
17561+
// Replace output of deinterleave2 intrinsic by output of ldN2/ldN4
17562+
DI->replaceAllUsesWith(Result);
1757017563
return true;
1757117564
}
1757217565

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -218,9 +218,8 @@ class AArch64TargetLowering : public TargetLowering {
218218
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
219219
unsigned Factor) const override;
220220

221-
bool lowerDeinterleaveIntrinsicToLoad(
222-
Instruction *Load, Value *Mask,
223-
ArrayRef<Value *> DeinterleaveValues) const override;
221+
bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask,
222+
IntrinsicInst *DI) const override;
224223

225224
bool lowerInterleaveIntrinsicToStore(
226225
StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -437,9 +437,8 @@ class RISCVTargetLowering : public TargetLowering {
437437
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
438438
unsigned Factor) const override;
439439

440-
bool lowerDeinterleaveIntrinsicToLoad(
441-
Instruction *Load, Value *Mask,
442-
ArrayRef<Value *> DeinterleaveValues) const override;
440+
bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask,
441+
IntrinsicInst *DI) const override;
443442

444443
bool lowerInterleaveIntrinsicToStore(
445444
StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;

llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "RISCVISelLowering.h"
1515
#include "RISCVSubtarget.h"
1616
#include "llvm/Analysis/ValueTracking.h"
17+
#include "llvm/Analysis/VectorUtils.h"
1718
#include "llvm/CodeGen/ValueTypes.h"
1819
#include "llvm/IR/IRBuilder.h"
1920
#include "llvm/IR/Instructions.h"
@@ -256,17 +257,14 @@ static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
256257
}
257258

258259
bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
259-
Instruction *Load, Value *Mask,
260-
ArrayRef<Value *> DeinterleaveValues) const {
261-
const unsigned Factor = DeinterleaveValues.size();
260+
Instruction *Load, Value *Mask, IntrinsicInst *DI) const {
261+
const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
262262
if (Factor > 8)
263263
return false;
264264

265265
IRBuilder<> Builder(Load);
266266

267-
Value *FirstActive =
268-
*llvm::find_if(DeinterleaveValues, [](Value *V) { return V != nullptr; });
269-
VectorType *ResVTy = cast<VectorType>(FirstActive->getType());
267+
VectorType *ResVTy = getDeinterleavedVectorType(DI);
270268

271269
const DataLayout &DL = Load->getDataLayout();
272270
auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen());
@@ -346,16 +344,7 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
346344
}
347345
}
348346

349-
for (auto [Idx, DIV] : enumerate(DeinterleaveValues)) {
350-
if (!DIV)
351-
continue;
352-
// We have to create a brand new ExtractValue to replace each
353-
// of these old ExtractValue instructions.
354-
Value *NewEV =
355-
Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)});
356-
DIV->replaceAllUsesWith(NewEV);
357-
}
358-
347+
DI->replaceAllUsesWith(Return);
359348
return true;
360349
}
361350

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,59 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor3(ptr %p
274274
ret { <8 x i8>, <8 x i8>, <8 x i8> } %res2
275275
}
276276

277+
define { <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor3_partial(ptr %p) {
278+
; CHECK-LABEL: vector_deinterleave_load_factor3_partial:
279+
; CHECK: # %bb.0:
280+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
281+
; CHECK-NEXT: vlseg3e8.v v7, (a0)
282+
; CHECK-NEXT: vmv1r.v v8, v7
283+
; CHECK-NEXT: ret
284+
%vec = load <24 x i8>, ptr %p
285+
%d0 = call {<8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave3(<24 x i8> %vec)
286+
%t0 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 0
287+
%t2 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 2
288+
%res0 = insertvalue { <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0
289+
%res1 = insertvalue { <8 x i8>, <8 x i8> } %res0, <8 x i8> %t2, 1
290+
ret { <8 x i8>, <8 x i8> } %res1
291+
}
292+
293+
; InterleavedAccess should kick in even if the users of deinterleave intrinsic are not extractvalue.
294+
define { <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor3_no_extract(ptr %p, ptr %p1, i1 %c) {
295+
; CHECK-LABEL: vector_deinterleave_load_factor3_no_extract:
296+
; CHECK: # %bb.0:
297+
; CHECK-NEXT: andi a2, a2, 1
298+
; CHECK-NEXT: beqz a2, .LBB17_2
299+
; CHECK-NEXT: # %bb.1: # %bb0
300+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
301+
; CHECK-NEXT: vlseg3e8.v v6, (a0)
302+
; CHECK-NEXT: ret
303+
; CHECK-NEXT: .LBB17_2: # %bb1
304+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
305+
; CHECK-NEXT: vlseg3e8.v v6, (a1)
306+
; CHECK-NEXT: ret
307+
br i1 %c, label %bb0, label %bb1
308+
309+
bb0:
310+
%vec0 = load <24 x i8>, ptr %p
311+
%d0.0 = call {<8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave3(<24 x i8> %vec0)
312+
br label %merge
313+
314+
bb1:
315+
%vec1 = load <24 x i8>, ptr %p1
316+
%d0.1 = call {<8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave3(<24 x i8> %vec1)
317+
br label %merge
318+
319+
merge:
320+
%d0 = phi {<8 x i8>, <8 x i8>, <8 x i8>} [%d0.0, %bb0], [%d0.1, %bb1]
321+
%t0 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 0
322+
%t1 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 1
323+
%t2 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 2
324+
%res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0
325+
%res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 0
326+
%res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 0
327+
ret { <8 x i8>, <8 x i8>, <8 x i8> } %res2
328+
}
329+
277330
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor4(ptr %p) {
278331
; CHECK-LABEL: vector_deinterleave_load_factor4:
279332
; CHECK: # %bb.0:

0 commit comments

Comments
 (0)