Skip to content

Commit db2e655

Browse files
committed
[AMD][GISel] legalize G_INSERT_SUBVECTOR
1 parent 1188b1f commit db2e655

File tree

5 files changed

+76
-0
lines changed

5 files changed

+76
-0
lines changed

llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1381,6 +1381,16 @@ class MachineIRBuilder {
13811381
Res, Val, buildConstant(LLT::scalar(VecIdxWidth), Idx));
13821382
}
13831383

1384+
/// Build and insert a vector-element insertion of \p Elt into \p Val at the
/// compile-time constant position \p Idx, defining \p Res.
///
/// The index operand is materialized with buildConstant as a scalar whose
/// width is taken from the target lowering's vector index type, and the
/// actual instruction is created by buildInsertVectorElement.
MachineInstrBuilder buildInsertVectorElementConstant(const DstOp &Res,
                                                     const SrcOp &Val,
                                                     const SrcOp &Elt,
                                                     const int Idx) {
  const auto *Lowering = getMF().getSubtarget().getTargetLowering();
  const unsigned IdxBits =
      Lowering->getVectorIdxTy(getDataLayout()).getSizeInBits();
  // Emit the index constant first, then the insert that consumes it.
  auto IdxCst = buildConstant(LLT::scalar(IdxBits), Idx);
  return buildInsertVectorElement(Res, Val, Elt, IdxCst);
}
1393+
13841394
/// Build and insert \p Res = G_EXTRACT_VECTOR_ELT \p Val, \p Idx
13851395
///
13861396
/// \pre setBasicBlock or setMI must have been called.

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,9 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
124124

125125
if (isa<GIntrinsic>(MI))
126126
return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
127+
if (isa<GInsertSubvector>(MI))
128+
return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
129+
: UnableToLegalize;
127130
auto Step = LI.getAction(MI, MRI);
128131
switch (Step.Action) {
129132
case Legal:

llvm/lib/CodeGen/GlobalISel/Utils.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,7 @@ std::optional<DefinitionAndSourceRegister>
467467
llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
468468
Register DefSrcReg = Reg;
469469
auto *DefMI = MRI.getVRegDef(Reg);
470+
assert(DefMI && "expected non-null machine instr");
470471
auto DstTy = MRI.getType(DefMI->getOperand(0).getReg());
471472
if (!DstTy.isValid())
472473
return std::nullopt;

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2131,6 +2131,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(
21312131
return legalizeExtractVectorElt(MI, MRI, B);
21322132
case TargetOpcode::G_INSERT_VECTOR_ELT:
21332133
return legalizeInsertVectorElt(MI, MRI, B);
2134+
case TargetOpcode::G_INSERT_SUBVECTOR:
2135+
return legalizeInsertSubVector(MI, MRI, B);
21342136
case TargetOpcode::G_FSIN:
21352137
case TargetOpcode::G_FCOS:
21362138
return legalizeSinCos(MI, MRI, B);
@@ -2828,6 +2830,64 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
28282830
return true;
28292831
}
28302832

2833+
// This lowers an G_INSERT_SUBVECTOR by extracting the individual elements from
2834+
// the small vector and inserting them into the big vector. That is better than
2835+
// the default expansion of doing it via a stack slot. Even though the use of
2836+
// the stack slot would be optimized away afterwards, the stack slot itself
2837+
// remains.
2838+
bool AMDGPULegalizerInfo::legalizeInsertSubVector(MachineInstr &MI,
2839+
MachineRegisterInfo &MRI,
2840+
MachineIRBuilder &B) const {
2841+
2842+
GInsertSubvector *ES = cast<GInsertSubvector>(&MI);
2843+
Register Vec = ES->getBigVec();
2844+
Register Ins = ES->getSubVec();
2845+
uint64_t IdxVal = ES->getIndexImm();
2846+
2847+
LLT VecVT = MRI.getType(Vec);
2848+
LLT InsVT = MRI.getType(Ins);
2849+
LLT EltVT = VecVT.getElementType();
2850+
assert(VecVT.getElementType() == InsVT.getElementType());
2851+
2852+
ElementCount InsVTEC = InsVT.getElementCount();
2853+
auto InsNumElts = InsVTEC.getKnownMinValue();
2854+
2855+
if (EltVT.getScalarSizeInBits() == 16 && IdxVal % 2 == 0) {
2856+
// Insert 32-bit registers at a time.
2857+
assert(InsNumElts % 2 == 0 && "expect legal vector types");
2858+
2859+
ElementCount VecVTEC = VecVT.getElementCount();
2860+
LLT NewVecVT = LLT::vector(VecVTEC.divideCoefficientBy(2), S32);
2861+
LLT NewInsVT = InsNumElts == 2
2862+
? S32
2863+
: LLT::vector(InsVTEC.divideCoefficientBy(2), S32);
2864+
2865+
auto VecB = B.buildBitcast(NewVecVT, Vec);
2866+
auto InsB = B.buildBitcast(NewInsVT, Ins);
2867+
2868+
for (unsigned I = 0; I != InsNumElts / 2; ++I) {
2869+
MachineInstrBuilder Elt;
2870+
if (InsNumElts == 2) {
2871+
Elt = InsB;
2872+
} else {
2873+
Elt = B.buildExtractVectorElementConstant(S32, InsB, I);
2874+
}
2875+
VecB = B.buildInsertVectorElementConstant(NewVecVT, VecB, Elt, IdxVal / 2 + I);
2876+
}
2877+
auto R = B.buildBitcast(VecVT, VecB);
2878+
MI.eraseFromParent();
2879+
return true;
2880+
}
2881+
2882+
for (unsigned I = 0; I != InsNumElts; ++I) {
2883+
auto Elt = B.buildExtractVectorElementConstant(EltVT, Ins, I);
2884+
Vec = B.buildInsertVectorElementConstant(VecVT, Vec, Elt, IdxVal + I).getReg(0);
2885+
}
2886+
2887+
MI.eraseFromParent();
2888+
return true;
2889+
}
2890+
28312891
bool AMDGPULegalizerInfo::legalizeSinCos(
28322892
MachineInstr &MI, MachineRegisterInfo &MRI,
28332893
MachineIRBuilder &B) const {

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
6060
MachineIRBuilder &B) const;
6161
bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
6262
MachineIRBuilder &B) const;
63+
bool legalizeInsertSubVector(MachineInstr &MI, MachineRegisterInfo &MRI,
64+
MachineIRBuilder &B) const;
6365

6466
bool legalizeSinCos(MachineInstr &MI, MachineRegisterInfo &MRI,
6567
MachineIRBuilder &B) const;

0 commit comments

Comments
 (0)