Skip to content

Commit 3009be0

Browse files
committed
[AMD][GISel] legalize G_INSERT_SUBVECTOR
1 parent 147d9d6 commit 3009be0

File tree

5 files changed

+76
-0
lines changed

5 files changed

+76
-0
lines changed

llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1381,6 +1381,16 @@ class MachineIRBuilder {
13811381
Res, Val, buildConstant(LLT::scalar(VecIdxWidth), Idx));
13821382
}
13831383

1384+
/// Build and insert \p Res = G_INSERT_VECTOR_ELT \p Val, \p Elt, Idx
///
/// The index \p Idx is emitted as a constant whose scalar width is the
/// bit size of the target's vector index type for the current data layout.
///
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildInsertVectorElementConstant(const DstOp &Res,
                                                     const SrcOp &Val,
                                                     const SrcOp &Elt,
                                                     const int Idx) {
  const auto *Lowering = getMF().getSubtarget().getTargetLowering();
  // Ask the target how wide a vector index is for this data layout.
  const unsigned IdxBits =
      Lowering->getVectorIdxTy(getDataLayout()).getSizeInBits();
  // Materialize the immediate index before building the insert itself.
  auto IdxCst = buildConstant(LLT::scalar(IdxBits), Idx);
  return buildInsertVectorElement(Res, Val, Elt, IdxCst);
}
1393+
13841394
/// Build and insert \p Res = G_EXTRACT_VECTOR_ELT \p Val, \p Idx
13851395
///
13861396
/// \pre setBasicBlock or setMI must have been called.

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,9 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
124124

125125
if (isa<GIntrinsic>(MI))
126126
return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
127+
if (isa<GInsertSubvector>(MI))
128+
return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
129+
: UnableToLegalize;
127130
auto Step = LI.getAction(MI, MRI);
128131
switch (Step.Action) {
129132
case Legal:

llvm/lib/CodeGen/GlobalISel/Utils.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,7 @@ std::optional<DefinitionAndSourceRegister>
467467
llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
468468
Register DefSrcReg = Reg;
469469
auto *DefMI = MRI.getVRegDef(Reg);
470+
assert(DefMI && "expected non-null machine instr");
470471
auto DstTy = MRI.getType(DefMI->getOperand(0).getReg());
471472
if (!DstTy.isValid())
472473
return std::nullopt;

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2144,6 +2144,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(
21442144
return legalizeExtractVectorElt(MI, MRI, B);
21452145
case TargetOpcode::G_INSERT_VECTOR_ELT:
21462146
return legalizeInsertVectorElt(MI, MRI, B);
2147+
case TargetOpcode::G_INSERT_SUBVECTOR:
2148+
return legalizeInsertSubVector(MI, MRI, B);
21472149
case TargetOpcode::G_FSIN:
21482150
case TargetOpcode::G_FCOS:
21492151
return legalizeSinCos(MI, MRI, B);
@@ -2838,6 +2840,64 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
28382840
return true;
28392841
}
28402842

2843+
// This lowers an G_INSERT_SUBVECTOR by extracting the individual elements from
2844+
// the small vector and inserting them into the big vector. That is better than
2845+
// the default expansion of doing it via a stack slot. Even though the use of
2846+
// the stack slot would be optimized away afterwards, the stack slot itself
2847+
// remains.
2848+
bool AMDGPULegalizerInfo::legalizeInsertSubVector(MachineInstr &MI,
2849+
MachineRegisterInfo &MRI,
2850+
MachineIRBuilder &B) const {
2851+
2852+
GInsertSubvector *ES = cast<GInsertSubvector>(&MI);
2853+
Register Vec = ES->getBigVec();
2854+
Register Ins = ES->getSubVec();
2855+
uint64_t IdxVal = ES->getIndexImm();
2856+
2857+
LLT VecVT = MRI.getType(Vec);
2858+
LLT InsVT = MRI.getType(Ins);
2859+
LLT EltVT = VecVT.getElementType();
2860+
assert(VecVT.getElementType() == InsVT.getElementType());
2861+
2862+
ElementCount InsVTEC = InsVT.getElementCount();
2863+
auto InsNumElts = InsVTEC.getKnownMinValue();
2864+
2865+
if (EltVT.getScalarSizeInBits() == 16 && IdxVal % 2 == 0) {
2866+
// Insert 32-bit registers at a time.
2867+
assert(InsNumElts % 2 == 0 && "expect legal vector types");
2868+
2869+
ElementCount VecVTEC = VecVT.getElementCount();
2870+
LLT NewVecVT = LLT::vector(VecVTEC.divideCoefficientBy(2), S32);
2871+
LLT NewInsVT = InsNumElts == 2
2872+
? S32
2873+
: LLT::vector(InsVTEC.divideCoefficientBy(2), S32);
2874+
2875+
auto VecB = B.buildBitcast(NewVecVT, Vec);
2876+
auto InsB = B.buildBitcast(NewInsVT, Ins);
2877+
2878+
for (unsigned I = 0; I != InsNumElts / 2; ++I) {
2879+
MachineInstrBuilder Elt;
2880+
if (InsNumElts == 2) {
2881+
Elt = InsB;
2882+
} else {
2883+
Elt = B.buildExtractVectorElementConstant(S32, InsB, I);
2884+
}
2885+
VecB = B.buildInsertVectorElementConstant(NewVecVT, VecB, Elt, IdxVal / 2 + I);
2886+
}
2887+
auto R = B.buildBitcast(VecVT, VecB);
2888+
MI.eraseFromParent();
2889+
return true;
2890+
}
2891+
2892+
for (unsigned I = 0; I != InsNumElts; ++I) {
2893+
auto Elt = B.buildExtractVectorElementConstant(EltVT, Ins, I);
2894+
Vec = B.buildInsertVectorElementConstant(VecVT, Vec, Elt, IdxVal + I).getReg(0);
2895+
}
2896+
2897+
MI.eraseFromParent();
2898+
return true;
2899+
}
2900+
28412901
bool AMDGPULegalizerInfo::legalizeSinCos(
28422902
MachineInstr &MI, MachineRegisterInfo &MRI,
28432903
MachineIRBuilder &B) const {

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
6060
MachineIRBuilder &B) const;
6161
bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
6262
MachineIRBuilder &B) const;
63+
bool legalizeInsertSubVector(MachineInstr &MI, MachineRegisterInfo &MRI,
64+
MachineIRBuilder &B) const;
6365

6466
bool legalizeSinCos(MachineInstr &MI, MachineRegisterInfo &MRI,
6567
MachineIRBuilder &B) const;

0 commit comments

Comments
 (0)