Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -1381,6 +1381,16 @@ class MachineIRBuilder {
Res, Val, buildConstant(LLT::scalar(VecIdxWidth), Idx));
}

/// Convenience wrapper around buildInsertVectorElement for an index that is
/// known at compile time.
///
/// The immediate \p Idx is first materialized with buildConstant as a scalar
/// whose bit width is the size of the target's vector index type; that
/// constant is then used as the index operand of the insert.
///
/// \param Res destination operand of the insert.
/// \param Val vector that the element is inserted into.
/// \param Elt element value to insert.
/// \param Idx immediate element index.
///
/// \return the MachineInstrBuilder returned by buildInsertVectorElement.
MachineInstrBuilder buildInsertVectorElementConstant(const DstOp &Res,
                                                     const SrcOp &Val,
                                                     const SrcOp &Elt,
                                                     const int Idx) {
  const auto *Lowering = getMF().getSubtarget().getTargetLowering();
  const unsigned IdxBits =
      Lowering->getVectorIdxTy(getDataLayout()).getSizeInBits();
  // Build the index constant first so it precedes the insert that uses it.
  auto IdxCst = buildConstant(LLT::scalar(IdxBits), Idx);
  return buildInsertVectorElement(Res, Val, Elt, IdxCst);
}

/// Build and insert \p Res = G_EXTRACT_VECTOR_ELT \p Val, \p Idx
///
/// \pre setBasicBlock or setMI must have been called.
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI,

if (isa<GIntrinsic>(MI))
return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
if (isa<GInsertSubvector>(MI))
return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
: UnableToLegalize;
auto Step = LI.getAction(MI, MRI);
switch (Step.Action) {
case Legal:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/GlobalISel/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,7 @@ std::optional<DefinitionAndSourceRegister>
llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
Register DefSrcReg = Reg;
auto *DefMI = MRI.getVRegDef(Reg);
assert(DefMI && "expected non-null machine instr");
auto DstTy = MRI.getType(DefMI->getOperand(0).getReg());
if (!DstTy.isValid())
return std::nullopt;
Expand Down
60 changes: 60 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2144,6 +2144,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(
return legalizeExtractVectorElt(MI, MRI, B);
case TargetOpcode::G_INSERT_VECTOR_ELT:
return legalizeInsertVectorElt(MI, MRI, B);
case TargetOpcode::G_INSERT_SUBVECTOR:
return legalizeInsertSubVector(MI, MRI, B);
case TargetOpcode::G_FSIN:
case TargetOpcode::G_FCOS:
return legalizeSinCos(MI, MRI, B);
Expand Down Expand Up @@ -2838,6 +2840,64 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
return true;
}

// Lower a G_INSERT_SUBVECTOR by extracting the individual elements from the
// small vector and inserting them into the big vector. That is better than
// the default expansion of doing it via a stack slot. Even though the use of
// the stack slot would be optimized away afterwards, the stack slot itself
// remains.
//
// When the elements are 16 bits wide and everything is dword aligned (even
// insertion index, even element counts), both vectors are bitcast to vectors
// of S32 so that two elements are moved per extract/insert pair.
//
// The final value is written to the original destination register of \p MI
// before \p MI is erased, so existing users of the result stay valid.
//
// Always returns true (the instruction has been replaced).
bool AMDGPULegalizerInfo::legalizeInsertSubVector(MachineInstr &MI,
                                                  MachineRegisterInfo &MRI,
                                                  MachineIRBuilder &B) const {
  auto *Insert = cast<GInsertSubvector>(&MI);
  const Register Dst = MI.getOperand(0).getReg();
  Register Vec = Insert->getBigVec();
  const Register Ins = Insert->getSubVec();
  const uint64_t IdxVal = Insert->getIndexImm();

  const LLT VecVT = MRI.getType(Vec);
  const LLT InsVT = MRI.getType(Ins);
  const LLT EltVT = VecVT.getElementType();
  assert(EltVT == InsVT.getElementType() &&
         "big vector and subvector must have the same element type");

  const ElementCount VecVTEC = VecVT.getElementCount();
  const ElementCount InsVTEC = InsVT.getElementCount();
  const unsigned VecNumElts = VecVTEC.getKnownMinValue();
  const unsigned InsNumElts = InsVTEC.getKnownMinValue();
  assert(IdxVal + InsNumElts <= VecNumElts &&
         "subvector does not fit into the big vector");

  // Packing two 16-bit elements into one 32-bit lane is only valid when the
  // insertion point and both element counts are even. The big vector must
  // also have more than one dword, otherwise the S32 "vector" would have a
  // single element. Anything else falls through to the element-wise path
  // instead of silently dropping a trailing element.
  const bool CanPack = EltVT.getScalarSizeInBits() == 16 && IdxVal % 2 == 0 &&
                       InsNumElts % 2 == 0 && VecNumElts % 2 == 0 &&
                       VecNumElts > 2;

  if (CanPack) {
    // Insert 32-bit registers at a time.
    const unsigned NumDwords = InsNumElts / 2;
    const LLT NewVecVT = LLT::vector(VecVTEC.divideCoefficientBy(2), S32);
    // A two-element subvector becomes a plain S32 scalar.
    const LLT NewInsVT =
        NumDwords == 1 ? S32
                       : LLT::vector(InsVTEC.divideCoefficientBy(2), S32);

    auto VecB = B.buildBitcast(NewVecVT, Vec);
    auto InsB = B.buildBitcast(NewInsVT, Ins);

    for (unsigned I = 0; I != NumDwords; ++I) {
      MachineInstrBuilder Elt =
          NumDwords == 1 ? InsB
                         : B.buildExtractVectorElementConstant(S32, InsB, I);
      VecB = B.buildInsertVectorElementConstant(
          NewVecVT, VecB, Elt, static_cast<int>(IdxVal / 2 + I));
    }

    // Define the original result register with the rebuilt vector.
    B.buildBitcast(Dst, VecB);
    MI.eraseFromParent();
    return true;
  }

  for (unsigned I = 0; I != InsNumElts; ++I) {
    auto Elt = B.buildExtractVectorElementConstant(EltVT, Ins, I);
    // The last insert writes straight into the original result register;
    // earlier ones produce fresh intermediate vectors.
    const bool IsLast = I + 1 == InsNumElts;
    const DstOp Res = IsLast ? DstOp(Dst) : DstOp(VecVT);
    Vec = B.buildInsertVectorElementConstant(Res, Vec, Elt,
                                             static_cast<int>(IdxVal + I))
              .getReg(0);
  }

  MI.eraseFromParent();
  return true;
}

bool AMDGPULegalizerInfo::legalizeSinCos(
MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
MachineIRBuilder &B) const;
bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
/// Custom legalization for G_INSERT_SUBVECTOR, dispatched from
/// legalizeCustom. Expands \p MI into extracts from the subvector and inserts
/// into the big vector (instead of going through a stack slot), then erases
/// \p MI.
bool legalizeInsertSubVector(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;

bool legalizeSinCos(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
Expand Down
Loading