Skip to content

Commit ae042a2

Browse files
arsenm authored and memfrob committed
GlobalISel: Implement fewerElementsVector for G_EXTRACT_VECTOR_ELT
Use the same basic strategy as LegalizeVectorTypes. Try to index into smaller pieces if there's a constant index, and otherwise fall back to a stack temporary.
1 parent 6a23995 commit ae042a2

File tree

6 files changed

+1195
-75
lines changed

6 files changed

+1195
-75
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,9 @@ class LegalizerHelper {
279279
LegalizeResult fewerElementsVectorBuildVector(MachineInstr &MI,
280280
unsigned TypeIdx,
281281
LLT NarrowTy);
282+
LegalizeResult fewerElementsVectorExtractVectorElt(MachineInstr &MI,
283+
unsigned TypeIdx,
284+
LLT NarrowTy);
282285

283286
LegalizeResult
284287
reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3472,6 +3472,59 @@ LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI,
34723472
return Legalized;
34733473
}
34743474

3475+
/// Legalize a G_EXTRACT_VECTOR_ELT by breaking its source vector (type index
/// 1) into pieces of type \p NarrowVecTy.
///
/// \param MI          The G_EXTRACT_VECTOR_ELT instruction; operand 0 is the
///                    result, operand 1 the source vector, operand 2 the index.
/// \param TypeIdx     Must be 1 (the vector operand's type index).
/// \param NarrowVecTy The smaller vector type to index into.
///
/// \returns UnableToLegalize when \p NarrowVecTy is not a vector (total
/// scalarization is not handled yet). With a constant index the extract is
/// rewritten to read from the single piece that holds the element, or to an
/// undef when the index lies outside the source vector, and Legalized is
/// returned. With a non-constant index the result of lowerExtractVectorElt()
/// is returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorExtractVectorElt(MachineInstr &MI,
                                                     unsigned TypeIdx,
                                                     LLT NarrowVecTy) {
  assert(TypeIdx == 1 && "not a vector type index");

  // TODO: Handle total scalarization case.
  if (!NarrowVecTy.isVector())
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register Idx = MI.getOperand(2).getReg();
  LLT VecTy = MRI.getType(SrcVec);

  // If the index is a constant, we can really break this down as you would
  // expect, and index into the target size pieces.
  int64_t IdxVal;
  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
    // Avoid out of bounds indexing the pieces. IdxVal is signed, so a negative
    // constant must be rejected explicitly: it would otherwise slip past the
    // upper-bound comparison and yield a negative PartIdx, indexing VecParts
    // out of range below.
    if (IdxVal < 0 || IdxVal >= VecTy.getNumElements()) {
      MIRBuilder.buildUndef(DstReg);
      MI.eraseFromParent();
      return Legalized;
    }

    SmallVector<Register, 8> VecParts;
    LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);

    // Build a sequence of NarrowVecTy pieces in VecParts for this operand.
    buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
                        TargetOpcode::G_ANYEXT);

    unsigned NewNumElts = NarrowVecTy.getNumElements();

    // Select the piece containing the requested element and rebase the index
    // so that it is relative to the start of that piece.
    LLT IdxTy = MRI.getType(Idx);
    int64_t PartIdx = IdxVal / NewNumElts;
    auto NewIdx =
        MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);

    MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
    MI.eraseFromParent();
    return Legalized;
  }

  // With a variable index, we can't perform the extract in a smaller type, so
  // we're forced to expand this.
  //
  // TODO: We could emit a chain of compare/select to figure out which piece to
  // index.
  return lowerExtractVectorElt(MI);
}
3527+
34753528
LegalizerHelper::LegalizeResult
34763529
LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
34773530
LLT NarrowTy) {
@@ -3801,6 +3854,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
38013854
return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
38023855
case G_BUILD_VECTOR:
38033856
return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy);
3857+
case G_EXTRACT_VECTOR_ELT:
3858+
return fewerElementsVectorExtractVectorElt(MI, TypeIdx, NarrowTy);
38043859
case G_LOAD:
38053860
case G_STORE:
38063861
return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1340,7 +1340,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
13401340
.clampScalar(EltTypeIdx, S32, S64)
13411341
.clampScalar(VecTypeIdx, S32, S64)
13421342
.clampScalar(IdxTypeIdx, S32, S32)
1343-
// TODO: Clamp the number of elements before resorting to stack lowering.
1343+
.clampMaxNumElements(1, S32, 32)
1344+
// TODO: Clamp elements for 64-bit vectors?
13441345
// It should only be necessary with variable indexes.
13451346
// As a last resort, lower to the stack
13461347
.lower();

0 commit comments

Comments
 (0)