Skip to content

Commit 4ee80ca

Browse files
authored
[GlobalISel] Add support for scalarizing vector insert and extract elements (llvm#153274)
This adds scalarization handling to the fewer-vector-elements legalization of vector insert and extract element, so that i128 and fp128 element types can be handled if they make it past the combines. Inserts are unmerged, with the inserted element placed into the remerged vector; extracts are unmerged, then the correct element is copied into the destination. With a non-constant index the usual stack lowering is used.
1 parent 88438ba commit 4ee80ca

File tree

3 files changed

+331
-166
lines changed

3 files changed

+331
-166
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 34 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5222,53 +5222,59 @@ LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
52225222
InsertVal = MI.getOperand(2).getReg();
52235223

52245224
Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
5225-
5226-
// TODO: Handle total scalarization case.
5227-
if (!NarrowVecTy.isVector())
5228-
return UnableToLegalize;
5229-
52305225
LLT VecTy = MRI.getType(SrcVec);
52315226

52325227
// If the index is a constant, we can really break this down as you would
52335228
// expect, and index into the target size pieces.
5234-
int64_t IdxVal;
52355229
auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
52365230
if (MaybeCst) {
5237-
IdxVal = MaybeCst->Value.getSExtValue();
5231+
uint64_t IdxVal = MaybeCst->Value.getZExtValue();
52385232
// Avoid out of bounds indexing the pieces.
52395233
if (IdxVal >= VecTy.getNumElements()) {
52405234
MIRBuilder.buildUndef(DstReg);
52415235
MI.eraseFromParent();
52425236
return Legalized;
52435237
}
52445238

5245-
SmallVector<Register, 8> VecParts;
5246-
LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5239+
if (!NarrowVecTy.isVector()) {
5240+
SmallVector<Register, 8> SplitPieces;
5241+
extractParts(MI.getOperand(1).getReg(), NarrowVecTy,
5242+
VecTy.getNumElements(), SplitPieces, MIRBuilder, MRI);
5243+
if (IsInsert) {
5244+
SplitPieces[IdxVal] = InsertVal;
5245+
MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), SplitPieces);
5246+
} else {
5247+
MIRBuilder.buildCopy(MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5248+
}
5249+
} else {
5250+
SmallVector<Register, 8> VecParts;
5251+
LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
52475252

5248-
// Build a sequence of NarrowTy pieces in VecParts for this operand.
5249-
LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5250-
TargetOpcode::G_ANYEXT);
5253+
// Build a sequence of NarrowTy pieces in VecParts for this operand.
5254+
LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5255+
TargetOpcode::G_ANYEXT);
52515256

5252-
unsigned NewNumElts = NarrowVecTy.getNumElements();
5257+
unsigned NewNumElts = NarrowVecTy.getNumElements();
52535258

5254-
LLT IdxTy = MRI.getType(Idx);
5255-
int64_t PartIdx = IdxVal / NewNumElts;
5256-
auto NewIdx =
5257-
MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5259+
LLT IdxTy = MRI.getType(Idx);
5260+
int64_t PartIdx = IdxVal / NewNumElts;
5261+
auto NewIdx =
5262+
MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
52585263

5259-
if (IsInsert) {
5260-
LLT PartTy = MRI.getType(VecParts[PartIdx]);
5264+
if (IsInsert) {
5265+
LLT PartTy = MRI.getType(VecParts[PartIdx]);
52615266

5262-
// Use the adjusted index to insert into one of the subvectors.
5263-
auto InsertPart = MIRBuilder.buildInsertVectorElement(
5264-
PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5265-
VecParts[PartIdx] = InsertPart.getReg(0);
5267+
// Use the adjusted index to insert into one of the subvectors.
5268+
auto InsertPart = MIRBuilder.buildInsertVectorElement(
5269+
PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5270+
VecParts[PartIdx] = InsertPart.getReg(0);
52665271

5267-
// Recombine the inserted subvector with the others to reform the result
5268-
// vector.
5269-
buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5270-
} else {
5271-
MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5272+
// Recombine the inserted subvector with the others to reform the result
5273+
// vector.
5274+
buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5275+
} else {
5276+
MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5277+
}
52725278
}
52735279

52745280
MI.eraseFromParent();

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1150,7 +1150,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
11501150
.clampMaxNumElements(1, s32, 4)
11511151
.clampMaxNumElements(1, s16, 8)
11521152
.clampMaxNumElements(1, s8, 16)
1153-
.clampMaxNumElements(1, p0, 2);
1153+
.clampMaxNumElements(1, p0, 2)
1154+
.scalarizeIf(scalarOrEltWiderThan(1, 64), 1);
11541155

11551156
getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
11561157
.legalIf(
@@ -1165,7 +1166,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
11651166
.clampNumElements(0, v4s16, v8s16)
11661167
.clampNumElements(0, v2s32, v4s32)
11671168
.clampMaxNumElements(0, s64, 2)
1168-
.clampMaxNumElements(0, p0, 2);
1169+
.clampMaxNumElements(0, p0, 2)
1170+
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
11691171

11701172
getActionDefinitionsBuilder(G_BUILD_VECTOR)
11711173
.legalFor({{v8s8, s8},

0 commit comments

Comments
 (0)