Skip to content

Commit 0604176

Browse files
Rework to make AArch64 specific & handle splitting into legal chunks
1 parent 39c3e04 commit 0604176

File tree

2 files changed

+89
-1
lines changed

2 files changed

+89
-1
lines changed

llvm/lib/Target/AArch64/AArch64InstrGISel.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,13 @@ def G_VLSHR : AArch64GenericInstruction {
149149
let hasSideEffects = 0;
150150
}
151151

152+
// Float truncation using round to odd.
// Generic AArch64 instruction with one result ($dst, of type0) and one source
// operand ($src, of type1). It is declared as having no side effects.
153+
def G_FPTRUNC_ODD : AArch64GenericInstruction {
154+
let OutOperandList = (outs type0:$dst);
155+
let InOperandList = (ins type1:$src);
156+
let hasSideEffects = false;
157+
}
158+
152159
// Represents an integer to FP conversion on the FPR bank.
153160
def G_SITOF : AArch64GenericInstruction {
154161
let OutOperandList = (outs type0:$dst);

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -819,7 +819,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
819819
{{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
820820
.libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
821821
.moreElementsToNextPow2(1)
822-
.lowerIf([](const LegalityQuery &Q) {
822+
.customIf([](const LegalityQuery &Q) {
823823
LLT DstTy = Q.Types[0];
824824
LLT SrcTy = Q.Types[1];
825825
return SrcTy.isFixedVector() && DstTy.isFixedVector() &&
@@ -1474,6 +1474,10 @@ bool AArch64LegalizerInfo::legalizeCustom(
14741474
return legalizeICMP(MI, MRI, MIRBuilder);
14751475
case TargetOpcode::G_BITCAST:
14761476
return legalizeBitcast(MI, Helper);
1477+
case TargetOpcode::G_FPTRUNC:
1478+
// In order to vectorise the f64 to f16 truncation properly, we need to use f32 as an
1479+
// intermediary
1480+
return legalizeFptrunc(MI, MIRBuilder, MRI);
14771481
}
14781482

14791483
llvm_unreachable("expected switch to return");
@@ -2400,3 +2404,80 @@ bool AArch64LegalizerInfo::legalizePrefetch(MachineInstr &MI,
24002404
MI.eraseFromParent();
24012405
return true;
24022406
}
2407+
2408+
/// Custom legalization of a fixed-vector G_FPTRUNC that goes through 32-bit
/// elements as an intermediate step.
///
/// The source is unmerged into two-element pieces when it has more than two
/// elements. Each piece is narrowed to v2s32 with G_FPTRUNC_ODD (float
/// truncation using round to odd). The v2s32 values are then narrowed to
/// 16-bit elements with ordinary G_FPTRUNCs: four lanes at a time when the
/// element count is a multiple of four, two lanes at a time otherwise. The
/// narrowed pieces are merged into a value of the destination type, which
/// replaces every use of \p MI's result, and \p MI is erased.
///
/// \param MI         The G_FPTRUNC being legalized; erased before returning.
/// \param MIRBuilder Builder used to emit the replacement instructions.
/// \param MRI        Register info used to create and replace virtual
///                   registers.
/// \returns true; this routine has no failure path.
bool AArch64LegalizerInfo::legalizeFptrunc(MachineInstr &MI,
                                           MachineIRBuilder &MIRBuilder,
                                           MachineRegisterInfo &MRI) const {
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  assert(SrcTy.isFixedVector() && isPowerOf2_32(SrcTy.getNumElements()) &&
         "Expected a power of 2 elements");

  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT v2s16 = LLT::fixed_vector(2, s16);
  const LLT v4s16 = LLT::fixed_vector(4, s16);
  const LLT v2s32 = LLT::fixed_vector(2, s32);
  const LLT v4s32 = LLT::fixed_vector(4, s32);
  const LLT v2s64 = LLT::fixed_vector(2, s64);

  const unsigned NumElts = SrcTy.getNumElements();

  // Work on the largest chunks available: four lanes when the element count is
  // a multiple of four, two lanes otherwise.
  const unsigned ChunkSize = (NumElts % 4 == 0) ? 4 : 2;

  // Gather the registers to round. A source with at most two elements is used
  // directly; anything wider is first unmerged into v2s64 pieces.
  SmallVector<Register> Pieces;
  if (NumElts > 2) {
    for (unsigned I = 0, E = NumElts / 2; I != E; ++I)
      Pieces.push_back(MRI.createGenericVirtualRegister(v2s64));
    MIRBuilder.buildUnmerge(Pieces, Src);
  } else {
    Pieces.push_back(Src);
  }

  // Emit one round-to-odd truncation per piece and remember its v2s32 result.
  SmallVector<Register> Rounded;
  for (Register Piece : Pieces)
    Rounded.push_back(
        MIRBuilder.buildInstr(AArch64::G_FPTRUNC_ODD, {v2s32}, {Piece})
            .getReg(0));

  // Finish the truncation. With four-lane chunks, two v2s32 values are
  // concatenated into a v4s32 and truncated to v4s16 to reduce the instruction
  // count; otherwise each v2s32 is truncated to v2s16 on its own.
  SmallVector<Register> Narrowed;
  unsigned Next = 0;
  for (unsigned Chunk = 0, NumChunks = NumElts / ChunkSize; Chunk != NumChunks;
       ++Chunk) {
    if (ChunkSize != 4) {
      Narrowed.push_back(
          MIRBuilder.buildFPTrunc(v2s16, Rounded[Next]).getReg(0));
      ++Next;
      continue;
    }

    Register Lo = Rounded[Next];
    Register Hi = Rounded[Next + 1];
    Next += 2;

    Register Wide =
        MIRBuilder.buildMergeLikeInstr({v4s32}, {Lo, Hi}).getReg(0);
    Narrowed.push_back(MIRBuilder.buildFPTrunc(v4s16, Wide).getReg(0));
  }

  // A single narrowed register already is the result; several of them have to
  // be merged into the destination type first.
  Register Replacement =
      Narrowed.size() == 1
          ? Narrowed.front()
          : MIRBuilder.buildMergeLikeInstr(DstTy, Narrowed).getReg(0);

  MRI.replaceRegWith(Dst, Replacement);
  MI.eraseFromParent();
  return true;
}

0 commit comments

Comments
 (0)