Commit c7355c4

FPInfo: IRTranslator and CallLowering

1 parent: d69896b
3 files changed: +130 −49 lines
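
This commit is part of the FPInfo work, which teaches LLT to distinguish floating-point from integer interpretations. Two themes run through the diff below: call sites that build inherently integer values move from the old LLT::scalar constructor to the explicit LLT::integer one, and the argument-copy helpers in CallLowering learn to bitcast float-typed values to their same-sized integer equivalents (via the new dropType()) around integer-only operations such as G_TRUNC, G_ANYEXT, and merge/unmerge, using G_FPEXT where a genuine float widening is wanted.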

llvm/lib/CodeGen/GlobalISel/CallLowering.cpp

Lines changed: 78 additions & 20 deletions
@@ -20,6 +20,7 @@
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
@@ -409,11 +410,19 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
     // Sometimes pointers are passed zero extended.
     LLT OrigTy = MRI.getType(OrigRegs[0]);
     if (OrigTy.isPointer()) {
-      LLT IntPtrTy = LLT::scalar(OrigTy.getSizeInBits());
+      LLT IntPtrTy = LLT::integer(OrigTy.getSizeInBits());
       B.buildIntToPtr(OrigRegs[0], B.buildTrunc(IntPtrTy, SrcReg));
       return;
     }
 
+    if (LocTy.isFloat() || LocTy.isFloatVector())
+      SrcReg = B.buildBitcast(LocTy.dropType(), SrcReg).getReg(0);
+
+    if (OrigTy.isFloat() || OrigTy.isFloatVector()) {
+      B.buildBitcast(OrigRegs[0], B.buildTrunc(OrigTy.dropType(), SrcReg));
+      return;
+    }
+
     B.buildTrunc(OrigRegs[0], SrcReg);
     return;
   }
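
The pattern introduced here recurs throughout the file: reinterpret float-typed bits as the integer type of the same size before an integer-only operation, then rebuild the float type at the destination. A minimal sketch of that normalization step under the FPInfo assumptions (dropType() keeps size and vector shape but drops the FP interpretation; the helper name is invented for illustration, not part of the patch):

// Sketch, not patch code: reinterpret a float-typed vreg as the integer
// type of the same size so integer-only ops (G_TRUNC above) apply.
static Register normalizeToIntBits(MachineIRBuilder &B, Register Reg) {
  LLT Ty = B.getMRI()->getType(Reg);
  if (Ty.isFloat() || Ty.isFloatVector())
    return B.buildBitcast(Ty.dropType(), Reg).getReg(0); // size-preserving
  return Reg;
}
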
@@ -423,10 +432,15 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
   LLT OrigTy = MRI.getType(OrigRegs[0]);
 
   unsigned SrcSize = PartLLT.getSizeInBits().getFixedValue() * Regs.size();
-  if (SrcSize == OrigTy.getSizeInBits())
-    B.buildMergeValues(OrigRegs[0], Regs);
-  else {
-    auto Widened = B.buildMergeLikeInstr(LLT::scalar(SrcSize), Regs);
+  if (SrcSize == OrigTy.getSizeInBits()) {
+    if (OrigTy.isFloat()) {
+      auto Merge = B.buildMergeValues(OrigTy.dropType(), Regs);
+      B.buildBitcast(OrigRegs[0], Merge);
+    } else {
+      B.buildMergeValues(OrigRegs[0], Regs);
+    }
+  } else {
+    auto Widened = B.buildMergeLikeInstr(LLT::integer(SrcSize), Regs);
     B.buildTrunc(OrigRegs[0], Widened);
   }
 
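Note: when the pieces exactly cover a float OrigTy, the merge is built at the integer equivalent type (OrigTy.dropType()) and then bitcast into the float-typed destination, presumably because G_MERGE_VALUES stays an integer-flavored operation under FPInfo; the widened fallback likewise switches to LLT::integer.
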
@@ -492,19 +506,25 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
     SmallVector<Register, 8> EltMerges;
     int PartsPerElt =
         divideCeil(DstEltTy.getSizeInBits(), PartLLT.getSizeInBits());
-    LLT ExtendedPartTy = LLT::scalar(PartLLT.getSizeInBits() * PartsPerElt);
+    LLT ExtendedPartTy = LLT::integer(PartLLT.getSizeInBits() * PartsPerElt);
 
     for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
       auto Merge =
           B.buildMergeLikeInstr(ExtendedPartTy, Regs.take_front(PartsPerElt));
       if (ExtendedPartTy.getSizeInBits() > RealDstEltTy.getSizeInBits())
         Merge = B.buildTrunc(RealDstEltTy, Merge);
       // Fix the type in case this is really a vector of pointers.
-      MRI.setType(Merge.getReg(0), RealDstEltTy);
-      EltMerges.push_back(Merge.getReg(0));
+      Register MergeReg = Merge.getReg(0);
+
+      if (RealDstEltTy.isPointer()) {
+        MRI.setType(MergeReg, RealDstEltTy);
+      } else if (RealDstEltTy.isFloat() &&
+                 !MRI.getType(MergeReg).getScalarType().isFloat()) {
+        MergeReg = B.buildBitcast(RealDstEltTy, MergeReg).getReg(0);
+      }
+      EltMerges.push_back(MergeReg);
       Regs = Regs.drop_front(PartsPerElt);
     }
-
     B.buildBuildVector(OrigRegs[0], EltMerges);
   } else {
     // Vector was split, and elements promoted to a wider type.
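
Note: the per-element fixup now separates the two cases the old unconditional setType covered at once: pointer elements keep the MRI.setType rewrite, while float elements get an explicit bitcast from the merged integer bits, guarded so a merge result that is already float-typed is not cast again.
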
@@ -532,9 +552,12 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
     SmallVector<Register, 0> BVRegs;
     BVRegs.reserve(Regs.size() * EltPerReg);
     for (Register R : Regs) {
-      auto Unmerge = B.buildUnmerge(OriginalEltTy, R);
-      for (unsigned K = 0; K < EltPerReg; ++K)
-        BVRegs.push_back(B.buildAnyExt(PartLLT, Unmerge.getReg(K)).getReg(0));
+      auto Unmerge = B.buildUnmerge(OriginalEltTy.dropType(), R);
+      for (unsigned K = 0; K < EltPerReg; ++K) {
+        Register BVreg;
+        BVreg = B.buildAnyExt(PartLLT, Unmerge.getReg(K)).getReg(0);
+        BVRegs.push_back(BVreg);
+      }
     }
 
     // We may have some more elements in BVRegs, e.g. if we have 2 s32 pieces
@@ -545,7 +568,17 @@ static void buildCopyFromRegs(MachineIRBuilder &B, ArrayRef<Register> OrigRegs,
     }
     BuildVec = B.buildBuildVector(BVType, BVRegs).getReg(0);
   }
-  B.buildTrunc(OrigRegs[0], BuildVec);
+
+  if (BVType.isFloatVector()) {
+    BuildVec = B.buildBitcast(BVType.dropType(), BuildVec).getReg(0);
+  }
+
+  LLT OrigTy = MRI.getType(OrigRegs[0]);
+  if (OrigTy.isFloatVector()) {
+    B.buildBitcast(OrigRegs[0], B.buildTrunc(OrigTy.dropType(), BuildVec));
+  } else {
+    B.buildTrunc(OrigRegs[0], BuildVec);
+  }
   }
 }
 
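Note: the tail of the build-vector path mirrors the scalar path above: a float-typed BuildVec is first bitcast to its integer equivalent so that G_TRUNC applies, and a float-vector destination is reached by truncating at the integer equivalent type and bitcasting the result back.
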
@@ -573,8 +606,18 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
       TypeSize::isKnownGT(PartSize, SrcTy.getElementType().getSizeInBits())) {
     // Vector was scalarized, and the elements extended.
     auto UnmergeToEltTy = B.buildUnmerge(SrcTy.getElementType(), SrcReg);
-    for (int i = 0, e = DstRegs.size(); i != e; ++i)
-      B.buildAnyExt(DstRegs[i], UnmergeToEltTy.getReg(i));
+    for (int i = 0, e = DstRegs.size(); i != e; ++i) {
+      Register Unmerge = UnmergeToEltTy.getReg(i);
+      if (SrcTy.isFloatVector() && ExtendOp == TargetOpcode::G_FPEXT) {
+        B.buildFPExt(DstRegs[i], Unmerge);
+      } else {
+        if (SrcTy.getElementType().isFloat()) {
+          Unmerge = B.buildBitcast(SrcTy.getElementType().dropType(), Unmerge)
+                        .getReg(0);
+        }
+        B.buildAnyExt(DstRegs[i], Unmerge);
+      }
+    }
     return;
   }
 
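Note: on the outgoing side the choice depends on the extend opcode picked by the caller (see the handleAssignments hunk below): a scalarized float vector that is genuinely widened uses G_FPEXT per element, while anything else bitcasts float elements to integer bits so that G_ANYEXT stays valid. The widened-vector case two hunks below makes the same split.
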
@@ -590,6 +633,11 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
 
   LLT GCDTy = getGCDType(SrcTy, PartTy);
   if (GCDTy == PartTy) {
+    if (SrcTy.getScalarType().isFloat() && !PartTy.getScalarType().isFloat()) {
+      B.buildUnmerge(DstRegs, B.buildBitcast(SrcTy.dropType(), SrcReg));
+      return;
+    }
+
     // If this already evenly divisible, we can create a simple unmerge.
     B.buildUnmerge(DstRegs, SrcReg);
     return;
@@ -599,9 +647,16 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
       SrcTy.getScalarSizeInBits() > PartTy.getSizeInBits()) {
     LLT ExtTy =
         LLT::vector(SrcTy.getElementCount(),
-                    LLT::scalar(PartTy.getScalarSizeInBits() * DstRegs.size() /
-                                SrcTy.getNumElements()));
-    auto Ext = B.buildAnyExt(ExtTy, SrcReg);
+                    LLT::integer(PartTy.getScalarSizeInBits() * DstRegs.size() /
+                                 SrcTy.getNumElements()));
+    Register Ext;
+    if (SrcTy.isFloatVector() && ExtendOp == TargetOpcode::G_FPEXT) {
+      auto Cast = B.buildBitcast(SrcTy.dropType(), SrcReg).getReg(0);
+      Ext = B.buildAnyExt(ExtTy, Cast).getReg(0);
+    } else {
+      Ext = B.buildAnyExt(ExtTy, SrcReg).getReg(0);
+    }
+
     B.buildUnmerge(DstRegs, Ext);
     return;
   }
@@ -626,7 +681,7 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
   // For scalars, it's common to be able to use a simple extension.
   if (SrcTy.isScalar() && DstTy.isScalar()) {
     CoveringSize = alignTo(SrcSize, DstSize);
-    LLT CoverTy = LLT::scalar(CoveringSize);
+    LLT CoverTy = LLT::integer(CoveringSize);
     UnmergeSrc = B.buildInstr(ExtendOp, {CoverTy}, {SrcReg}).getReg(0);
   } else {
     // Widen to the common type.
@@ -822,8 +877,11 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
     if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy &&
         VA.getLocInfo() != CCValAssign::Indirect) {
       assert(Args[i].OrigRegs.size() == 1);
+      unsigned ExtendOp = extendOpFromFlags(Args[i].Flags[0]);
+      if ((OrigTy.isFloat() || OrigTy.isFloatVector()) && ValTy.isFloat())
+        ExtendOp = TargetOpcode::G_FPEXT;
       buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy,
-                      ValTy, extendOpFromFlags(Args[i].Flags[0]));
+                      ValTy, ExtendOp);
     }
 
     bool IndirectParameterPassingHandled = false;
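
handleAssignments now picks the extend opcode with float awareness instead of always trusting the integer extension implied by the argument flags. The added lines, restated with comments (the f16-to-f32 scenario is a hypothetical example, not from the patch):

// With, say, an f16 argument promoted to an f32 location, both OrigTy and
// ValTy are float-typed, so the copy is built with G_FPEXT instead of the
// G_ANYEXT/G_SEXT/G_ZEXT that the ABI flags would imply.
unsigned ExtendOp = extendOpFromFlags(Args[i].Flags[0]);
if ((OrigTy.isFloat() || OrigTy.isFloatVector()) && ValTy.isFloat())
  ExtendOp = TargetOpcode::G_FPEXT; // float-to-float widening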

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 23 additions & 23 deletions
@@ -862,7 +862,7 @@ bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT,
   // This value may be smaller or larger than the target's pointer type, and
   // therefore require extension or truncating.
   auto *PtrIRTy = PointerType::getUnqual(SValue.getContext());
-  const LLT PtrScalarTy = LLT::scalar(DL->getTypeSizeInBits(PtrIRTy));
+  const LLT PtrScalarTy = LLT::integer(DL->getTypeSizeInBits(PtrIRTy));
   Sub = MIB.buildZExtOrTrunc(PtrScalarTy, Sub);
 
   JT.Reg = Sub.getReg(0);
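
The IRTranslator changes are uniform: every value built in these hunks is inherently an integer (pointer-width jump-table deltas, 1-bit compare results, bit-test masks, memop sizes, vector indices, the stack-guard word), so the interpretation-free LLT::scalar(N) becomes the explicit LLT::integer(N). Assuming the two constructors produce same-sized types, no MIR semantics change; several call sites also hoist the type into a named local (CmpTy, I1), which keeps the rewritten calls on one line.
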
@@ -879,7 +879,8 @@
   auto Cst = getOrCreateVReg(
       *ConstantInt::get(SValue.getType(), JTH.Last - JTH.First));
   Cst = MIB.buildZExtOrTrunc(PtrScalarTy, Cst).getReg(0);
-  auto Cmp = MIB.buildICmp(CmpInst::ICMP_UGT, LLT::scalar(1), Sub, Cst);
+  LLT CmpTy = LLT::integer(1);
+  auto Cmp = MIB.buildICmp(CmpInst::ICMP_UGT, CmpTy, Sub, Cst);
 
   auto BrCond = MIB.buildBrCond(Cmp.getReg(0), *JT.Default);
 
@@ -910,7 +911,7 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
     return;
   }
 
-  const LLT i1Ty = LLT::scalar(1);
+  const LLT i1Ty = LLT::integer(1);
   // Build the compare.
   if (!CB.CmpMHS) {
     const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS);
@@ -1092,14 +1093,14 @@ void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
   LLT MaskTy = SwitchOpTy;
   if (MaskTy.getSizeInBits() > PtrTy.getSizeInBits() ||
       !llvm::has_single_bit<uint32_t>(MaskTy.getSizeInBits()))
-    MaskTy = LLT::scalar(PtrTy.getSizeInBits());
+    MaskTy = LLT::integer(PtrTy.getSizeInBits());
   else {
     // Ensure that the type will fit the mask value.
     for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) {
       if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) {
         // Switch table case range are encoded into series of masks.
         // Just use pointer type, it's guaranteed to fit.
-        MaskTy = LLT::scalar(PtrTy.getSizeInBits());
+        MaskTy = LLT::integer(PtrTy.getSizeInBits());
         break;
       }
     }
@@ -1122,8 +1123,9 @@
   if (!B.FallthroughUnreachable) {
     // Conditional branch to the default block.
     auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range);
-    auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1),
-                                  RangeSub, RangeCst);
+    LLT CmpTy = LLT::integer(1);
+    auto RangeCmp =
+        MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, CmpTy, RangeSub, RangeCst);
     MIB.buildBrCond(RangeCmp, *B.Default);
   }
 
@@ -1141,6 +1143,7 @@ void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB,
   MIB.setMBB(*SwitchBB);
 
   LLT SwitchTy = getLLTForMVT(BB.RegVT);
+  LLT I1 = LLT::integer(1);
   Register Cmp;
   unsigned PopCount = llvm::popcount(B.Mask);
   if (PopCount == 1) {
@@ -1149,14 +1152,12 @@
     auto MaskTrailingZeros =
         MIB.buildConstant(SwitchTy, llvm::countr_zero(B.Mask));
     Cmp =
-        MIB.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, MaskTrailingZeros)
-            .getReg(0);
+        MIB.buildICmp(ICmpInst::ICMP_EQ, I1, Reg, MaskTrailingZeros).getReg(0);
   } else if (PopCount == BB.Range) {
     // There is only one zero bit in the range, test for it directly.
     auto MaskTrailingOnes =
         MIB.buildConstant(SwitchTy, llvm::countr_one(B.Mask));
-    Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, MaskTrailingOnes)
-              .getReg(0);
+    Cmp = MIB.buildICmp(CmpInst::ICMP_NE, I1, Reg, MaskTrailingOnes).getReg(0);
   } else {
     // Make desired shift.
     auto CstOne = MIB.buildConstant(SwitchTy, 1);
@@ -1166,8 +1167,7 @@
     auto CstMask = MIB.buildConstant(SwitchTy, B.Mask);
     auto AndOp = MIB.buildAnd(SwitchTy, SwitchVal, CstMask);
     auto CstZero = MIB.buildConstant(SwitchTy, 0);
-    Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), AndOp, CstZero)
-              .getReg(0);
+    Cmp = MIB.buildICmp(CmpInst::ICMP_NE, I1, AndOp, CstZero).getReg(0);
   }
 
   // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
@@ -1691,7 +1691,7 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
     SrcRegs.push_back(SrcReg);
   }
 
-  LLT SizeTy = LLT::scalar(MinPtrSize);
+  LLT SizeTy = LLT::integer(MinPtrSize);
 
   // The size operand should be the minimum of the pointer sizes.
   Register &SizeOpReg = SrcRegs[SrcRegs.size() - 1];
@@ -2812,7 +2812,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
         DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext())));
     LLT MemTy = Info.memVT.isSimple()
                     ? getLLTForMVT(Info.memVT.getSimpleVT())
-                    : LLT::scalar(Info.memVT.getStoreSizeInBits());
+                    : LLT::integer(Info.memVT.getStoreSizeInBits());
 
     // TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic
     // didn't yield anything useful.
@@ -3158,7 +3158,7 @@ bool IRTranslator::translateInsertElement(const User &U,
   if (!Idx)
     Idx = getOrCreateVReg(*U.getOperand(2));
   if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) {
-    const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
+    const LLT VecIdxTy = LLT::integer(PreferredVecIdxWidth);
     Idx = MIRBuilder.buildZExtOrTrunc(VecIdxTy, Idx).getReg(0);
   }
   MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx);
@@ -3201,7 +3201,7 @@ bool IRTranslator::translateInsertVector(const User &U,
   if (isa<ScalableVectorType>(U.getOperand(0)->getType())) {
     // We are inserting an illegal fixed vector into a scalable
     // vector, use a scalar element insert.
-    LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
+    LLT VecIdxTy = LLT::integer(PreferredVecIdxWidth);
     Register Idx = getOrCreateVReg(*CI);
     auto ScaledIndex = MIRBuilder.buildMul(
         VecIdxTy, MIRBuilder.buildVScale(VecIdxTy, 1), Idx);
@@ -3239,7 +3239,7 @@ bool IRTranslator::translateExtractElement(const User &U,
   if (!Idx)
     Idx = getOrCreateVReg(*U.getOperand(1));
   if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) {
-    const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
+    const LLT VecIdxTy = LLT::integer(PreferredVecIdxWidth);
     Idx = MIRBuilder.buildZExtOrTrunc(VecIdxTy, Idx).getReg(0);
   }
   MIRBuilder.buildExtractVectorElement(Res, Val, Idx);
@@ -3279,7 +3279,7 @@ bool IRTranslator::translateExtractVector(const User &U,
   if (isa<ScalableVectorType>(U.getOperand(0)->getType())) {
     // We are extracting an illegal fixed vector from a scalable
     // vector, use a scalar element extract.
-    LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
+    LLT VecIdxTy = LLT::integer(PreferredVecIdxWidth);
     Register Idx = getOrCreateVReg(*CI);
     auto ScaledIndex = MIRBuilder.buildMul(
         VecIdxTy, MIRBuilder.buildVScale(VecIdxTy, 1), Idx);
@@ -3868,8 +3868,8 @@ bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD,
   // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
   // Otherwise, emit a volatile load to retrieve the stack guard value.
   if (TLI->useLoadStackGuardNode(*ParentBB->getBasicBlock()->getModule())) {
-    Guard =
-        MRI->createGenericVirtualRegister(LLT::scalar(PtrTy.getSizeInBits()));
+    LLT RegTy = LLT::integer(PtrTy.getSizeInBits());
+    Guard = MRI->createGenericVirtualRegister(RegTy);
     getStackGuard(Guard, *CurBuilder);
   } else {
     // TODO: test using android subtarget when we support @llvm.thread.pointer.
@@ -3885,8 +3885,8 @@ bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD,
   }
 
   // Perform the comparison.
-  auto Cmp =
-      CurBuilder->buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Guard, GuardVal);
+  LLT I1 = LLT::integer(1);
+  auto Cmp = CurBuilder->buildICmp(CmpInst::ICMP_NE, I1, Guard, GuardVal);
   // If the guard/stackslot do not equal, branch to failure MBB.
   CurBuilder->buildBrCond(Cmp, *SPD.getFailureMBB());
   // Otherwise branch to success MBB.
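
Note: the two stack-protector hunks complete the file's pattern: the guard is loaded as pointer-width integer bits and compared with an explicitly integer-typed 1-bit result, matching the switch-lowering hunks above.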
