Skip to content

Commit db77156

Browse files
committed
Merge remote-tracking branch 'origin/llvm.org/main' into users/ojhunt/P2719
2 parents f196dc3 + dbd219a commit db77156

22 files changed

+1018
-314
lines changed

clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -203,8 +203,8 @@ void StackAddrEscapeChecker::checkAsyncExecutedBlockCaptures(
203203
// a variable of the type "dispatch_semaphore_t".
204204
if (isSemaphoreCaptured(*B.getDecl()))
205205
return;
206-
for (const MemRegion *Region :
207-
llvm::make_first_range(getCapturedStackRegions(B, C))) {
206+
auto Regions = getCapturedStackRegions(B, C);
207+
for (const MemRegion *Region : llvm::make_first_range(Regions)) {
208208
// The block passed to dispatch_async may capture another block
209209
// created on the stack. However, there is no leak in this situation,
210210
// no matter if ARC or no ARC is enabled:

llvm/lib/CodeGen/RegAllocGreedy.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1423,7 +1423,7 @@ Register RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
14231423
bool SplitSubClass = true;
14241424
if (!RegClassInfo.isProperSubClass(CurRC)) {
14251425
if (!VirtReg.hasSubRanges())
1426-
return 0;
1426+
return Register();
14271427
SplitSubClass = false;
14281428
}
14291429

@@ -1434,7 +1434,7 @@ Register RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
14341434

14351435
ArrayRef<SlotIndex> Uses = SA->getUseSlots();
14361436
if (Uses.size() <= 1)
1437-
return 0;
1437+
return Register();
14381438

14391439
LLVM_DEBUG(dbgs() << "Split around " << Uses.size()
14401440
<< " individual instrs.\n");
@@ -1586,7 +1586,7 @@ Register RAGreedy::tryLocalSplit(const LiveInterval &VirtReg,
15861586

15871587
ArrayRef<SlotIndex> Uses = SA->getUseSlots();
15881588
if (Uses.size() <= 2)
1589-
return 0;
1589+
return Register();
15901590
const unsigned NumGaps = Uses.size()-1;
15911591

15921592
LLVM_DEBUG({
@@ -2184,7 +2184,7 @@ MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg,
21842184
/// range can have lower cost than using the CSR for the first time;
21852185
/// Spilling a live range in the cold path can have lower cost than using
21862186
/// the CSR for the first time. Returns the physical register if we decide
2187-
/// to use the CSR; otherwise return 0.
2187+
/// to use the CSR; otherwise return MCRegister().
21882188
MCRegister RAGreedy::tryAssignCSRFirstTime(
21892189
const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg,
21902190
uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) {
@@ -2456,7 +2456,7 @@ MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
24562456
// queue. The RS_Split ranges already failed to do this, and they should not
24572457
// get a second chance until they have been split.
24582458
if (Stage != RS_Split)
2459-
if (Register PhysReg =
2459+
if (MCRegister PhysReg =
24602460
tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
24612461
FixedRegisters)) {
24622462
Register Hint = MRI->getSimpleHint(VirtReg.reg());

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

Lines changed: 6 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
6464
}
6565

6666
MayNeedAGPRs = ST.hasMAIInsts();
67+
if (ST.hasGFX90AInsts() &&
68+
ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
69+
!mayUseAGPRs(F))
70+
MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
6771

6872
if (AMDGPU::isChainCC(CC)) {
6973
// Chain functions don't receive an SP from their caller, but are free to
@@ -98,13 +102,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
98102
ImplicitArgPtr = true;
99103
} else {
100104
ImplicitArgPtr = false;
101-
MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
102-
MaxKernArgAlign);
103-
104-
if (ST.hasGFX90AInsts() &&
105-
ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
106-
!mayUseAGPRs(F))
107-
MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
105+
MaxKernArgAlign =
106+
std::max(ST.getAlignmentForImplicitArgPtr(), MaxKernArgAlign);
108107
}
109108

110109
if (!AMDGPU::isGraphics(CC) ||
@@ -783,44 +782,3 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
783782
bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {
784783
return !F.hasFnAttribute("amdgpu-no-agpr");
785784
}
786-
787-
bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
788-
if (UsesAGPRs)
789-
return *UsesAGPRs;
790-
791-
if (!mayNeedAGPRs()) {
792-
UsesAGPRs = false;
793-
return false;
794-
}
795-
796-
if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
797-
MF.getFrameInfo().hasCalls()) {
798-
UsesAGPRs = true;
799-
return true;
800-
}
801-
802-
const MachineRegisterInfo &MRI = MF.getRegInfo();
803-
804-
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
805-
const Register Reg = Register::index2VirtReg(I);
806-
const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
807-
if (RC && SIRegisterInfo::isAGPRClass(RC)) {
808-
UsesAGPRs = true;
809-
return true;
810-
}
811-
if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
812-
// Defer caching UsesAGPRs, function might not yet been regbank selected.
813-
return true;
814-
}
815-
}
816-
817-
for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
818-
if (MRI.isPhysRegUsed(Reg)) {
819-
UsesAGPRs = true;
820-
return true;
821-
}
822-
}
823-
824-
UsesAGPRs = false;
825-
return false;
826-
}

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -494,8 +494,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
494494
// scheduler stage.
495495
unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;
496496

497-
mutable std::optional<bool> UsesAGPRs;
498-
499497
MCPhysReg getNextUserSGPR() const;
500498

501499
MCPhysReg getNextSystemSGPR() const;
@@ -1126,9 +1124,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
11261124
// has a call which may use it.
11271125
bool mayUseAGPRs(const Function &F) const;
11281126

1129-
// \returns true if a function needs or may need AGPRs.
1130-
bool usesAGPRs(const MachineFunction &MF) const;
1131-
11321127
/// \returns Default/requested number of work groups for this function.
11331128
SmallVector<unsigned> getMaxNumWorkGroups() const { return MaxNumWorkGroups; }
11341129

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -585,7 +585,7 @@ SIRegisterInfo::getMaxNumVectorRegs(const MachineFunction &MF) const {
585585
// TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
586586
// register file accordingly.
587587
if (ST.hasGFX90AInsts()) {
588-
if (MFI->usesAGPRs(MF)) {
588+
if (MFI->mayNeedAGPRs()) {
589589
MaxNumVGPRs /= 2;
590590
MaxNumAGPRs = MaxNumVGPRs;
591591
} else {

llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -554,13 +554,11 @@ class RISCVVPseudo {
554554
Instruction BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
555555
// SEW = 0 is used to denote that the Pseudo is not SEW specific (or unknown).
556556
bits<8> SEW = 0;
557-
bit NeedBeInPseudoTable = 1;
558557
}
559558

560559
// The actual table.
561560
def RISCVVPseudosTable : GenericTable {
562561
let FilterClass = "RISCVVPseudo";
563-
let FilterClassField = "NeedBeInPseudoTable";
564562
let CppTypeName = "PseudoInfo";
565563
let Fields = [ "Pseudo", "BaseInstr" ];
566564
let PrimaryKey = [ "Pseudo" ];
@@ -1023,11 +1021,7 @@ class VPseudoNullaryPseudoM<string BaseInst> :
10231021
let hasSideEffects = 0;
10241022
let HasVLOp = 1;
10251023
let HasSEWOp = 1;
1026-
// BaseInstr is not used in RISCVExpandPseudoInsts pass.
1027-
// Just fill a corresponding real v-inst to pass tablegen check.
10281024
let BaseInstr = !cast<Instruction>(BaseInst);
1029-
// We exclude them from RISCVVPseudoTable.
1030-
let NeedBeInPseudoTable = 0;
10311025
}
10321026

10331027
class VPseudoUnaryNoMask<DAGOperand RetClass,
@@ -2168,7 +2162,7 @@ multiclass VPseudoTiedBinaryRoundingMode<VReg RetClass,
21682162
int sew = 0,
21692163
bits<2> TargetConstraintType = 1> {
21702164
defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
2171-
let VLMul = MInfo.value in {
2165+
let VLMul = MInfo.value, SEW=sew in {
21722166
def suffix # "_TIED":
21732167
VPseudoTiedBinaryNoMaskRoundingMode<RetClass, Op2Class, Constraint, TargetConstraintType>;
21742168
def suffix # "_MASK_TIED" :

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 68 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -393,8 +393,9 @@ static InstructionCost
393393
costShuffleViaVRegSplitting(RISCVTTIImpl &TTI, MVT LegalVT,
394394
std::optional<unsigned> VLen, VectorType *Tp,
395395
ArrayRef<int> Mask, TTI::TargetCostKind CostKind) {
396+
assert(LegalVT.isFixedLengthVector());
396397
InstructionCost NumOfDests = InstructionCost::getInvalid();
397-
if (VLen && LegalVT.isFixedLengthVector() && !Mask.empty()) {
398+
if (VLen && !Mask.empty()) {
398399
MVT ElemVT = LegalVT.getVectorElementType();
399400
unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
400401
LegalVT = TTI.getTypeLegalizationCost(
@@ -404,7 +405,6 @@ costShuffleViaVRegSplitting(RISCVTTIImpl &TTI, MVT LegalVT,
404405
NumOfDests = divideCeil(Mask.size(), LegalVT.getVectorNumElements());
405406
}
406407
if (!NumOfDests.isValid() || NumOfDests <= 1 ||
407-
!LegalVT.isFixedLengthVector() ||
408408
LegalVT.getVectorElementType().getSizeInBits() !=
409409
Tp->getElementType()->getPrimitiveSizeInBits() ||
410410
LegalVT.getVectorNumElements() >= Tp->getElementCount().getFixedValue())
@@ -487,7 +487,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
487487
// First, handle cases where having a fixed length vector enables us to
488488
// give a more accurate cost than falling back to generic scalable codegen.
489489
// TODO: Each of these cases hints at a modeling gap around scalable vectors.
490-
if (ST->hasVInstructions() && isa<FixedVectorType>(Tp)) {
490+
if (ST->hasVInstructions() && isa<FixedVectorType>(Tp) &&
491+
LT.second.isFixedLengthVector()) {
491492
InstructionCost VRegSplittingCost = costShuffleViaVRegSplitting(
492493
*this, LT.second, ST->getRealVLen(), Tp, Mask, CostKind);
493494
if (VRegSplittingCost.isValid())
@@ -496,7 +497,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
496497
default:
497498
break;
498499
case TTI::SK_PermuteSingleSrc: {
499-
if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
500+
if (Mask.size() >= 2) {
500501
MVT EltTp = LT.second.getVectorElementType();
501502
// If the size of the element is < ELEN then shuffles of interleaves and
502503
// deinterleaves of 2 vectors can be lowered into the following
@@ -545,24 +546,23 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
545546
}
546547
// vrgather + cost of generating the mask constant.
547548
// We model this for an unknown mask with a single vrgather.
548-
if (LT.second.isFixedLengthVector() && LT.first == 1 &&
549-
(LT.second.getScalarSizeInBits() != 8 ||
550-
LT.second.getVectorNumElements() <= 256)) {
551-
VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
549+
if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
550+
LT.second.getVectorNumElements() <= 256)) {
551+
VectorType *IdxTy =
552+
getVRGatherIndexType(LT.second, *ST, Tp->getContext());
552553
InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
553554
return IndexCost +
554555
getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
555556
}
556-
[[fallthrough]];
557+
break;
557558
}
558559
case TTI::SK_Transpose:
559560
case TTI::SK_PermuteTwoSrc: {
560561
// 2 x (vrgather + cost of generating the mask constant) + cost of mask
561562
// register for the second vrgather. We model this for an unknown
562563
// (shuffle) mask.
563-
if (LT.second.isFixedLengthVector() && LT.first == 1 &&
564-
(LT.second.getScalarSizeInBits() != 8 ||
565-
LT.second.getVectorNumElements() <= 256)) {
564+
if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
565+
LT.second.getVectorNumElements() <= 256)) {
566566
auto &C = Tp->getContext();
567567
auto EC = Tp->getElementCount();
568568
VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, C);
@@ -574,56 +574,65 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
574574
LT.second, CostKind) +
575575
MaskCost;
576576
}
577-
[[fallthrough]];
578-
}
579-
case TTI::SK_Select: {
580-
// We are going to permute multiple sources and the result will be in
581-
// multiple destinations. Providing an accurate cost only for splits where
582-
// the element type remains the same.
583-
if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
584-
LT.second.isFixedLengthVector() &&
585-
LT.second.getVectorElementType().getSizeInBits() ==
586-
Tp->getElementType()->getPrimitiveSizeInBits() &&
587-
LT.second.getVectorNumElements() <
588-
cast<FixedVectorType>(Tp)->getNumElements() &&
589-
divideCeil(Mask.size(),
590-
cast<FixedVectorType>(Tp)->getNumElements()) ==
591-
static_cast<unsigned>(*LT.first.getValue())) {
592-
unsigned NumRegs = *LT.first.getValue();
593-
unsigned VF = cast<FixedVectorType>(Tp)->getNumElements();
594-
unsigned SubVF = PowerOf2Ceil(VF / NumRegs);
595-
auto *SubVecTy = FixedVectorType::get(Tp->getElementType(), SubVF);
596-
597-
InstructionCost Cost = 0;
598-
for (unsigned I = 0, NumSrcRegs = divideCeil(Mask.size(), SubVF);
599-
I < NumSrcRegs; ++I) {
600-
bool IsSingleVector = true;
601-
SmallVector<int> SubMask(SubVF, PoisonMaskElem);
602-
transform(
603-
Mask.slice(I * SubVF,
604-
I == NumSrcRegs - 1 ? Mask.size() % SubVF : SubVF),
605-
SubMask.begin(), [&](int I) -> int {
606-
if (I == PoisonMaskElem)
607-
return PoisonMaskElem;
608-
bool SingleSubVector = I / VF == 0;
609-
IsSingleVector &= SingleSubVector;
610-
return (SingleSubVector ? 0 : 1) * SubVF + (I % VF) % SubVF;
611-
});
612-
if (all_of(enumerate(SubMask), [](auto &&P) {
613-
return P.value() == PoisonMaskElem ||
614-
static_cast<unsigned>(P.value()) == P.index();
615-
}))
616-
continue;
617-
Cost += getShuffleCost(IsSingleVector ? TTI::SK_PermuteSingleSrc
618-
: TTI::SK_PermuteTwoSrc,
619-
SubVecTy, SubMask, CostKind, 0, nullptr);
620-
}
621-
return Cost;
622-
}
623577
break;
624578
}
625579
}
626-
};
580+
581+
auto shouldSplit = [](TTI::ShuffleKind Kind) {
582+
switch (Kind) {
583+
default:
584+
return false;
585+
case TTI::SK_PermuteSingleSrc:
586+
case TTI::SK_Transpose:
587+
case TTI::SK_PermuteTwoSrc:
588+
case TTI::SK_Select:
589+
return true;
590+
}
591+
};
592+
// We are going to permute multiple sources and the result will be in
593+
// multiple destinations. Providing an accurate cost only for splits where
594+
// the element type remains the same.
595+
if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
596+
shouldSplit(Kind) &&
597+
LT.second.getVectorElementType().getSizeInBits() ==
598+
Tp->getElementType()->getPrimitiveSizeInBits() &&
599+
LT.second.getVectorNumElements() <
600+
cast<FixedVectorType>(Tp)->getNumElements() &&
601+
divideCeil(Mask.size(),
602+
cast<FixedVectorType>(Tp)->getNumElements()) ==
603+
static_cast<unsigned>(*LT.first.getValue())) {
604+
unsigned NumRegs = *LT.first.getValue();
605+
unsigned VF = cast<FixedVectorType>(Tp)->getNumElements();
606+
unsigned SubVF = PowerOf2Ceil(VF / NumRegs);
607+
auto *SubVecTy = FixedVectorType::get(Tp->getElementType(), SubVF);
608+
609+
InstructionCost Cost = 0;
610+
for (unsigned I = 0, NumSrcRegs = divideCeil(Mask.size(), SubVF);
611+
I < NumSrcRegs; ++I) {
612+
bool IsSingleVector = true;
613+
SmallVector<int> SubMask(SubVF, PoisonMaskElem);
614+
transform(
615+
Mask.slice(I * SubVF,
616+
I == NumSrcRegs - 1 ? Mask.size() % SubVF : SubVF),
617+
SubMask.begin(), [&](int I) -> int {
618+
if (I == PoisonMaskElem)
619+
return PoisonMaskElem;
620+
bool SingleSubVector = I / VF == 0;
621+
IsSingleVector &= SingleSubVector;
622+
return (SingleSubVector ? 0 : 1) * SubVF + (I % VF) % SubVF;
623+
});
624+
if (all_of(enumerate(SubMask), [](auto &&P) {
625+
return P.value() == PoisonMaskElem ||
626+
static_cast<unsigned>(P.value()) == P.index();
627+
}))
628+
continue;
629+
Cost += getShuffleCost(IsSingleVector ? TTI::SK_PermuteSingleSrc
630+
: TTI::SK_PermuteTwoSrc,
631+
SubVecTy, SubMask, CostKind, 0, nullptr);
632+
}
633+
return Cost;
634+
}
635+
}
627636

628637
// Handle scalable vectors (and fixed vectors legalized to scalable vectors).
629638
switch (Kind) {

0 commit comments

Comments
 (0)