Skip to content

Commit eebea79

Browse files
SC llvm team
authored and committed
Merge upstream LLVM into amd-gfx12
2 parents 58d655f + 92d0924 commit eebea79

File tree

21 files changed

+2223
-409
lines changed

21 files changed

+2223
-409
lines changed

bolt/include/bolt/Profile/DataAggregator.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -502,6 +502,9 @@ class DataAggregator : public DataReader {
502502
/// entries).
503503
void imputeFallThroughs();
504504

505+
/// Register profiled functions for lite mode.
506+
void registerProfiledFunctions();
507+
505508
/// Debugging dump methods
506509
void dump() const;
507510
void dump(const PerfBranchSample &Sample) const;

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,26 @@ void DataAggregator::imputeFallThroughs() {
581581
outs() << "BOLT-INFO: imputed " << InferredTraces << " traces\n";
582582
}
583583

584+
void DataAggregator::registerProfiledFunctions() {
585+
DenseSet<uint64_t> Addrs;
586+
for (const auto &Trace : llvm::make_first_range(Traces)) {
587+
if (Trace.Branch != Trace::FT_ONLY &&
588+
Trace.Branch != Trace::FT_EXTERNAL_ORIGIN)
589+
Addrs.insert(Trace.Branch);
590+
Addrs.insert(Trace.From);
591+
}
592+
593+
for (const auto [PC, _] : BasicSamples)
594+
Addrs.insert(PC);
595+
596+
for (const PerfMemSample &MemSample : MemSamples)
597+
Addrs.insert(MemSample.PC);
598+
599+
for (const uint64_t Addr : Addrs)
600+
if (BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr))
601+
Func->setHasProfileAvailable();
602+
}
603+
584604
Error DataAggregator::preprocessProfile(BinaryContext &BC) {
585605
this->BC = &BC;
586606

@@ -603,6 +623,7 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
603623
exit(0);
604624
}
605625

626+
registerProfiledFunctions();
606627
return Error::success();
607628
}
608629

@@ -1347,10 +1368,6 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
13471368
}
13481369

13491370
const uint64_t FromOffset = Addr[0]->Offset;
1350-
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(FromOffset);
1351-
if (FromFunc)
1352-
FromFunc->setHasProfileAvailable();
1353-
13541371
int64_t Count = Counters[0];
13551372
int64_t Mispreds = Counters[1];
13561373

@@ -1361,11 +1378,6 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
13611378
return std::error_code();
13621379
}
13631380

1364-
const uint64_t ToOffset = Addr[1]->Offset;
1365-
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(ToOffset);
1366-
if (ToFunc)
1367-
ToFunc->setHasProfileAvailable();
1368-
13691381
/// For fall-through types, adjust locations to match Trace container.
13701382
if (Type == FT || Type == FT_EXTERNAL_ORIGIN || Type == FT_EXTERNAL_RETURN) {
13711383
Addr[2] = Location(Addr[1]->Offset); // Trace To
@@ -1613,9 +1625,6 @@ std::error_code DataAggregator::parseBranchEvents() {
16131625
Traces.reserve(TraceMap.size());
16141626
for (const auto &[Trace, Info] : TraceMap) {
16151627
Traces.emplace_back(Trace, Info);
1616-
for (const uint64_t Addr : {Trace.Branch, Trace.From})
1617-
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr))
1618-
BF->setHasProfileAvailable();
16191628
}
16201629
clear(TraceMap);
16211630

@@ -1676,9 +1685,6 @@ std::error_code DataAggregator::parseBasicEvents() {
16761685
continue;
16771686
++NumTotalSamples;
16781687

1679-
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1680-
BF->setHasProfileAvailable();
1681-
16821688
++BasicSamples[Sample->PC];
16831689
EventNames.insert(Sample->EventName);
16841690
}
@@ -1716,9 +1722,6 @@ std::error_code DataAggregator::parseMemEvents() {
17161722
if (std::error_code EC = Sample.getError())
17171723
return EC;
17181724

1719-
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Sample->PC))
1720-
BF->setHasProfileAvailable();
1721-
17221725
MemSamples.emplace_back(std::move(Sample.get()));
17231726
}
17241727

clang/lib/Sema/SemaOverload.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8042,8 +8042,8 @@ static void AddTemplateOverloadCandidateImmediately(
80428042

80438043
Candidate.IgnoreObjectArgument =
80448044
isa<CXXMethodDecl>(Candidate.Function) &&
8045-
cast<CXXMethodDecl>(Candidate.Function)
8046-
->isImplicitObjectMemberFunction() &&
8045+
!cast<CXXMethodDecl>(Candidate.Function)
8046+
->isExplicitObjectMemberFunction() &&
80478047
!isa<CXXConstructorDecl>(Candidate.Function);
80488048

80498049
Candidate.ExplicitCallArguments = Args.size();

clang/test/SemaCXX/cxx2b-deducing-this.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1357,3 +1357,35 @@ void Bar(this int) { // expected-note {{candidate function}}
13571357
}
13581358

13591359
}
1360+
1361+
namespace GH147046_regression {
1362+
1363+
template <typename z> struct ai {
1364+
ai(z::ah);
1365+
};
1366+
1367+
template <typename z> struct ak {
1368+
template <typename am> void an(am, z);
1369+
template <typename am> static void an(am, ai<z>);
1370+
};
1371+
template <typename> struct ao {};
1372+
1373+
template <typename ap>
1374+
auto ar(ao<ap> at) -> decltype(ak<ap>::an(at, 0));
1375+
// expected-note@-1 {{candidate template ignored: substitution failure [with ap = GH147046_regression::ay]: no matching function for call to 'an'}}
1376+
1377+
class aw;
1378+
struct ax {
1379+
typedef int ah;
1380+
};
1381+
struct ay {
1382+
typedef aw ah;
1383+
};
1384+
1385+
ao<ay> az ;
1386+
ai<ax> bd(0);
1387+
void f() {
1388+
ar(az); // expected-error {{no matching function for call to 'ar'}}
1389+
}
1390+
1391+
}

lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,6 @@ class NativeRegisterContextLinux_arm : public NativeRegisterContextLinux {
109109
// occurred.
110110
lldb::addr_t real_addr; // Address value that should cause target to stop.
111111
uint32_t control; // Breakpoint/watchpoint control value.
112-
uint32_t refcount; // Serves as enable/disable and reference counter.
113112
};
114113

115114
struct DREG m_hbr_regs[16]; // Arm native linux hardware breakpoints

llvm/include/llvm/Support/DebugLog.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ namespace llvm {
4040
DEBUGLOG_WITH_STREAM_TYPE_AND_FILE(STREAM, TYPE, __SHORT_FILE__)
4141
#else
4242
#define DEBUGLOG_WITH_STREAM_AND_TYPE(STREAM, TYPE) \
43-
DEBUGLOG_WITH_STREAM_TYPE_AND_FILE( \
44-
STREAM, TYPE, ::llvm::impl::LogWithNewline::getShortFileName(__FILE__))
43+
DEBUGLOG_WITH_STREAM_TYPE_AND_FILE(STREAM, TYPE, \
44+
::llvm::impl::getShortFileName(__FILE__))
4545
#endif
4646

4747
namespace impl {

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
#include "AMDGPU.h"
1414
#include "GCNSubtarget.h"
1515
#include "Utils/AMDGPUBaseInfo.h"
16-
#include "llvm/CodeGen/TargetPassConfig.h"
1716
#include "llvm/IR/IntrinsicsAMDGPU.h"
1817
#include "llvm/IR/IntrinsicsR600.h"
1918
#include "llvm/Target/TargetMachine.h"

llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -708,10 +708,14 @@ static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
708708
auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
709709
if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
710710
all_equal(Shuf->getShuffleMask()) &&
711-
Shuf->getType() == Shuf->getOperand(0)->getType()) {
711+
ElementCount::isKnownGE(Shuf->getType()->getElementCount(),
712+
cast<VectorType>(Shuf->getOperand(0)->getType())
713+
->getElementCount())) {
712714
// trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
713715
// trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
714-
Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), Trunc.getType());
716+
Type *NewTruncTy = Shuf->getOperand(0)->getType()->getWithNewType(
717+
Trunc.getType()->getScalarType());
718+
Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), NewTruncTy);
715719
return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
716720
}
717721

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1363,11 +1363,15 @@ class LoopVectorizationCostModel {
13631363
TTI.hasActiveVectorLength() && !EnableVPlanNativePath;
13641364
if (EVLIsLegal)
13651365
return;
1366-
// If for some reason EVL mode is unsupported, fallback to
1367-
// DataWithoutLaneMask to try to vectorize the loop with folded tail
1368-
// in a generic way.
1369-
ChosenTailFoldingStyle = {TailFoldingStyle::DataWithoutLaneMask,
1370-
TailFoldingStyle::DataWithoutLaneMask};
1366+
// If for some reason EVL mode is unsupported, fallback to a scalar epilogue
1367+
// if it's allowed, or DataWithoutLaneMask otherwise.
1368+
if (ScalarEpilogueStatus == CM_ScalarEpilogueAllowed ||
1369+
ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicate)
1370+
ChosenTailFoldingStyle = {TailFoldingStyle::None, TailFoldingStyle::None};
1371+
else
1372+
ChosenTailFoldingStyle = {TailFoldingStyle::DataWithoutLaneMask,
1373+
TailFoldingStyle::DataWithoutLaneMask};
1374+
13711375
LLVM_DEBUG(
13721376
dbgs() << "LV: Preference for VP intrinsics indicated. Will "
13731377
"not try to generate VP Intrinsics "
@@ -4500,19 +4504,17 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
45004504
Type *TCType = Legal->getWidestInductionType();
45014505
const SCEV *RemainingIterations = nullptr;
45024506
unsigned MaxTripCount = 0;
4503-
if (MainLoopVF.isFixed()) {
4504-
// TODO: extend to support scalable VFs.
4505-
const SCEV *TC = vputils::getSCEVExprForVPValue(
4506-
getPlanFor(MainLoopVF).getTripCount(), SE);
4507-
assert(!isa<SCEVCouldNotCompute>(TC) &&
4508-
"Trip count SCEV must be computable");
4509-
RemainingIterations = SE.getURemExpr(
4510-
TC, SE.getConstant(TCType, MainLoopVF.getFixedValue() * IC));
4511-
4512-
// No iterations left to process in the epilogue.
4513-
if (RemainingIterations->isZero())
4514-
return Result;
4507+
const SCEV *TC =
4508+
vputils::getSCEVExprForVPValue(getPlanFor(MainLoopVF).getTripCount(), SE);
4509+
assert(!isa<SCEVCouldNotCompute>(TC) && "Trip count SCEV must be computable");
4510+
RemainingIterations =
4511+
SE.getURemExpr(TC, SE.getElementCount(TCType, MainLoopVF * IC));
4512+
4513+
// No iterations left to process in the epilogue.
4514+
if (RemainingIterations->isZero())
4515+
return Result;
45154516

4517+
if (MainLoopVF.isFixed()) {
45164518
MaxTripCount = MainLoopVF.getFixedValue() * IC - 1;
45174519
if (SE.isKnownPredicate(CmpInst::ICMP_ULT, RemainingIterations,
45184520
SE.getConstant(TCType, MaxTripCount))) {

0 commit comments

Comments
 (0)