Skip to content

Commit 882dde7

Browse files
committed
Fixed heatmap mode
Created using spr 1.3.4
2 parents 5c513dc + 0eaaa1b commit 882dde7

File tree

95 files changed

+4495
-1012
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

95 files changed

+4495
-1012
lines changed

.github/new-prs-labeler.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -702,6 +702,7 @@ mlgo:
702702
- llvm/unittests/CodeGen/ML*
703703
- llvm/test/CodeGen/MLRegAlloc/**
704704
- llvm/utils/mlgo-utils/**
705+
- llvm/docs/MLGO.rst
705706

706707
tools:llvm-exegesis:
707708
- llvm/tools/llvm-exegesis/**

bolt/docs/Heatmaps.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,13 @@ For the generation, the default bucket size was used with a line size of 128.
8989
Some useful options are:
9090

9191
```
92-
-line-size=<uint> - number of entries per line (default 256)
92+
-block-size=<uint> - heatmap bucket size in bytes (default 64)
93+
-line-size=<uint> - number of entries per line (default 256).
94+
Use 128 if the heatmap doesn't fit the screen horizontally.
9395
-max-address=<uint> - maximum address considered valid for heatmap (default 4GB)
9496
-print-mappings - print mappings in the legend, between characters/blocks and text sections (default false)
97+
-heatmap-zoom-out=<uint>,... - print zoomed out heatmaps with given block sizes,
98+
must be multiples of block-size in ascending order.
99+
Suggested values: 4096 (default page size), 16384 (16k page),
100+
1048576 (1MB for XL workloads).
95101
```

bolt/include/bolt/Profile/DataAggregator.h

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -370,10 +370,15 @@ class DataAggregator : public DataReader {
370370
/// memory.
371371
///
372372
/// File format syntax:
373-
/// {S|B|F|f|T} <start> [<end>] [<ft_end>] <count> [<mispred_count>]
373+
/// E <event>
374+
/// S <start> <count>
375+
/// T <start> <end> <ft_end> <count>
376+
/// B <start> <end> <count> <mispred_count>
377+
/// [Ff] <start> <end> <count>
374378
///
375379
/// where <start>, <end>, <ft_end> have the format [<id>:]<offset>
376380
///
381+
/// E - name of the sampling event used for subsequent entries
377382
/// S - indicates an aggregated basic sample at <start>
378383
/// B - indicates an aggregated branch from <start> to <end>
379384
/// F - an aggregated fall-through from <start> to <end>
@@ -390,23 +395,25 @@ class DataAggregator : public DataReader {
390395
/// <offset> - hex offset from the object base load address (0 for the
391396
/// main executable unless it's PIE) to the address.
392397
///
393-
/// <count> - total aggregated count of the branch or a fall-through.
398+
/// <count> - total aggregated count.
394399
///
395400
/// <mispred_count> - the number of times the branch was mispredicted.
396-
/// Omitted for fall-throughs.
397401
///
398402
/// Example:
399403
/// Basic samples profile:
404+
/// E cycles
400405
/// S 41be50 3
406+
/// E br_inst_retired.near_taken
407+
/// S 41be60 6
401408
///
402-
/// Soft-deprecated branch profile with separate branches and fall-throughs:
409+
/// Trace profile combining branches and fall-throughs:
410+
/// T 4b196f 4b19e0 4b19ef 2
411+
///
412+
/// Legacy branch profile with separate branches and fall-throughs:
403413
/// F 41be50 41be50 3
404414
/// F 41be90 41be90 4
405415
/// B 4b1942 39b57f0 3 0
406416
/// B 4b196f 4b19e0 2 0
407-
///
408-
/// Recommended branch profile with pre-aggregated traces:
409-
/// T 4b196f 4b19e0 4b19ef 2
410417
void parsePreAggregated();
411418

412419
/// Parse the full output of pre-aggregated LBR samples generated by

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 69 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1210,85 +1210,105 @@ ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
12101210
}
12111211

12121212
std::error_code DataAggregator::parseAggregatedLBREntry() {
1213-
while (checkAndConsumeFS()) {
1214-
}
1213+
enum AggregatedLBREntry : char {
1214+
INVALID = 0,
1215+
EVENT_NAME, // E
1216+
TRACE, // T
1217+
SAMPLE, // S
1218+
BRANCH, // B
1219+
FT, // F
1220+
FT_EXTERNAL_ORIGIN // f
1221+
} Type = INVALID;
1222+
1223+
// The number of fields to parse, set based on Type.
1224+
int AddrNum = 0;
1225+
int CounterNum = 0;
1226+
// Storage for parsed fields.
1227+
StringRef EventName;
1228+
std::optional<Location> Addr[3];
1229+
int64_t Counters[2];
12151230

1216-
ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
1217-
if (std::error_code EC = TypeOrErr.getError())
1218-
return EC;
1219-
enum AggregatedLBREntry {
1220-
TRACE,
1221-
SAMPLE,
1222-
BRANCH,
1223-
FT,
1224-
FT_EXTERNAL_ORIGIN,
1225-
INVALID
1226-
};
1227-
auto Type = StringSwitch<AggregatedLBREntry>(TypeOrErr.get())
1228-
.Case("T", TRACE)
1229-
.Case("S", SAMPLE)
1230-
.Case("B", BRANCH)
1231-
.Case("F", FT)
1232-
.Case("f", FT_EXTERNAL_ORIGIN)
1233-
.Default(INVALID);
1234-
if (Type == INVALID) {
1235-
reportError("expected T, S, B, F or f");
1236-
return make_error_code(llvm::errc::io_error);
1237-
}
1231+
while (Type == INVALID || Type == EVENT_NAME) {
1232+
while (checkAndConsumeFS()) {
1233+
}
1234+
ErrorOr<StringRef> StrOrErr =
1235+
parseString(FieldSeparator, Type == EVENT_NAME);
1236+
if (std::error_code EC = StrOrErr.getError())
1237+
return EC;
1238+
StringRef Str = StrOrErr.get();
12381239

1239-
std::optional<Location> Addrs[3];
1240-
int AddrNum = 2;
1241-
if (Type == TRACE)
1242-
AddrNum = 3;
1243-
else if (Type == SAMPLE)
1244-
AddrNum = 1;
1240+
if (Type == EVENT_NAME) {
1241+
EventName = Str;
1242+
break;
1243+
}
12451244

1246-
int64_t Counters[2];
1247-
int CounterNum = 1;
1248-
if (Type == BRANCH)
1249-
CounterNum = 2;
1245+
Type = StringSwitch<AggregatedLBREntry>(Str)
1246+
.Case("T", TRACE)
1247+
.Case("S", SAMPLE)
1248+
.Case("E", EVENT_NAME)
1249+
.Case("B", BRANCH)
1250+
.Case("F", FT)
1251+
.Case("f", FT_EXTERNAL_ORIGIN)
1252+
.Default(INVALID);
1253+
1254+
if (Type == INVALID) {
1255+
reportError("expected T, S, E, B, F or f");
1256+
return make_error_code(llvm::errc::io_error);
1257+
}
1258+
1259+
using SSI = StringSwitch<int>;
1260+
AddrNum = SSI(Str).Case("T", 3).Case("S", 1).Case("E", 0).Default(2);
1261+
CounterNum = SSI(Str).Case("B", 2).Case("E", 0).Default(1);
1262+
}
12501263

12511264
for (int I = 0; I < AddrNum; ++I) {
12521265
while (checkAndConsumeFS()) {
12531266
}
1254-
if (ErrorOr<Location> Addr = parseLocationOrOffset())
1255-
Addrs[I] = Addr.get();
1256-
else
1257-
return Addr.getError();
1267+
ErrorOr<Location> AddrOrErr = parseLocationOrOffset();
1268+
if (std::error_code EC = AddrOrErr.getError())
1269+
return EC;
1270+
Addr[I] = AddrOrErr.get();
12581271
}
12591272

12601273
for (int I = 0; I < CounterNum; ++I) {
12611274
while (checkAndConsumeFS()) {
12621275
}
1263-
if (ErrorOr<int64_t> Count = parseNumberField(FieldSeparator, I + 1 == CounterNum))
1264-
Counters[I] = Count.get();
1265-
else
1266-
return Count.getError();
1276+
ErrorOr<int64_t> CountOrErr =
1277+
parseNumberField(FieldSeparator, I + 1 == CounterNum);
1278+
if (std::error_code EC = CountOrErr.getError())
1279+
return EC;
1280+
Counters[I] = CountOrErr.get();
12671281
}
12681282

12691283
if (!checkAndConsumeNewLine()) {
12701284
reportError("expected end of line");
12711285
return make_error_code(llvm::errc::io_error);
12721286
}
12731287

1274-
const uint64_t FromOffset = Addrs[0]->Offset;
1288+
if (Type == EVENT_NAME) {
1289+
EventNames.insert(EventName);
1290+
return std::error_code();
1291+
}
1292+
1293+
const uint64_t FromOffset = Addr[0]->Offset;
12751294
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(FromOffset);
12761295
if (FromFunc)
12771296
FromFunc->setHasProfileAvailable();
12781297

1298+
int64_t Count = Counters[0];
1299+
int64_t Mispreds = Counters[1];
1300+
12791301
if (Type == SAMPLE) {
1280-
BasicSamples[FromOffset] += Counters[0];
1302+
BasicSamples[FromOffset] += Count;
1303+
NumTotalSamples += Count;
12811304
return std::error_code();
12821305
}
12831306

1284-
const uint64_t ToOffset = Addrs[1]->Offset;
1307+
const uint64_t ToOffset = Addr[1]->Offset;
12851308
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(ToOffset);
12861309
if (ToFunc)
12871310
ToFunc->setHasProfileAvailable();
12881311

1289-
int64_t Count = Counters[0];
1290-
int64_t Mispreds = Counters[1];
1291-
12921312
Trace Trace(FromOffset, ToOffset);
12931313
// Taken trace
12941314
if (Type == TRACE || Type == BRANCH) {
@@ -1300,7 +1320,7 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
13001320
}
13011321
// Construct fallthrough part of the trace
13021322
if (Type == TRACE) {
1303-
const uint64_t TraceFtEndOffset = Addrs[2]->Offset;
1323+
const uint64_t TraceFtEndOffset = Addr[2]->Offset;
13041324
Trace.From = ToOffset;
13051325
Trace.To = TraceFtEndOffset;
13061326
Type = FromFunc == ToFunc ? FT : FT_EXTERNAL_ORIGIN;

bolt/test/X86/Inputs/pre-aggregated-basic.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
E cycles
12
S 4005f0 1
23
S 4005f0 1
34
S 400610 1

bolt/test/X86/pre-aggregated-perf.test

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,12 @@ RUN: cat %t.bolt.yaml | FileCheck %s -check-prefix=NEWFORMAT
6060
## Test pre-aggregated basic profile
6161
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba \
6262
RUN: 2>&1 | FileCheck %s --check-prefix=BASIC-ERROR
63-
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba \
63+
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba.nl \
6464
RUN: -nl 2>&1 | FileCheck %s --check-prefix=BASIC-SUCCESS
65+
RUN: FileCheck %s --input-file %t.ba.nl --check-prefix CHECK-BASIC-NL
6566
BASIC-ERROR: BOLT-INFO: 0 out of 7 functions in the binary (0.0%) have non-empty execution profile
6667
BASIC-SUCCESS: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
68+
CHECK-BASIC-NL: no_lbr cycles
6769

6870
PERF2BOLT: 0 [unknown] 7f36d18d60c0 1 main 53c 0 2
6971
PERF2BOLT: 1 main 451 1 SolveCubic 0 0 2

bolt/test/link_fdata.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
fdata_pat = re.compile(r"([01].*) (?P<exec>\d+) (?P<mispred>\d+)")
3737

3838
# Pre-aggregated profile:
39-
# {T|S|B|F|f} <start> [<end>] [<ft_end>] <count> [<mispred_count>]
39+
# {T|S|B|F|f} <start> [<end>] [<ft_end>] <count> [<mispred_count>] or E <event>
4040
# <loc>: [<id>:]<offset>
4141
preagg_pat = re.compile(r"(?P<type>[TSBFf]) (?P<offsets_count>.*)")
4242

clang/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,8 @@ Bug Fixes in This Version
615615
argument which contains a pragma. (#GH113722)
616616
- Fixed assertion failures when generating name lookup table in modules. (#GH61065, #GH134739)
617617
- Fixed an assertion failure in constant compound literal statements. (#GH139160)
618+
- Fixed a crash due to unknown references and pointer implementation and handling of
619+
base classes. (#GH139452)
618620

619621
Bug Fixes to Compiler Builtins
620622
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

clang/include/clang/Basic/Attr.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4790,6 +4790,7 @@ def HLSLResourceBinding: InheritableAttr {
47904790
RegisterType RegType;
47914791
std::optional<unsigned> SlotNumber;
47924792
unsigned SpaceNumber;
4793+
std::optional<unsigned> ImplicitBindingOrderID;
47934794

47944795
public:
47954796
void setBinding(RegisterType RT, std::optional<unsigned> SlotNum, unsigned SpaceNum) {
@@ -4811,6 +4812,16 @@ def HLSLResourceBinding: InheritableAttr {
48114812
unsigned getSpaceNumber() const {
48124813
return SpaceNumber;
48134814
}
4815+
void setImplicitBindingOrderID(uint32_t Value) {
4816+
ImplicitBindingOrderID = Value;
4817+
}
4818+
bool hasImplicitBindingOrderID() const {
4819+
return ImplicitBindingOrderID.has_value();
4820+
}
4821+
uint32_t getImplicitBindingOrderID() const {
4822+
assert(hasImplicitBindingOrderID() && "attribute does not have implicit binding order id");
4823+
return ImplicitBindingOrderID.value();
4824+
}
48144825
}];
48154826
}
48164827

clang/lib/AST/ExprConstant.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3314,7 +3314,11 @@ static bool HandleLValueBase(EvalInfo &Info, const Expr *E, LValue &Obj,
33143314
return false;
33153315

33163316
// Extract most-derived object and corresponding type.
3317-
DerivedDecl = D.MostDerivedType->getAsCXXRecordDecl();
3317+
// FIXME: After implementing P2280R4 it became possible to get references
3318+
// here. We do MostDerivedType->getAsCXXRecordDecl() in several other
3319+
// locations and if we see crashes in those locations in the future
3320+
// it may make more sense to move this fix into LValue::set.
3321+
DerivedDecl = D.MostDerivedType.getNonReferenceType()->getAsCXXRecordDecl();
33183322
if (!CastToDerivedClass(Info, E, Obj, DerivedDecl, D.MostDerivedPathLength))
33193323
return false;
33203324

0 commit comments

Comments
 (0)