Skip to content

Commit 10860d1

Browse files
authored
Merge branch 'main' into main
2 parents 12561c4 + 7b473df commit 10860d1

File tree

1,165 files changed

+57688
-39128
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,165 files changed

+57688
-39128
lines changed

.github/CODEOWNERS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@
131131
/bolt/ @aaupov @maksfb @rafaelauler @ayermolo @dcci @yota9
132132

133133
# Bazel build system.
134-
/utils/bazel/ @rupprecht @keith
134+
/utils/bazel/ @rupprecht @keith @aaronmondal
135135

136136
# InstallAPI and TextAPI
137137
/llvm/**/TextAPI/ @cyndyishida

.github/workflows/release-binaries-all.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ on:
2727
required: true
2828
default: false
2929
type: boolean
30+
secrets:
31+
RELEASE_TASKS_USER_TOKEN:
32+
description: "Secret used to check user permissions."
33+
required: false
3034

3135
pull_request:
3236
types:

bolt/include/bolt/Core/BinarySection.h

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -359,15 +359,9 @@ class BinarySection {
359359

360360
/// Add a new relocation at the given \p Offset.
361361
void addRelocation(uint64_t Offset, MCSymbol *Symbol, uint64_t Type,
362-
uint64_t Addend, uint64_t Value = 0,
363-
bool Pending = false) {
362+
uint64_t Addend, uint64_t Value = 0) {
364363
assert(Offset < getSize() && "offset not within section bounds");
365-
if (!Pending) {
366-
Relocations.emplace(Relocation{Offset, Symbol, Type, Addend, Value});
367-
} else {
368-
PendingRelocations.emplace_back(
369-
Relocation{Offset, Symbol, Type, Addend, Value});
370-
}
364+
Relocations.emplace(Relocation{Offset, Symbol, Type, Addend, Value});
371365
}
372366

373367
/// Add a dynamic relocation at the given \p Offset.

bolt/tools/driver/llvm-bolt.cpp

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -173,24 +173,14 @@ void boltMode(int argc, char **argv) {
173173
}
174174
}
175175

176-
static std::string GetExecutablePath(const char *Argv0) {
177-
SmallString<256> ExecutablePath(Argv0);
178-
// Do a PATH lookup if Argv0 isn't a valid path.
179-
if (!llvm::sys::fs::exists(ExecutablePath))
180-
if (llvm::ErrorOr<std::string> P =
181-
llvm::sys::findProgramByName(ExecutablePath))
182-
ExecutablePath = *P;
183-
return std::string(ExecutablePath);
184-
}
185-
186176
int main(int argc, char **argv) {
187177
// Print a stack trace if we signal out.
188178
sys::PrintStackTraceOnErrorSignal(argv[0]);
189179
PrettyStackTraceProgram X(argc, argv);
190180

191181
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
192182

193-
std::string ToolPath = GetExecutablePath(argv[0]);
183+
std::string ToolPath = llvm::sys::fs::getMainExecutable(argv[0], nullptr);
194184

195185
// Initialize targets and assembly printers/parsers.
196186
llvm::InitializeAllTargetInfos();

bolt/unittests/Core/BinaryContext.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -93,12 +93,13 @@ TEST_P(BinaryContextTester, FlushPendingRelocCALL26) {
9393
DataSize, 4);
9494
MCSymbol *RelSymbol1 = BC->getOrCreateGlobalSymbol(4, "Func1");
9595
ASSERT_TRUE(RelSymbol1);
96-
BS.addRelocation(8, RelSymbol1, ELF::R_AARCH64_CALL26, 0, 0, true);
96+
BS.addPendingRelocation(
97+
Relocation{8, RelSymbol1, ELF::R_AARCH64_CALL26, 0, 0});
9798
MCSymbol *RelSymbol2 = BC->getOrCreateGlobalSymbol(16, "Func2");
9899
ASSERT_TRUE(RelSymbol2);
99-
BS.addRelocation(12, RelSymbol2, ELF::R_AARCH64_CALL26, 0, 0, true);
100+
BS.addPendingRelocation(
101+
Relocation{12, RelSymbol2, ELF::R_AARCH64_CALL26, 0, 0});
100102

101-
std::error_code EC;
102103
SmallVector<char> Vect(DataSize);
103104
raw_svector_ostream OS(Vect);
104105

@@ -134,12 +135,13 @@ TEST_P(BinaryContextTester, FlushPendingRelocJUMP26) {
134135
(uint8_t *)Data, Size, 4);
135136
MCSymbol *RelSymbol1 = BC->getOrCreateGlobalSymbol(4, "Func1");
136137
ASSERT_TRUE(RelSymbol1);
137-
BS.addRelocation(8, RelSymbol1, ELF::R_AARCH64_JUMP26, 0, 0, true);
138+
BS.addPendingRelocation(
139+
Relocation{8, RelSymbol1, ELF::R_AARCH64_JUMP26, 0, 0});
138140
MCSymbol *RelSymbol2 = BC->getOrCreateGlobalSymbol(16, "Func2");
139141
ASSERT_TRUE(RelSymbol2);
140-
BS.addRelocation(12, RelSymbol2, ELF::R_AARCH64_JUMP26, 0, 0, true);
142+
BS.addPendingRelocation(
143+
Relocation{12, RelSymbol2, ELF::R_AARCH64_JUMP26, 0, 0});
141144

142-
std::error_code EC;
143145
SmallVector<char> Vect(Size);
144146
raw_svector_ostream OS(Vect);
145147

clang/docs/HLSL/FunctionCalls.rst

Lines changed: 60 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -248,13 +248,14 @@ which is a term made up for HLSL. A cx-value is a temporary value which may be
248248
the result of a cast, and stores its value back to an lvalue when the value
249249
expires.
250250

251-
To represent this concept in Clang we introduce a new ``HLSLOutParamExpr``. An
252-
``HLSLOutParamExpr`` has two forms, one with a single sub-expression and one
253-
with two sub-expressions.
251+
To represent this concept in Clang we introduce a new ``HLSLOutArgExpr``. An
252+
``HLSLOutArgExpr`` has three sub-expressions:
254253

255-
The single sub-expression form is used when the argument expression and the
256-
function parameter are the same type, so no cast is required. As in this
257-
example:
254+
* An OpaqueValueExpr of the argument lvalue expression.
255+
* An OpaqueValueExpr of the copy-initialized parameter temporary.
256+
* A BinaryOpExpr assigning the first with the value of the second.
257+
258+
Given this example:
258259

259260
.. code-block:: c++
260261

@@ -267,23 +268,36 @@ example:
267268
Init(V);
268269
}
269270

270-
The expected AST formulation for this code would be something like:
271+
The expected AST formulation for this code would be something like the example
272+
below. Due to the nature of OpaqueValueExpr nodes, the nodes repeat in the AST
273+
dump. The fake addresses ``0xSOURCE`` and ``0xTEMPORARY`` denote the source
274+
lvalue and argument temporary lvalue expressions.
271275

272276
.. code-block:: text
273277
274278
CallExpr 'void'
275279
|-ImplicitCastExpr 'void (*)(int &)' <FunctionToPointerDecay>
276280
| `-DeclRefExpr 'void (int &)' lvalue Function 'Init' 'void (int &)'
277-
|-HLSLOutParamExpr 'int' lvalue inout
278-
`-DeclRefExpr 'int' lvalue Var 'V' 'int'
279-
280-
The ``HLSLOutParamExpr`` captures that the value is ``inout`` vs ``out`` to
281-
denote whether or not the temporary is initialized from the sub-expression. If
282-
no casting is required the sub-expression denotes the lvalue expression that the
283-
cx-value will be copied to when the value expires.
284-
285-
The two sub-expression form of the AST node is required when the argument type
286-
is not the same as the parameter type. Given this example:
281+
`-HLSLOutArgExpr <col:10> 'int' lvalue inout
282+
|-OpaqueValueExpr 0xSOURCE <col:10> 'int' lvalue
283+
| `-DeclRefExpr <col:10> 'int' lvalue Var 'V' 'int'
284+
|-OpaqueValueExpr 0xTEMPORARY <col:10> 'int' lvalue
285+
| `-ImplicitCastExpr <col:10> 'int' <LValueToRValue>
286+
| `-OpaqueValueExpr 0xSOURCE <col:10> 'int' lvalue
287+
| `-DeclRefExpr <col:10> 'int' lvalue Var 'V' 'int'
288+
`-BinaryOperator <col:10> 'int' lvalue '='
289+
|-OpaqueValueExpr 0xSOURCE <col:10> 'int' lvalue
290+
| `-DeclRefExpr <col:10> 'int' lvalue Var 'V' 'int'
291+
`-ImplicitCastExpr <col:10> 'int' <LValueToRValue>
292+
`-OpaqueValueExpr 0xTEMPORARY <col:10> 'int' lvalue
293+
`-ImplicitCastExpr <col:10> 'int' <LValueToRValue>
294+
`-OpaqueValueExpr 0xSOURCE <col:10> 'int' lvalue
295+
`-DeclRefExpr <col:10> 'int' lvalue Var 'V' 'int'
296+
297+
The ``HLSLOutArgExpr`` captures that the value is ``inout`` vs ``out`` to
298+
denote whether or not the temporary is initialized from the sub-expression.
299+
300+
The example below demonstrates argument casting:
287301

288302
.. code-block:: c++
289303

@@ -295,28 +309,39 @@ is not the same as the parameter type. Given this example:
295309
Trunc(F);
296310
}
297311

298-
For this case the ``HLSLOutParamExpr`` will have sub-expressions to record both
312+
For this case the ``HLSLOutArgExpr`` will have sub-expressions to record both
299313
casting expression sequences for the initialization and write back:
300314

301315
.. code-block:: text
302316
303317
-CallExpr 'void'
304318
|-ImplicitCastExpr 'void (*)(int3 &)' <FunctionToPointerDecay>
305319
| `-DeclRefExpr 'void (int3 &)' lvalue Function 'Trunc' 'void (int3 &)'
306-
`-HLSLOutParamExpr 'int3' lvalue inout
307-
|-ImplicitCastExpr 'float3' <IntegralToFloating>
308-
| `-ImplicitCastExpr 'int3' <LValueToRValue>
309-
| `-OpaqueValueExpr 'int3' lvalue
310-
`-ImplicitCastExpr 'int3' <FloatingToIntegral>
311-
`-ImplicitCastExpr 'float3' <LValueToRValue>
312-
`-DeclRefExpr 'float3' lvalue 'F' 'float3'
313-
314-
In this formation the write-back casts are captured as the first sub-expression
315-
and they cast from an ``OpaqueValueExpr``. In IR generation we can use the
316-
``OpaqueValueExpr`` as a placeholder for the ``HLSLOutParamExpr``'s temporary
317-
value on function return.
318-
319-
In code generation this can be implemented with some targeted extensions to the
320-
Objective-C write-back support. Specifically extending CGCall.cpp's
321-
``EmitWriteback`` function to support casting expressions and emission of
322-
aggregate lvalues.
320+
`-HLSLOutArgExpr <col:11> 'int3':'vector<int, 3>' lvalue inout
321+
|-OpaqueValueExpr 0xSOURCE <col:11> 'float3':'vector<float, 3>' lvalue
322+
| `-DeclRefExpr <col:11> 'float3':'vector<float, 3>' lvalue Var 'F' 'float3':'vector<float, 3>'
323+
|-OpaqueValueExpr 0xTEMPORARY <col:11> 'int3':'vector<int, 3>' lvalue
324+
| `-ImplicitCastExpr <col:11> 'vector<int, 3>' <FloatingToIntegral>
325+
| `-ImplicitCastExpr <col:11> 'float3':'vector<float, 3>' <LValueToRValue>
326+
| `-OpaqueValueExpr 0xSOURCE <col:11> 'float3':'vector<float, 3>' lvalue
327+
| `-DeclRefExpr <col:11> 'float3':'vector<float, 3>' lvalue Var 'F' 'float3':'vector<float, 3>'
328+
`-BinaryOperator <col:11> 'float3':'vector<float, 3>' lvalue '='
329+
|-OpaqueValueExpr 0xSOURCE <col:11> 'float3':'vector<float, 3>' lvalue
330+
| `-DeclRefExpr <col:11> 'float3':'vector<float, 3>' lvalue Var 'F' 'float3':'vector<float, 3>'
331+
`-ImplicitCastExpr <col:11> 'vector<float, 3>' <IntegralToFloating>
332+
`-ImplicitCastExpr <col:11> 'int3':'vector<int, 3>' <LValueToRValue>
333+
`-OpaqueValueExpr 0xTEMPORARY <col:11> 'int3':'vector<int, 3>' lvalue
334+
`-ImplicitCastExpr <col:11> 'vector<int, 3>' <FloatingToIntegral>
335+
`-ImplicitCastExpr <col:11> 'float3':'vector<float, 3>' <LValueToRValue>
336+
`-OpaqueValueExpr 0xSOURCE <col:11> 'float3':'vector<float, 3>' lvalue
337+
`-DeclRefExpr <col:11> 'float3':'vector<float, 3>' lvalue Var 'F' 'float3':'vector<float, 3>'
338+
339+
The AST representation is the same whether casting is required or not, which
340+
simplifies the code generation. IR generation does the following:
341+
342+
* Emit the argument lvalue expression.
343+
* Initialize the argument:
344+
* For ``inout`` arguments, emit the copy-initialization expression.
345+
* For ``out`` arguments, emit an uninitialized temporary.
346+
* Emit the call.
347+
* Emit the write-back BinaryOperator expression.

clang/docs/ReleaseNotes.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,10 @@ Non-comprehensive list of changes in this release
104104
New Compiler Flags
105105
------------------
106106

107+
- New option ``-fprofile-continuous`` added to enable continuous profile syncing to file (#GH124353, `docs <https://clang.llvm.org/docs/UsersManual.html#cmdoption-fprofile-continuous>`_).
108+
The feature has `existed <https://clang.llvm.org/docs/SourceBasedCodeCoverage.html#running-the-instrumented-program>`_
109+
for a while and this is just a user facing option.
110+
107111
Deprecated Compiler Flags
108112
-------------------------
109113

@@ -129,6 +133,8 @@ Improvements to Clang's diagnostics
129133
which are supposed to only exist once per program, but may get duplicated when
130134
built into a shared library.
131135
- Fixed a bug where Clang's Analysis did not correctly model the destructor behavior of ``union`` members (#GH119415).
136+
- A statement attribute applied to a ``case`` label no longer suppresses
137+
'bypassing variable initialization' diagnostics (#GH84072).
132138

133139
Improvements to Clang's time-trace
134140
----------------------------------

clang/docs/TypeSanitizer.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ reduce these impacts.
2727
The TypeSanitizer Algorithm
2828
===========================
2929
For each TBAA type-access descriptor, encoded in LLVM IR using TBAA Metadata, the instrumentation
30-
pass generates descriptor tales. Thus there is a unique pointer to each type (and access descriptor).
30+
pass generates descriptor tables. Thus there is a unique pointer to each type (and access descriptor).
3131
These tables are comdat (except for anonymous-namespace types), so the pointer values are unique
3232
across the program.
3333

clang/docs/analyzer/developer-docs/PerformanceInvestigation.rst

Lines changed: 93 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ Performance Investigation
55
Multiple factors contribute to the time it takes to analyze a file with Clang Static Analyzer.
66
A translation unit contains multiple entry points, each of which takes multiple steps to analyze.
77

8+
Performance analysis using ``-ftime-trace``
9+
===========================================
10+
811
You can add the ``-ftime-trace=file.json`` option to break down the analysis time into individual entry points and steps within each entry point.
912
You can explore the generated JSON file in a Chromium browser using the ``chrome://tracing`` URL,
1013
or using `speedscope <https://speedscope.app>`_.
@@ -19,9 +22,8 @@ Here is an example of a time trace produced with
1922
.. code-block:: bash
2023
:caption: Clang Static Analyzer invocation to generate a time trace of string.c analysis.
2124
22-
clang -cc1 -nostdsysteminc -analyze -analyzer-constraints=range \
23-
-setup-static-analyzer -analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection \
24-
-verify ./clang/test/Analysis/string.c \
25+
clang -cc1 -analyze -verify clang/test/Analysis/string.c \
26+
-analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection \
2527
-ftime-trace=trace.json -ftime-trace-granularity=1
2628
2729
.. image:: ../images/speedscope.png
@@ -45,3 +47,91 @@ Note: Both Chrome-tracing and speedscope tools might struggle with time traces a
4547
Luckily, in most cases the default max-steps boundary of 225 000 produces the traces of approximately that size
4648
for a single entry point.
4749
You can use ``-analyze-function=get_global_options`` together with ``-ftime-trace`` to narrow down analysis to a specific entry point.
50+
51+
52+
Performance analysis using ``perf``
53+
===================================
54+
55+
`Perf <https://perfwiki.github.io/main/>`_ is a tool for conducting sampling-based profiling.
56+
It's easy to start profiling; there are only two prerequisites.
57+
Build with ``-fno-omit-frame-pointer`` and debug info (``-g``).
58+
You can use release builds, but probably the easiest is to set the ``CMAKE_BUILD_TYPE=RelWithDebInfo``
59+
along with ``CMAKE_CXX_FLAGS="-fno-omit-frame-pointer"`` when configuring ``llvm``.
60+
Here is how to `get started <https://llvm.org/docs/CMake.html#quick-start>`_ if you are in trouble.
61+
62+
.. code-block:: bash
63+
:caption: Running the Clang Static Analyzer through ``perf`` to gather samples of the execution.
64+
65+
# -F: Sampling frequency, use `-F max` for maximal frequency
66+
# -g: Enable call-graph recording for both kernel and user space
67+
perf record -F 99 -g -- clang -cc1 -analyze -verify clang/test/Analysis/string.c \
68+
-analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection
69+
70+
Once you have the profile data, you can use it to produce a Flame graph.
71+
A Flame graph is a visual representation of the stack frames of the samples.
72+
Common stack frame prefixes are squashed together, making up a wider bar.
73+
The wider the bar, the more time was spent under that particular stack frame,
74+
giving a sense of how the overall execution time was spent.
75+
76+
Clone the `FlameGraph <https://github.com/brendangregg/FlameGraph>`_ git repository,
77+
as we will use some scripts from there to convert the ``perf`` samples into a Flame graph.
78+
It's also useful to check out Brendan Gregg's (the author of FlameGraph)
79+
`homepage <https://www.brendangregg.com/FlameGraphs/cpuflamegraphs.html>`_.
80+
81+
82+
.. code-block:: bash
83+
:caption: Converting the ``perf`` profile into a Flame graph, then opening it in Firefox.
84+
85+
perf script | /path/to/FlameGraph/stackcollapse-perf.pl > perf.folded
86+
/path/to/FlameGraph/flamegraph.pl perf.folded > perf.svg
87+
firefox perf.svg
88+
89+
.. image:: ../images/flamegraph.png
90+
91+
92+
Performance analysis using ``uftrace``
93+
======================================
94+
95+
`uftrace <https://github.com/namhyung/uftrace/wiki/Tutorial#getting-started>`_ is a great tool to generate rich profile data
96+
that you can use to focus and drill down into the timeline of your application.
97+
We will use it to generate Chromium trace JSON.
98+
In contrast to ``perf``, this approach statically instruments every function, so it should be more precise and thorough than the sampling-based approaches like ``perf``.
99+
In contrast to using ``-ftime-trace``, functions don't need to opt-in to be profiled using ``llvm::TimeTraceScope``.
100+
All functions are profiled due to automatic static instrumentation.
101+
102+
There is only one prerequisite to use this tool.
103+
You need to build the binary you are about to instrument using ``-pg`` or ``-finstrument-functions``.
104+
This will make it run substantially slower but allows rich instrumentation.
105+
It will also consume many gigabytes of storage for a single trace unless filter flags are used during recording.
106+
107+
.. code-block:: bash
108+
:caption: Recording with ``uftrace``, then dumping the result as a Chrome trace JSON.
109+
110+
uftrace record clang -cc1 -analyze -verify clang/test/Analysis/string.c \
111+
-analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection
112+
uftrace dump --filter=".*::AnalysisConsumer::HandleTranslationUnit" --time-filter=300 --chrome > trace.json
113+
114+
.. image:: ../images/uftrace_detailed.png
115+
116+
In this picture, you can see the functions below the Static Analyzer's entry point that take at least 300 nanoseconds to run, visualized by Chrome's ``about:tracing`` page.
117+
You can also see how deep the function call stacks can get due to AST visitors.
118+
119+
Using different filters can reduce the number of functions to record.
120+
For the common options, refer to the ``uftrace`` `documentation <https://github.com/namhyung/uftrace/blob/master/doc/uftrace-record.md#common-options>`_.
121+
122+
Similar filters can be applied for dumping too. That way you can reuse the same (detailed)
123+
recording to selectively focus on some special part using a refinement of the filter flags.
124+
Remember, the trace JSON needs to fit into Chrome's ``about:tracing`` or `speedscope <https://speedscope.app>`_,
125+
thus it needs to be of a limited size.
126+
If you do not apply filters on recording, you will collect a large trace and every dump operation
127+
would need to sieve through the much larger recording which may be annoying if done repeatedly.
128+
129+
If the trace JSON is still too large to load, have a look at the dump as plain text and look for frequent entries that refer to non-interesting parts.
130+
Once you have some of those, add them as ``--hide`` flags to the ``uftrace dump`` call.
131+
To see what functions appear frequently in the trace, use this command:
132+
133+
.. code-block:: bash
134+
135+
cat trace.json | grep -Po '"name":"(.+)"' | sort | uniq -c | sort -nr | head -n 50
136+
137+
``uftrace`` can also dump the report as a Flame graph using ``uftrace dump --flame-graph``.
72.6 KB
Loading

0 commit comments

Comments
 (0)