llvm
diff --git a/‎.github/CODEOWNERS‎
Lines changed: 1 addition & 1 deletion b/‎.github/CODEOWNERS‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/release-binaries-all.yml‎
Lines changed: 4 additions & 0 deletions b/‎.github/workflows/release-binaries-all.yml‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎bolt/include/bolt/Core/BinarySection.h‎
Lines changed: 2 additions & 8 deletions b/‎bolt/include/bolt/Core/BinarySection.h‎
Lines changed: 2 additions & 8 deletions
diff --git a/‎bolt/tools/driver/llvm-bolt.cpp‎
Lines changed: 1 addition & 11 deletions b/‎bolt/tools/driver/llvm-bolt.cpp‎
Lines changed: 1 addition & 11 deletions
diff --git a/‎bolt/unittests/Core/BinaryContext.cpp‎
Lines changed: 8 additions & 6 deletions b/‎bolt/unittests/Core/BinaryContext.cpp‎
Lines changed: 8 additions & 6 deletions
diff --git a/‎clang/docs/HLSL/FunctionCalls.rst‎
Lines changed: 60 additions & 35 deletions b/‎clang/docs/HLSL/FunctionCalls.rst‎
Lines changed: 60 additions & 35 deletions
diff --git a/‎clang/docs/ReleaseNotes.rst‎
Lines changed: 6 additions & 0 deletions b/‎clang/docs/ReleaseNotes.rst‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎clang/docs/TypeSanitizer.rst‎
Lines changed: 1 addition & 1 deletion b/‎clang/docs/TypeSanitizer.rst‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎clang/docs/analyzer/developer-docs/PerformanceInvestigation.rst‎
Lines changed: 93 additions & 3 deletions b/‎clang/docs/analyzer/developer-docs/PerformanceInvestigation.rst‎
Lines changed: 93 additions & 3 deletions
diff --git a/‎clang/docs/analyzer/images/flamegraph.png‎
72.6 KB b/‎clang/docs/analyzer/images/flamegraph.png‎
72.6 KB
@@ -131,7 +131,7 @@
 /bolt/ @aaupov @maksfb @rafaelauler @ayermolo @dcci @yota9
 
 # Bazel build system.
-/utils/bazel/ @rupprecht @keith
+/utils/bazel/ @rupprecht @keith @aaronmondal
 
 # InstallAPI and TextAPI
 /llvm/**/TextAPI/ @cyndyishida
 
@@ -27,6 +27,10 @@ on:
         required: true
         default: false
         type: boolean
+    secrets:
+      RELEASE_TASKS_USER_TOKEN:
+        description: "Secret used to check user permissions."
+        required: false
 
   pull_request:
     types:
 
@@ -359,15 +359,9 @@ class BinarySection {
 
   /// Add a new relocation at the given \p Offset.
   void addRelocation(uint64_t Offset, MCSymbol *Symbol, uint64_t Type,
-                     uint64_t Addend, uint64_t Value = 0,
-                     bool Pending = false) {
+                     uint64_t Addend, uint64_t Value = 0) {
     assert(Offset < getSize() && "offset not within section bounds");
-    if (!Pending) {
-      Relocations.emplace(Relocation{Offset, Symbol, Type, Addend, Value});
-    } else {
-      PendingRelocations.emplace_back(
-          Relocation{Offset, Symbol, Type, Addend, Value});
-    }
+    Relocations.emplace(Relocation{Offset, Symbol, Type, Addend, Value});
   }
 
   /// Add a dynamic relocation at the given \p Offset.
 
@@ -173,24 +173,14 @@ void boltMode(int argc, char **argv) {
   }
 }
 
-static std::string GetExecutablePath(const char *Argv0) {
-  SmallString<256> ExecutablePath(Argv0);
-  // Do a PATH lookup if Argv0 isn't a valid path.
-  if (!llvm::sys::fs::exists(ExecutablePath))
-    if (llvm::ErrorOr<std::string> P =
-            llvm::sys::findProgramByName(ExecutablePath))
-      ExecutablePath = *P;
-  return std::string(ExecutablePath);
-}
-
 int main(int argc, char **argv) {
   // Print a stack trace if we signal out.
   sys::PrintStackTraceOnErrorSignal(argv[0]);
   PrettyStackTraceProgram X(argc, argv);
 
   llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
 
-  std::string ToolPath = GetExecutablePath(argv[0]);
+  std::string ToolPath = llvm::sys::fs::getMainExecutable(argv[0], nullptr);
 
   // Initialize targets and assembly printers/parsers.
   llvm::InitializeAllTargetInfos();
 
@@ -93,12 +93,13 @@ TEST_P(BinaryContextTester, FlushPendingRelocCALL26) {
       DataSize, 4);
   MCSymbol *RelSymbol1 = BC->getOrCreateGlobalSymbol(4, "Func1");
   ASSERT_TRUE(RelSymbol1);
-  BS.addRelocation(8, RelSymbol1, ELF::R_AARCH64_CALL26, 0, 0, true);
+  BS.addPendingRelocation(
+      Relocation{8, RelSymbol1, ELF::R_AARCH64_CALL26, 0, 0});
   MCSymbol *RelSymbol2 = BC->getOrCreateGlobalSymbol(16, "Func2");
   ASSERT_TRUE(RelSymbol2);
-  BS.addRelocation(12, RelSymbol2, ELF::R_AARCH64_CALL26, 0, 0, true);
+  BS.addPendingRelocation(
+      Relocation{12, RelSymbol2, ELF::R_AARCH64_CALL26, 0, 0});
 
-  std::error_code EC;
   SmallVector<char> Vect(DataSize);
   raw_svector_ostream OS(Vect);
 
@@ -134,12 +135,13 @@ TEST_P(BinaryContextTester, FlushPendingRelocJUMP26) {
       (uint8_t *)Data, Size, 4);
   MCSymbol *RelSymbol1 = BC->getOrCreateGlobalSymbol(4, "Func1");
   ASSERT_TRUE(RelSymbol1);
-  BS.addRelocation(8, RelSymbol1, ELF::R_AARCH64_JUMP26, 0, 0, true);
+  BS.addPendingRelocation(
+      Relocation{8, RelSymbol1, ELF::R_AARCH64_JUMP26, 0, 0});
   MCSymbol *RelSymbol2 = BC->getOrCreateGlobalSymbol(16, "Func2");
   ASSERT_TRUE(RelSymbol2);
-  BS.addRelocation(12, RelSymbol2, ELF::R_AARCH64_JUMP26, 0, 0, true);
+  BS.addPendingRelocation(
+      Relocation{12, RelSymbol2, ELF::R_AARCH64_JUMP26, 0, 0});
 
-  std::error_code EC;
   SmallVector<char> Vect(Size);
   raw_svector_ostream OS(Vect);
 
 
@@ -248,13 +248,14 @@ which is a term made up for HLSL. A cx-value is a temporary value which may be
 the result of a cast, and stores its value back to an lvalue when the value
 expires.
 
-To represent this concept in Clang we introduce a new ``HLSLOutParamExpr``. An
-``HLSLOutParamExpr`` has two forms, one with a single sub-expression and one
-with two sub-expressions.
+To represent this concept in Clang we introduce a new ``HLSLOutArgExpr``. An
+``HLSLOutArgExpr`` has three sub-expressions:
 
-The single sub-expression form is used when the argument expression and the
-function parameter are the same type, so no cast is required. As in this
-example:
+* An OpaqueValueExpr of the argument lvalue expression.
+* An OpaqueValueExpr of the copy-initialized parameter temporary.
+* A BinaryOperator assigning the value of the second to the first.
+
+Given this example:
 
 .. code-block:: c++
 
@@ -267,23 +268,36 @@ example:
     Init(V);
   }
 
-The expected AST formulation for this code would be something like:
+The expected AST formulation for this code would be something like the example
+below. Due to the nature of OpaqueValueExpr nodes, the nodes repeat in the AST
+dump. The fake addresses ``0xSOURCE`` and ``0xTEMPORARY`` denote the source
+lvalue and argument temporary lvalue expressions.
 
 .. code-block:: text
 
   CallExpr 'void'
   |-ImplicitCastExpr 'void (*)(int &)' <FunctionToPointerDecay>
   | `-DeclRefExpr 'void (int &)' lvalue Function  'Init' 'void (int &)'
-  |-HLSLOutParamExpr 'int' lvalue inout
-    `-DeclRefExpr 'int' lvalue Var 'V' 'int'
-
-The ``HLSLOutParamExpr`` captures that the value is ``inout`` vs ``out`` to
-denote whether or not the temporary is initialized from the sub-expression. If
-no casting is required the sub-expression denotes the lvalue expression that the
-cx-value will be copied to when the value expires.
-
-The two sub-expression form of the AST node is required when the argument type
-is not the same as the parameter type. Given this example:
+  `-HLSLOutArgExpr <col:10> 'int' lvalue inout
+    |-OpaqueValueExpr 0xSOURCE <col:10> 'int' lvalue
+    | `-DeclRefExpr <col:10> 'int' lvalue Var 'V' 'int'
+    |-OpaqueValueExpr 0xTEMPORARY <col:10> 'int' lvalue
+    | `-ImplicitCastExpr <col:10> 'int' <LValueToRValue>
+    |   `-OpaqueValueExpr 0xSOURCE <col:10> 'int' lvalue
+    |     `-DeclRefExpr <col:10> 'int' lvalue Var 'V' 'int'
+    `-BinaryOperator <col:10> 'int' lvalue '='
+      |-OpaqueValueExpr 0xSOURCE <col:10> 'int' lvalue
+      | `-DeclRefExpr <col:10> 'int' lvalue Var 'V' 'int'
+      `-ImplicitCastExpr <col:10> 'int' <LValueToRValue>
+        `-OpaqueValueExpr 0xTEMPORARY <col:10> 'int' lvalue
+          `-ImplicitCastExpr <col:10> 'int' <LValueToRValue>
+            `-OpaqueValueExpr 0xSOURCE <col:10> 'int' lvalue
+              `-DeclRefExpr <col:10> 'int' lvalue Var 'V' 'int'
+
+The ``HLSLOutArgExpr`` captures that the value is ``inout`` vs ``out`` to
+denote whether or not the temporary is initialized from the sub-expression.
+
+The example below demonstrates argument casting:
 
 .. code-block:: c++
 
@@ -295,28 +309,39 @@ is not the same as the parameter type. Given this example:
     Trunc(F);
   }
 
-For this case the ``HLSLOutParamExpr`` will have sub-expressions to record both
+For this case the ``HLSLOutArgExpr`` will have sub-expressions to record both
 casting expression sequences for the initialization and write back:
 
 .. code-block:: text
 
   -CallExpr 'void'
     |-ImplicitCastExpr 'void (*)(int3 &)' <FunctionToPointerDecay>
     | `-DeclRefExpr 'void (int3 &)' lvalue Function 'Trunc' 'void (int3 &)'
-    `-HLSLOutParamExpr 'int3' lvalue inout
-      |-ImplicitCastExpr 'float3' <IntegralToFloating>
-      | `-ImplicitCastExpr 'int3' <LValueToRValue>
-      |   `-OpaqueValueExpr 'int3' lvalue
-      `-ImplicitCastExpr 'int3' <FloatingToIntegral>
-        `-ImplicitCastExpr 'float3' <LValueToRValue>
-          `-DeclRefExpr 'float3' lvalue 'F' 'float3'
-
-In this formation the write-back casts are captured as the first sub-expression
-and they cast from an ``OpaqueValueExpr``. In IR generation we can use the
-``OpaqueValueExpr`` as a placeholder for the ``HLSLOutParamExpr``'s temporary
-value on function return.
-
-In code generation this can be implemented with some targeted extensions to the
-Objective-C write-back support. Specifically extending CGCall.cpp's
-``EmitWriteback`` function to support casting expressions and emission of
-aggregate lvalues.
+    `-HLSLOutArgExpr <col:11> 'int3':'vector<int, 3>' lvalue inout
+      |-OpaqueValueExpr 0xSOURCE <col:11> 'float3':'vector<float, 3>' lvalue
+      | `-DeclRefExpr <col:11> 'float3':'vector<float, 3>' lvalue Var 'F' 'float3':'vector<float, 3>'
+      |-OpaqueValueExpr 0xTEMPORARY <col:11> 'int3':'vector<int, 3>' lvalue
+      | `-ImplicitCastExpr <col:11> 'vector<int, 3>' <FloatingToIntegral>
+      |   `-ImplicitCastExpr <col:11> 'float3':'vector<float, 3>' <LValueToRValue>
+      |     `-OpaqueValueExpr 0xSOURCE <col:11> 'float3':'vector<float, 3>' lvalue
+      |       `-DeclRefExpr <col:11> 'float3':'vector<float, 3>' lvalue Var 'F' 'float3':'vector<float, 3>'
+      `-BinaryOperator <col:11> 'float3':'vector<float, 3>' lvalue '='
+        |-OpaqueValueExpr 0xSOURCE <col:11> 'float3':'vector<float, 3>' lvalue
+        | `-DeclRefExpr <col:11> 'float3':'vector<float, 3>' lvalue Var 'F' 'float3':'vector<float, 3>'
+        `-ImplicitCastExpr <col:11> 'vector<float, 3>' <IntegralToFloating>
+          `-ImplicitCastExpr <col:11> 'int3':'vector<int, 3>' <LValueToRValue>
+            `-OpaqueValueExpr 0xTEMPORARY <col:11> 'int3':'vector<int, 3>' lvalue
+              `-ImplicitCastExpr <col:11> 'vector<int, 3>' <FloatingToIntegral>
+                `-ImplicitCastExpr <col:11> 'float3':'vector<float, 3>' <LValueToRValue>
+                  `-OpaqueValueExpr 0xSOURCE <col:11> 'float3':'vector<float, 3>' lvalue
+                    `-DeclRefExpr <col:11> 'float3':'vector<float, 3>' lvalue Var 'F' 'float3':'vector<float, 3>'
+
+The AST representation is the same whether casting is required or not, which
+simplifies the code generation. IR generation does the following:
+
+* Emit the argument lvalue expression.
+* Initialize the argument:
+  * For ``inout`` arguments, emit the copy-initialization expression.
+  * For ``out`` arguments, emit an uninitialized temporary.
+* Emit the call
+* Emit the write-back BinaryOperator expression.
@@ -104,6 +104,10 @@ Non-comprehensive list of changes in this release
 New Compiler Flags
 ------------------
 
+- New option ``-fprofile-continuous`` added to enable continuous profile syncing to file (#GH124353, `docs <https://clang.llvm.org/docs/UsersManual.html#cmdoption-fprofile-continuous>`_).
+  The feature has `existed <https://clang.llvm.org/docs/SourceBasedCodeCoverage.html#running-the-instrumented-program>`_
+  for a while and this is just a user-facing option.
+
 Deprecated Compiler Flags
 -------------------------
 
@@ -129,6 +133,8 @@ Improvements to Clang's diagnostics
   which are supposed to only exist once per program, but may get duplicated when
   built into a shared library.
 - Fixed a bug where Clang's Analysis did not correctly model the destructor behavior of ``union`` members (#GH119415).
+- A statement attribute applied to a ``case`` label no longer suppresses
+  'bypassing variable initialization' diagnostics (#GH84072).
 
 Improvements to Clang's time-trace
 ----------------------------------
 
@@ -27,7 +27,7 @@ reduce these impacts.
 The TypeSanitizer Algorithm
 ===========================
 For each TBAA type-access descriptor, encoded in LLVM IR using TBAA Metadata, the instrumentation 
-pass generates descriptor tales. Thus there is a unique pointer to each type (and access descriptor).
+pass generates descriptor tables. Thus there is a unique pointer to each type (and access descriptor).
 These tables are comdat (except for anonymous-namespace types), so the pointer values are unique 
 across the program.
 
 
@@ -5,6 +5,9 @@ Performance Investigation
 Multiple factors contribute to the time it takes to analyze a file with Clang Static Analyzer.
 A translation unit contains multiple entry points, each of which takes multiple steps to analyze.
 
+Performance analysis using ``-ftime-trace``
+===========================================
+
 You can add the ``-ftime-trace=file.json`` option to break down the analysis time into individual entry points and steps within each entry point.
 You can explore the generated JSON file in a Chromium browser using the ``chrome://tracing`` URL,
 or using `speedscope <https://speedscope.app>`_.
@@ -19,9 +22,8 @@ Here is an example of a time trace produced with
 .. code-block:: bash
    :caption: Clang Static Analyzer invocation to generate a time trace of string.c analysis.
 
-   clang -cc1 -nostdsysteminc -analyze -analyzer-constraints=range \
-         -setup-static-analyzer -analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection \
-         -verify ./clang/test/Analysis/string.c \
+   clang -cc1 -analyze -verify clang/test/Analysis/string.c \
+         -analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection \
          -ftime-trace=trace.json -ftime-trace-granularity=1
 
 .. image:: ../images/speedscope.png
@@ -45,3 +47,91 @@ Note: Both Chrome-tracing and speedscope tools might struggle with time traces a
 Luckily, in most cases the default max-steps boundary of 225 000 produces the traces of approximately that size
 for a single entry point.
 You can use ``-analyze-function=get_global_options`` together with ``-ftime-trace`` to narrow down analysis to a specific entry point.
+
+
+Performance analysis using ``perf``
+===================================
+
+`Perf <https://perfwiki.github.io/main/>`_ is a tool for conducting sampling-based profiling.
+Getting started with profiling is easy; there are only two prerequisites:
+build with ``-fno-omit-frame-pointer`` and with debug info (``-g``).
+You can use release builds, but probably the easiest is to set the ``CMAKE_BUILD_TYPE=RelWithDebInfo``
+along with ``CMAKE_CXX_FLAGS="-fno-omit-frame-pointer"`` when configuring ``llvm``.
+Here is how to `get started <https://llvm.org/docs/CMake.html#quick-start>`_ if you are in trouble.
+
+.. code-block:: bash
+   :caption: Running the Clang Static Analyzer through ``perf`` to gather samples of the execution.
+
+   # -F: Sampling frequency, use `-F max` for maximal frequency
+   # -g: Enable call-graph recording for both kernel and user space
+   perf record -F 99 -g --  clang -cc1 -analyze -verify clang/test/Analysis/string.c \
+         -analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection
+
+Once you have the profile data, you can use it to produce a Flame graph.
+A Flame graph is a visual representation of the stack frames of the samples.
+Common stack frame prefixes are squashed together, making up a wider bar.
+The wider the bar, the more time was spent under that particular stack frame,
+giving a sense of how the overall execution time was spent.
+
+Clone the `FlameGraph <https://github.com/brendangregg/FlameGraph>`_ git repository,
+as we will use some scripts from there to convert the ``perf`` samples into a Flame graph.
+It's also useful to check out Brendan Gregg's (the author of FlameGraph)
+`homepage <https://www.brendangregg.com/FlameGraphs/cpuflamegraphs.html>`_.
+
+
+.. code-block:: bash
+   :caption: Converting the ``perf`` profile into a Flame graph, then opening it in Firefox.
+
+   perf script | /path/to/FlameGraph/stackcollapse-perf.pl > perf.folded
+   /path/to/FlameGraph/flamegraph.pl perf.folded  > perf.svg
+   firefox perf.svg
+
+.. image:: ../images/flamegraph.png
+
+
+Performance analysis using ``uftrace``
+======================================
+
+`uftrace <https://github.com/namhyung/uftrace/wiki/Tutorial#getting-started>`_ is a great tool to generate rich profile data
+that you can use to focus and drill down into the timeline of your application.
+We will use it to generate Chromium trace JSON.
+In contrast to ``perf``, this approach statically instruments every function, so it should be more precise and thorough than the sampling-based approaches like ``perf``.
+In contrast to using ``-ftime-trace``, functions don't need to opt-in to be profiled using ``llvm::TimeTraceScope``.
+All functions are profiled due to automatic static instrumentation.
+
+There is only one prerequisite to use this tool.
+You need to build the binary you are about to instrument using ``-pg`` or ``-finstrument-functions``.
+This will make it run substantially slower but allows rich instrumentation.
+It will also consume many gigabytes of storage for a single trace unless filter flags are used during recording.
+
+.. code-block:: bash
+   :caption: Recording with ``uftrace``, then dumping the result as a Chrome trace JSON.
+
+   uftrace record  clang -cc1 -analyze -verify clang/test/Analysis/string.c \
+         -analyzer-checker=core,unix,alpha.unix.cstring,debug.ExprInspection
+   uftrace dump --filter=".*::AnalysisConsumer::HandleTranslationUnit" --time-filter=300 --chrome > trace.json
+
+.. image:: ../images/uftrace_detailed.png
+
+In this picture, you can see the functions below the Static Analyzer's entry point that take at least 300 nanoseconds to run, visualized by Chrome's ``about:tracing`` page.
+You can also see how deep the function call stacks can get due to AST visitors.
+
+Using different filters can reduce the number of functions to record.
+For the common options, refer to the ``uftrace`` `documentation <https://github.com/namhyung/uftrace/blob/master/doc/uftrace-record.md#common-options>`_.
+
+Similar filters can be applied for dumping too. That way you can reuse the same (detailed)
+recording to selectively focus on some special part using a refinement of the filter flags.
+Remember, the trace JSON needs to fit into Chrome's ``about:tracing`` or `speedscope <https://speedscope.app>`_,
+thus it needs to be of a limited size.
+If you do not apply filters on recording, you will collect a large trace and every dump operation
+would need to sieve through the much larger recording which may be annoying if done repeatedly.
+
+If the trace JSON is still too large to load, have a look at the dump as plain text and look for frequent entries that refer to non-interesting parts.
+Once you have some of those, add them as ``--hide`` flags to the ``uftrace dump`` call.
+To see what functions appear frequently in the trace, use this command:
+
+.. code-block:: bash
+
+   cat trace.json | grep -Po '"name":"(.+)"' | sort | uniq -c | sort -nr | head -n 50
+
+``uftrace`` can also dump the report as a Flame graph using ``uftrace dump --flame-graph``.