ROCm
diff --git a/‎bolt/test/AArch64/veneer-lld-abs.s‎
Lines changed: 1 addition & 1 deletion b/‎bolt/test/AArch64/veneer-lld-abs.s‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎bolt/test/X86/double-jump.test‎
Lines changed: 4 additions & 8 deletions b/‎bolt/test/X86/double-jump.test‎
Lines changed: 4 additions & 8 deletions
diff --git a/‎bolt/test/X86/jmp-optimization.test‎
Lines changed: 2 additions & 5 deletions b/‎bolt/test/X86/jmp-optimization.test‎
Lines changed: 2 additions & 5 deletions
diff --git a/‎bolt/test/X86/jump-table-icp.test‎
Lines changed: 14 additions & 18 deletions b/‎bolt/test/X86/jump-table-icp.test‎
Lines changed: 14 additions & 18 deletions
diff --git a/‎bolt/test/X86/shrinkwrapping.test‎
Lines changed: 8 additions & 10 deletions b/‎bolt/test/X86/shrinkwrapping.test‎
Lines changed: 8 additions & 10 deletions
diff --git a/‎bolt/test/dump-dot-func.test‎
Lines changed: 1 addition & 1 deletion b/‎bolt/test/dump-dot-func.test‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎clang/docs/LanguageExtensions.rst‎
Lines changed: 3 additions & 1 deletion b/‎clang/docs/LanguageExtensions.rst‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎clang/docs/ReleaseNotes.rst‎
Lines changed: 24 additions & 9 deletions b/‎clang/docs/ReleaseNotes.rst‎
Lines changed: 24 additions & 9 deletions
diff --git a/‎clang/include/clang/AST/Type.h‎
Lines changed: 49 additions & 0 deletions b/‎clang/include/clang/AST/Type.h‎
Lines changed: 49 additions & 0 deletions
diff --git a/‎clang/include/clang/AST/TypeBase.h‎
Lines changed: 8 additions & 8 deletions b/‎clang/include/clang/AST/TypeBase.h‎
Lines changed: 8 additions & 8 deletions
@@ -12,7 +12,7 @@
 
 ## Occasionally, we see the linker not generating $d symbols for long veneers
 ## causing BOLT to fail veneer elimination.
-# RUN: llvm-objcopy --remove-symbol-prefix=\$d %t.exe %t.no-marker.exe
+# RUN: llvm-objcopy --remove-symbol-prefix='$d' %t.exe %t.no-marker.exe
 # RUN: llvm-bolt %t.no-marker.exe -o %t.no-marker.bolt \
 # RUN:   2>&1 | FileCheck %s --check-prefix=CHECK-BOLT
 # RUN: llvm-objdump -d -j .text  %t.no-marker.bolt | \
 
@@ -1,15 +1,11 @@
 ## Test the double jump removal peephole.
 
-## This test has commands that rely on shell capabilities that won't execute
-## correctly on Windows e.g. subshell execution
-REQUIRES: shell
-
 RUN: %clangxx %cxxflags %p/Inputs/double_jump.cpp -o %t.exe
-RUN: (llvm-bolt %t.exe --peepholes=double-jumps \
-RUN:   --eliminate-unreachable -o %t 2>&1 \
-RUN:   && llvm-objdump -d %t --print-imm-hex --no-show-raw-insn) | FileCheck %s
+RUN: llvm-bolt %t.exe --peepholes=double-jumps \
+RUN:   --eliminate-unreachable -o %t | FileCheck --check-prefix CHECK-BOLT %s
+RUN: llvm-objdump -d %t --print-imm-hex --no-show-raw-insn | FileCheck %s
 
-CHECK: BOLT-INFO: Peephole: 1 double jumps patched.
+CHECK-BOLT: BOLT-INFO: Peephole: 1 double jumps patched.
 
 CHECK: <_Z3foom>:
 CHECK-NEXT: pushq %rbp
 
@@ -1,10 +1,7 @@
 ## Tests the optimization of functions that just do a tail call in the beginning.
 
-## This test has commands that rely on shell capabilities that won't execute
-## correctly on Windows e.g. unsupported parameter expansion
-REQUIRES: shell
-
-RUN: %clangxx %cxxflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t
+RUN: %clangxx %cxxflags -O2 %S/Inputs/jmp_opt.cpp %S/Inputs/jmp_opt2.cpp \
+RUN:   %S/Inputs/jmp_opt3.cpp -o %t
 RUN: llvm-bolt -inline-small-functions %t -o %t.bolt
 RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s
 
 
@@ -4,20 +4,16 @@ RUN: link_fdata %p/Inputs/jump_table_icp.s %t.o %t.fdata --nmtool llvm-nm
 RUN: llvm-strip --strip-unneeded %t.o
 RUN: %clang %cflags -no-pie %t.o -o %t.exe -Wl,-q
 
-## This test has commands that rely on shell capabilities that won't execute
-## correctly on Windows e.g. subshell execution
-REQUIRES: shell
-
-RUN: (llvm-bolt %t.exe --data %t.fdata -o %t --relocs \
+RUN: llvm-bolt %t.exe --data %t.fdata -o %t --relocs \
 RUN:   --reorder-blocks=cache --split-functions --split-all-cold \
 RUN:   --use-gnu-stack --dyno-stats --indirect-call-promotion=jump-tables \
 RUN:   --print-icp -v=0 \
 RUN:   --enable-bat --print-cache-metrics \
 RUN:   --icp-jt-remaining-percent-threshold=10 \
 RUN:   --icp-jt-total-percent-threshold=2 \
 RUN:   --indirect-call-promotion-topn=1 \
-RUN:   --icp-jump-tables-targets --align-functions-max-bytes=7 2>&1 && \
-RUN:   llvm-objdump -d %t --print-imm-hex) | FileCheck %s
+RUN:   --icp-jump-tables-targets --align-functions-max-bytes=7 | FileCheck %s
+RUN: llvm-objdump -d %t --print-imm-hex | FileCheck --check-prefix CHECK-ASM %s
 
 BOLT-INFO: ICP total indirect callsites = 0
 BOLT-INFO: ICP total jump table callsites = 2
@@ -107,14 +103,14 @@ CHECK-NEXT:   Exec Count : 140
 CHECK:   Predecessors: .Ltmp{{.*}}, .LFT{{.*}}
 CHECK:   Successors: .Ltmp{{.*}} (mispreds: 0, count: 98)
 
-CHECK:     <_Z3inci>:
-CHECK:        	movq    0x{{.*}}(,%rax,8), %rax
-CHECK-NEXT:    cmpq    $0x{{.*}}, %rax
-CHECK-NEXT:    je {{.*}} <_Z3inci+0x{{.*}}>
-CHECK-NEXT:   	jmpq   *%rax
-
-CHECK:     <_Z7inc_dupi>:
-CHECK:        	movq    0x{{.*}}(,%rax,8), %rax
-CHECK-NEXT:    cmpq $0x{{.*}}, %rax
-CHECK-NEXT:    je {{.*}} <_Z7inc_dupi+0x{{.*}}>
-CHECK-NEXT:   	jmpq   *%rax
+CHECK-ASM:     <_Z3inci>:
+CHECK-ASM:        	movq    0x{{.*}}(,%rax,8), %rax
+CHECK-ASM-NEXT:    cmpq    $0x{{.*}}, %rax
+CHECK-ASM-NEXT:    je {{.*}} <_Z3inci+0x{{.*}}>
+CHECK-ASM-NEXT:   	jmpq   *%rax
+
+CHECK-ASM:     <_Z7inc_dupi>:
+CHECK-ASM:        	movq    0x{{.*}}(,%rax,8), %rax
+CHECK-ASM-NEXT:    cmpq $0x{{.*}}, %rax
+CHECK-ASM-NEXT:    je {{.*}} <_Z7inc_dupi+0x{{.*}}>
+CHECK-ASM-NEXT:   	jmpq   *%rax
@@ -2,23 +2,21 @@
 ## shrink-wrapping when optimizing a function without
 ## frame pointers.
 
-## This test has commands that rely on shell capabilities that won't execute
-## correctly on Windows e.g. subshell execution to capture command output.
-REQUIRES: shell
-
 RUN: %clangxx %cxxflags -no-pie %S/Inputs/exc4sw.S -o %t.exe -Wl,-q
 RUN: llvm-bolt %t.exe -o %t --relocs --frame-opt=all \
 RUN:   --print-only=main --print-cfg \
 RUN:   --data=%p/Inputs/exc4sw.fdata --reorder-blocks=cache 2>&1 | \
 RUN:   FileCheck %s --check-prefix=CHECK-BOLT
 
-RUN: llvm-objdump --dwarf=frames %t.exe | grep -A20 -e \
-RUN:   `llvm-nm --numeric-sort %t.exe | grep main | tail -n 1 | \
-RUN:    cut -f1 -d' ' | tail -c9` 2>&1 | FileCheck %s --check-prefix=CHECK-INPUT
+RUN: llvm-nm --numeric-sort %t.exe | grep main | tail -n 1 | \
+RUN:    cut -f1 -d' ' | tail -c9 > %t.input_address
+RUN: llvm-objdump --dwarf=frames %t.exe | grep -A20 -f %t.input_address \
+RUN:   2>&1 | FileCheck %s --check-prefix=CHECK-INPUT
 
-RUN: llvm-objdump --dwarf=frames %t | grep -A20 -e \
-RUN:   `llvm-nm --numeric-sort %t | grep main | tail -n 1 | cut -f1 -d' ' | \
-RUN:    tail -c9` 2>&1 | FileCheck %s --check-prefix=CHECK-OUTPUT
+RUN: llvm-nm --numeric-sort %t | grep main | tail -n 1 | \
+RUN:   cut -f1 -d' ' | tail -c9 > %t.output_address
+RUN: llvm-objdump --dwarf=frames %t | grep -A20 -f %t.output_address \
+RUN:   2>&1 | FileCheck %s --check-prefix=CHECK-OUTPUT
 
 CHECK-BOLT: Extern Entry Count: 100
 CHECK-BOLT: Shrink wrapping moved 2 spills inserting load/stores and 0 spills inserting push/pops
 
@@ -1,7 +1,7 @@
 # Test the --dump-dot-func option with multiple functions 
 # (includes tests for both mangled/unmangled names)
 
-RUN: %clang++ %p/Inputs/multi-func.cpp -o %t.exe -Wl,-q
+RUN: %clangxx %p/Inputs/multi-func.cpp -o %t.exe -Wl,-q
 
 # Test 1: --dump-dot-func with specific function name (mangled)
 RUN: llvm-bolt %t.exe -o %t.bolt1 --dump-dot-func=_Z3addii -v=1 2>&1 | FileCheck %s --check-prefix=ADD
 
@@ -943,7 +943,9 @@ Let ``VT`` be a vector type and ``ET`` the element type of ``VT``.
 
 Each builtin accesses memory according to a provided boolean mask. These are
 provided as ``__builtin_masked_load`` and ``__builtin_masked_store``. The first
-argument is always boolean mask vector.
+argument is always boolean mask vector. The ``__builtin_masked_load`` builtin
+takes an optional third vector argument that will be used for the result of the
+masked-off lanes. These builtins assume the memory is always aligned.
 
 Example:
 
 
@@ -139,14 +139,27 @@ Non-comprehensive list of changes in this release
 
 - Added ``__builtin_elementwise_minnumnum`` and ``__builtin_elementwise_maxnumnum``.
 
-- Trapping UBSan (e.g. ``-fsanitize-trap=undefined``) now emits a string describing the reason for
-  trapping into the generated debug info. This feature allows debuggers (e.g. LLDB) to display
-  the reason for trapping if the trap is reached. The string is currently encoded in the debug
-  info as an artificial frame that claims to be inlined at the trap location. The function used
-  for the artificial frame is an artificial function whose name encodes the reason for trapping.
-  The encoding used is currently the same as ``__builtin_verbose_trap`` but might change in the future.
-  This feature is enabled by default but can be disabled by compiling with
-  ``-fno-sanitize-annotate-debug-info-traps``.
+- Trapping UBSan (e.g. ``-fsanitize=undefined -fsanitize-trap=undefined``) now
+  emits a string describing the reason for trapping into the generated debug
+  info. This feature allows debuggers (e.g. LLDB) to display the reason for
+  trapping if the trap is reached. The string is currently encoded in the debug
+  info as an artificial frame that claims to be inlined at the trap location.
+  The function used for the artificial frame is an artificial function whose
+  name encodes the reason for trapping. The encoding used is currently the same
+  as ``__builtin_verbose_trap`` but might change in the future. This feature is
+  enabled by default but can be disabled by compiling with
+  ``-fno-sanitize-debug-trap-reasons``. The feature has a ``basic`` and
+  ``detailed`` mode (the default). The ``basic`` mode emits a hard-coded string
+  per trap kind (e.g. ``Integer addition overflowed``) and the ``detailed`` mode
+  emits a more descriptive string describing each individual trap (e.g. ``signed
+  integer addition overflow in 'a + b'``). The ``detailed`` mode produces larger
+  debug info than ``basic`` but is more helpful for debugging. The
+  ``-fsanitize-debug-trap-reasons=`` flag can be used to switch between the
+  different modes or disable the feature entirely. Note due to trap merging in
+  optimized builds (i.e. in each function all traps of the same kind get merged
+  into the same trap instruction) the trap reasons might be removed. To prevent
+  this build without optimizations (i.e. use `-O0` or use the `optnone` function
+  attribute) or use the `fno-sanitize-merge=` flag in optimized builds.
 
 - ``__builtin_elementwise_max`` and ``__builtin_elementwise_min`` functions for integer types can
   now be used in constant expressions.
@@ -183,7 +196,9 @@ Non-comprehensive list of changes in this release
 
 New Compiler Flags
 ------------------
-- New option ``-fno-sanitize-annotate-debug-info-traps`` added to disable emitting trap reasons into the debug info when compiling with trapping UBSan (e.g. ``-fsanitize-trap=undefined``).
+- New option ``-fno-sanitize-debug-trap-reasons`` added to disable emitting trap reasons into the debug info when compiling with trapping UBSan (e.g. ``-fsanitize-trap=undefined``).
+- New option ``-fsanitize-debug-trap-reasons=`` added to control emitting trap reasons into the debug info when compiling with trapping UBSan (e.g. ``-fsanitize-trap=undefined``).
+
 
 Lanai Support
 ^^^^^^^^^^^^^^
 
@@ -23,6 +23,55 @@
 
 namespace clang {
 
+inline CXXRecordDecl *Type::getAsCXXRecordDecl() const {
+  const auto *TT = dyn_cast<TagType>(CanonicalType);
+  if (!isa_and_present<RecordType, InjectedClassNameType>(TT))
+    return nullptr;
+  auto *TD = TT->getOriginalDecl();
+  if (isa<RecordType>(TT) && !isa<CXXRecordDecl>(TD))
+    return nullptr;
+  return cast<CXXRecordDecl>(TD)->getDefinitionOrSelf();
+}
+
+inline CXXRecordDecl *Type::castAsCXXRecordDecl() const {
+  const auto *TT = cast<TagType>(CanonicalType);
+  return cast<CXXRecordDecl>(TT->getOriginalDecl())->getDefinitionOrSelf();
+}
+
+inline RecordDecl *Type::getAsRecordDecl() const {
+  const auto *TT = dyn_cast<TagType>(CanonicalType);
+  if (!isa_and_present<RecordType, InjectedClassNameType>(TT))
+    return nullptr;
+  return cast<RecordDecl>(TT->getOriginalDecl())->getDefinitionOrSelf();
+}
+
+inline RecordDecl *Type::castAsRecordDecl() const {
+  const auto *TT = cast<TagType>(CanonicalType);
+  return cast<RecordDecl>(TT->getOriginalDecl())->getDefinitionOrSelf();
+}
+
+inline EnumDecl *Type::getAsEnumDecl() const {
+  if (const auto *TT = dyn_cast<EnumType>(CanonicalType))
+    return TT->getOriginalDecl()->getDefinitionOrSelf();
+  return nullptr;
+}
+
+inline EnumDecl *Type::castAsEnumDecl() const {
+  return cast<EnumType>(CanonicalType)
+      ->getOriginalDecl()
+      ->getDefinitionOrSelf();
+}
+
+inline TagDecl *Type::getAsTagDecl() const {
+  if (const auto *TT = dyn_cast<TagType>(CanonicalType))
+    return TT->getOriginalDecl()->getDefinitionOrSelf();
+  return nullptr;
+}
+
+inline TagDecl *Type::castAsTagDecl() const {
+  return cast<TagType>(CanonicalType)->getOriginalDecl()->getDefinitionOrSelf();
+}
+
 inline bool QualType::hasNonTrivialToPrimitiveDefaultInitializeCUnion() const {
   if (auto *RD = getTypePtr()->getBaseElementTypeUnsafe()->getAsRecordDecl())
     return hasNonTrivialToPrimitiveDefaultInitializeCUnion(RD);
 
@@ -2882,22 +2882,22 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase {
   /// Retrieves the CXXRecordDecl that this type refers to, either
   /// because the type is a RecordType or because it is the injected-class-name
   /// type of a class template or class template partial specialization.
-  CXXRecordDecl *getAsCXXRecordDecl() const;
-  CXXRecordDecl *castAsCXXRecordDecl() const;
+  inline CXXRecordDecl *getAsCXXRecordDecl() const;
+  inline CXXRecordDecl *castAsCXXRecordDecl() const;
 
   /// Retrieves the RecordDecl this type refers to.
-  RecordDecl *getAsRecordDecl() const;
-  RecordDecl *castAsRecordDecl() const;
+  inline RecordDecl *getAsRecordDecl() const;
+  inline RecordDecl *castAsRecordDecl() const;
 
   /// Retrieves the EnumDecl this type refers to.
-  EnumDecl *getAsEnumDecl() const;
-  EnumDecl *castAsEnumDecl() const;
+  inline EnumDecl *getAsEnumDecl() const;
+  inline EnumDecl *castAsEnumDecl() const;
 
   /// Retrieves the TagDecl that this type refers to, either
   /// because the type is a TagType or because it is the injected-class-name
   /// type of a class template or class template partial specialization.
-  TagDecl *getAsTagDecl() const;
-  TagDecl *castAsTagDecl() const;
+  inline TagDecl *getAsTagDecl() const;
+  inline TagDecl *castAsTagDecl() const;
 
   /// If this is a pointer or reference to a RecordType, return the
   /// CXXRecordDecl that the type refers to.