Skip to content

Commit 1bfad05

Browse files
authored
merge main into amd-staging (llvm#3781)
2 parents b15e53a + 74c49f6 commit 1bfad05

File tree

287 files changed

+9306
-2066
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

287 files changed

+9306
-2066
lines changed

bolt/test/AArch64/veneer-lld-abs.s

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
## Occasionally, we see the linker not generating $d symbols for long veneers
1414
## causing BOLT to fail veneer elimination.
15-
# RUN: llvm-objcopy --remove-symbol-prefix=\$d %t.exe %t.no-marker.exe
15+
# RUN: llvm-objcopy --remove-symbol-prefix='$d' %t.exe %t.no-marker.exe
1616
# RUN: llvm-bolt %t.no-marker.exe -o %t.no-marker.bolt \
1717
# RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-BOLT
1818
# RUN: llvm-objdump -d -j .text %t.no-marker.bolt | \

bolt/test/X86/double-jump.test

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,11 @@
11
## Test the double jump removal peephole.
22

3-
## This test has commands that rely on shell capabilities that won't execute
4-
## correctly on Windows e.g. subshell execution
5-
REQUIRES: shell
6-
73
RUN: %clangxx %cxxflags %p/Inputs/double_jump.cpp -o %t.exe
8-
RUN: (llvm-bolt %t.exe --peepholes=double-jumps \
9-
RUN: --eliminate-unreachable -o %t 2>&1 \
10-
RUN: && llvm-objdump -d %t --print-imm-hex --no-show-raw-insn) | FileCheck %s
4+
RUN: llvm-bolt %t.exe --peepholes=double-jumps \
5+
RUN: --eliminate-unreachable -o %t | FileCheck --check-prefix CHECK-BOLT %s
6+
RUN: llvm-objdump -d %t --print-imm-hex --no-show-raw-insn | FileCheck %s
117

12-
CHECK: BOLT-INFO: Peephole: 1 double jumps patched.
8+
CHECK-BOLT: BOLT-INFO: Peephole: 1 double jumps patched.
139

1410
CHECK: <_Z3foom>:
1511
CHECK-NEXT: pushq %rbp

bolt/test/X86/jmp-optimization.test

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
## Tests the optimization of functions that just do a tail call in the beginning.
22

3-
## This test has commands that rely on shell capabilities that won't execute
4-
## correctly on Windows e.g. unsupported parameter expansion
5-
REQUIRES: shell
6-
7-
RUN: %clangxx %cxxflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t
3+
RUN: %clangxx %cxxflags -O2 %S/Inputs/jmp_opt.cpp %S/Inputs/jmp_opt2.cpp \
4+
RUN: %S/Inputs/jmp_opt3.cpp -o %t
85
RUN: llvm-bolt -inline-small-functions %t -o %t.bolt
96
RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s
107

bolt/test/X86/jump-table-icp.test

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,16 @@ RUN: link_fdata %p/Inputs/jump_table_icp.s %t.o %t.fdata --nmtool llvm-nm
44
RUN: llvm-strip --strip-unneeded %t.o
55
RUN: %clang %cflags -no-pie %t.o -o %t.exe -Wl,-q
66

7-
## This test has commands that rely on shell capabilities that won't execute
8-
## correctly on Windows e.g. subshell execution
9-
REQUIRES: shell
10-
11-
RUN: (llvm-bolt %t.exe --data %t.fdata -o %t --relocs \
7+
RUN: llvm-bolt %t.exe --data %t.fdata -o %t --relocs \
128
RUN: --reorder-blocks=cache --split-functions --split-all-cold \
139
RUN: --use-gnu-stack --dyno-stats --indirect-call-promotion=jump-tables \
1410
RUN: --print-icp -v=0 \
1511
RUN: --enable-bat --print-cache-metrics \
1612
RUN: --icp-jt-remaining-percent-threshold=10 \
1713
RUN: --icp-jt-total-percent-threshold=2 \
1814
RUN: --indirect-call-promotion-topn=1 \
19-
RUN: --icp-jump-tables-targets --align-functions-max-bytes=7 2>&1 && \
20-
RUN: llvm-objdump -d %t --print-imm-hex) | FileCheck %s
15+
RUN: --icp-jump-tables-targets --align-functions-max-bytes=7 | FileCheck %s
16+
RUN: llvm-objdump -d %t --print-imm-hex | FileCheck --check-prefix CHECK-ASM %s
2117

2218
BOLT-INFO: ICP total indirect callsites = 0
2319
BOLT-INFO: ICP total jump table callsites = 2
@@ -107,14 +103,14 @@ CHECK-NEXT: Exec Count : 140
107103
CHECK: Predecessors: .Ltmp{{.*}}, .LFT{{.*}}
108104
CHECK: Successors: .Ltmp{{.*}} (mispreds: 0, count: 98)
109105

110-
CHECK: <_Z3inci>:
111-
CHECK: movq 0x{{.*}}(,%rax,8), %rax
112-
CHECK-NEXT: cmpq $0x{{.*}}, %rax
113-
CHECK-NEXT: je {{.*}} <_Z3inci+0x{{.*}}>
114-
CHECK-NEXT: jmpq *%rax
115-
116-
CHECK: <_Z7inc_dupi>:
117-
CHECK: movq 0x{{.*}}(,%rax,8), %rax
118-
CHECK-NEXT: cmpq $0x{{.*}}, %rax
119-
CHECK-NEXT: je {{.*}} <_Z7inc_dupi+0x{{.*}}>
120-
CHECK-NEXT: jmpq *%rax
106+
CHECK-ASM: <_Z3inci>:
107+
CHECK-ASM: movq 0x{{.*}}(,%rax,8), %rax
108+
CHECK-ASM-NEXT: cmpq $0x{{.*}}, %rax
109+
CHECK-ASM-NEXT: je {{.*}} <_Z3inci+0x{{.*}}>
110+
CHECK-ASM-NEXT: jmpq *%rax
111+
112+
CHECK-ASM: <_Z7inc_dupi>:
113+
CHECK-ASM: movq 0x{{.*}}(,%rax,8), %rax
114+
CHECK-ASM-NEXT: cmpq $0x{{.*}}, %rax
115+
CHECK-ASM-NEXT: je {{.*}} <_Z7inc_dupi+0x{{.*}}>
116+
CHECK-ASM-NEXT: jmpq *%rax

bolt/test/X86/shrinkwrapping.test

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,21 @@
22
## shrink-wrapping when optimizing a function without
33
## frame pointers.
44

5-
## This test has commands that rely on shell capabilities that won't execute
6-
## correctly on Windows e.g. subshell execution to capture command output.
7-
REQUIRES: shell
8-
95
RUN: %clangxx %cxxflags -no-pie %S/Inputs/exc4sw.S -o %t.exe -Wl,-q
106
RUN: llvm-bolt %t.exe -o %t --relocs --frame-opt=all \
117
RUN: --print-only=main --print-cfg \
128
RUN: --data=%p/Inputs/exc4sw.fdata --reorder-blocks=cache 2>&1 | \
139
RUN: FileCheck %s --check-prefix=CHECK-BOLT
1410

15-
RUN: llvm-objdump --dwarf=frames %t.exe | grep -A20 -e \
16-
RUN: `llvm-nm --numeric-sort %t.exe | grep main | tail -n 1 | \
17-
RUN: cut -f1 -d' ' | tail -c9` 2>&1 | FileCheck %s --check-prefix=CHECK-INPUT
11+
RUN: llvm-nm --numeric-sort %t.exe | grep main | tail -n 1 | \
12+
RUN: cut -f1 -d' ' | tail -c9 > %t.input_address
13+
RUN: llvm-objdump --dwarf=frames %t.exe | grep -A20 -f %t.input_address \
14+
RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-INPUT
1815

19-
RUN: llvm-objdump --dwarf=frames %t | grep -A20 -e \
20-
RUN: `llvm-nm --numeric-sort %t | grep main | tail -n 1 | cut -f1 -d' ' | \
21-
RUN: tail -c9` 2>&1 | FileCheck %s --check-prefix=CHECK-OUTPUT
16+
RUN: llvm-nm --numeric-sort %t | grep main | tail -n 1 | \
17+
RUN: cut -f1 -d' ' | tail -c9 > %t.output_address
18+
RUN: llvm-objdump --dwarf=frames %t | grep -A20 -f %t.output_address \
19+
RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-OUTPUT
2220

2321
CHECK-BOLT: Extern Entry Count: 100
2422
CHECK-BOLT: Shrink wrapping moved 2 spills inserting load/stores and 0 spills inserting push/pops

bolt/test/dump-dot-func.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Test the --dump-dot-func option with multiple functions
22
# (includes tests for both mangled/unmangled names)
33

4-
RUN: %clang++ %p/Inputs/multi-func.cpp -o %t.exe -Wl,-q
4+
RUN: %clangxx %p/Inputs/multi-func.cpp -o %t.exe -Wl,-q
55

66
# Test 1: --dump-dot-func with specific function name (mangled)
77
RUN: llvm-bolt %t.exe -o %t.bolt1 --dump-dot-func=_Z3addii -v=1 2>&1 | FileCheck %s --check-prefix=ADD

clang/docs/LanguageExtensions.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -943,7 +943,9 @@ Let ``VT`` be a vector type and ``ET`` the element type of ``VT``.
943943

944944
Each builtin accesses memory according to a provided boolean mask. These are
945945
provided as ``__builtin_masked_load`` and ``__builtin_masked_store``. The first
946-
argument is always boolean mask vector.
946+
argument is always a boolean mask vector. The ``__builtin_masked_load`` builtin
947+
takes an optional third vector argument that will be used for the result of the
948+
masked-off lanes. These builtins assume the memory is always aligned.
947949

948950
Example:
949951

clang/docs/ReleaseNotes.rst

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -139,14 +139,27 @@ Non-comprehensive list of changes in this release
139139

140140
- Added ``__builtin_elementwise_minnumnum`` and ``__builtin_elementwise_maxnumnum``.
141141

142-
- Trapping UBSan (e.g. ``-fsanitize-trap=undefined``) now emits a string describing the reason for
143-
trapping into the generated debug info. This feature allows debuggers (e.g. LLDB) to display
144-
the reason for trapping if the trap is reached. The string is currently encoded in the debug
145-
info as an artificial frame that claims to be inlined at the trap location. The function used
146-
for the artificial frame is an artificial function whose name encodes the reason for trapping.
147-
The encoding used is currently the same as ``__builtin_verbose_trap`` but might change in the future.
148-
This feature is enabled by default but can be disabled by compiling with
149-
``-fno-sanitize-annotate-debug-info-traps``.
142+
- Trapping UBSan (e.g. ``-fsanitize=undefined -fsanitize-trap=undefined``) now
143+
emits a string describing the reason for trapping into the generated debug
144+
info. This feature allows debuggers (e.g. LLDB) to display the reason for
145+
trapping if the trap is reached. The string is currently encoded in the debug
146+
info as an artificial frame that claims to be inlined at the trap location.
147+
The function used for the artificial frame is an artificial function whose
148+
name encodes the reason for trapping. The encoding used is currently the same
149+
as ``__builtin_verbose_trap`` but might change in the future. This feature is
150+
enabled by default but can be disabled by compiling with
151+
``-fno-sanitize-debug-trap-reasons``. The feature has a ``basic`` and
152+
``detailed`` mode (the default). The ``basic`` mode emits a hard-coded string
153+
per trap kind (e.g. ``Integer addition overflowed``) and the ``detailed`` mode
154+
emits a more descriptive string describing each individual trap (e.g. ``signed
155+
integer addition overflow in 'a + b'``). The ``detailed`` mode produces larger
156+
debug info than ``basic`` but is more helpful for debugging. The
157+
``-fsanitize-debug-trap-reasons=`` flag can be used to switch between the
158+
different modes or disable the feature entirely. Note that due to trap merging in
159+
optimized builds (i.e. in each function all traps of the same kind get merged
160+
into the same trap instruction) the trap reasons might be removed. To prevent
161+
this, build without optimizations (i.e. use ``-O0`` or use the ``optnone`` function
162+
attribute) or use the ``-fno-sanitize-merge=`` flag in optimized builds.
150163

151164
- ``__builtin_elementwise_max`` and ``__builtin_elementwise_min`` functions for integer types can
152165
now be used in constant expressions.
@@ -183,7 +196,9 @@ Non-comprehensive list of changes in this release
183196

184197
New Compiler Flags
185198
------------------
186-
- New option ``-fno-sanitize-annotate-debug-info-traps`` added to disable emitting trap reasons into the debug info when compiling with trapping UBSan (e.g. ``-fsanitize-trap=undefined``).
199+
- New option ``-fno-sanitize-debug-trap-reasons`` added to disable emitting trap reasons into the debug info when compiling with trapping UBSan (e.g. ``-fsanitize-trap=undefined``).
200+
- New option ``-fsanitize-debug-trap-reasons=`` added to control emitting trap reasons into the debug info when compiling with trapping UBSan (e.g. ``-fsanitize-trap=undefined``).
201+
187202

188203
Lanai Support
189204
^^^^^^^^^^^^^^

clang/include/clang/AST/Type.h

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,55 @@
2323

2424
namespace clang {
2525

26+
inline CXXRecordDecl *Type::getAsCXXRecordDecl() const {
27+
const auto *TT = dyn_cast<TagType>(CanonicalType);
28+
if (!isa_and_present<RecordType, InjectedClassNameType>(TT))
29+
return nullptr;
30+
auto *TD = TT->getOriginalDecl();
31+
if (isa<RecordType>(TT) && !isa<CXXRecordDecl>(TD))
32+
return nullptr;
33+
return cast<CXXRecordDecl>(TD)->getDefinitionOrSelf();
34+
}
35+
36+
inline CXXRecordDecl *Type::castAsCXXRecordDecl() const {
37+
const auto *TT = cast<TagType>(CanonicalType);
38+
return cast<CXXRecordDecl>(TT->getOriginalDecl())->getDefinitionOrSelf();
39+
}
40+
41+
inline RecordDecl *Type::getAsRecordDecl() const {
42+
const auto *TT = dyn_cast<TagType>(CanonicalType);
43+
if (!isa_and_present<RecordType, InjectedClassNameType>(TT))
44+
return nullptr;
45+
return cast<RecordDecl>(TT->getOriginalDecl())->getDefinitionOrSelf();
46+
}
47+
48+
inline RecordDecl *Type::castAsRecordDecl() const {
49+
const auto *TT = cast<TagType>(CanonicalType);
50+
return cast<RecordDecl>(TT->getOriginalDecl())->getDefinitionOrSelf();
51+
}
52+
53+
inline EnumDecl *Type::getAsEnumDecl() const {
54+
if (const auto *TT = dyn_cast<EnumType>(CanonicalType))
55+
return TT->getOriginalDecl()->getDefinitionOrSelf();
56+
return nullptr;
57+
}
58+
59+
inline EnumDecl *Type::castAsEnumDecl() const {
60+
return cast<EnumType>(CanonicalType)
61+
->getOriginalDecl()
62+
->getDefinitionOrSelf();
63+
}
64+
65+
inline TagDecl *Type::getAsTagDecl() const {
66+
if (const auto *TT = dyn_cast<TagType>(CanonicalType))
67+
return TT->getOriginalDecl()->getDefinitionOrSelf();
68+
return nullptr;
69+
}
70+
71+
inline TagDecl *Type::castAsTagDecl() const {
72+
return cast<TagType>(CanonicalType)->getOriginalDecl()->getDefinitionOrSelf();
73+
}
74+
2675
inline bool QualType::hasNonTrivialToPrimitiveDefaultInitializeCUnion() const {
2776
if (auto *RD = getTypePtr()->getBaseElementTypeUnsafe()->getAsRecordDecl())
2877
return hasNonTrivialToPrimitiveDefaultInitializeCUnion(RD);

clang/include/clang/AST/TypeBase.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2882,22 +2882,22 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase {
28822882
/// Retrieves the CXXRecordDecl that this type refers to, either
28832883
/// because the type is a RecordType or because it is the injected-class-name
28842884
/// type of a class template or class template partial specialization.
2885-
CXXRecordDecl *getAsCXXRecordDecl() const;
2886-
CXXRecordDecl *castAsCXXRecordDecl() const;
2885+
inline CXXRecordDecl *getAsCXXRecordDecl() const;
2886+
inline CXXRecordDecl *castAsCXXRecordDecl() const;
28872887

28882888
/// Retrieves the RecordDecl this type refers to.
2889-
RecordDecl *getAsRecordDecl() const;
2890-
RecordDecl *castAsRecordDecl() const;
2889+
inline RecordDecl *getAsRecordDecl() const;
2890+
inline RecordDecl *castAsRecordDecl() const;
28912891

28922892
/// Retrieves the EnumDecl this type refers to.
2893-
EnumDecl *getAsEnumDecl() const;
2894-
EnumDecl *castAsEnumDecl() const;
2893+
inline EnumDecl *getAsEnumDecl() const;
2894+
inline EnumDecl *castAsEnumDecl() const;
28952895

28962896
/// Retrieves the TagDecl that this type refers to, either
28972897
/// because the type is a TagType or because it is the injected-class-name
28982898
/// type of a class template or class template partial specialization.
2899-
TagDecl *getAsTagDecl() const;
2900-
TagDecl *castAsTagDecl() const;
2899+
inline TagDecl *getAsTagDecl() const;
2900+
inline TagDecl *castAsTagDecl() const;
29012901

29022902
/// If this is a pointer or reference to a RecordType, return the
29032903
/// CXXRecordDecl that the type refers to.

0 commit comments

Comments
 (0)