Skip to content

Commit 32bc393

Browse files
committed
Merge remote-tracking branch 'origin/main' into vplan-replicate-vpinstruction-by-vf
2 parents 6bcaac8 + 149f91b commit 32bc393

File tree

259 files changed

+4915
-1574
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

259 files changed

+4915
-1574
lines changed

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2216,7 +2216,8 @@ class MCPlusBuilder {
22162216
}
22172217

22182218
/// Print each annotation attached to \p Inst.
2219-
void printAnnotations(const MCInst &Inst, raw_ostream &OS) const;
2219+
void printAnnotations(const MCInst &Inst, raw_ostream &OS,
2220+
bool PrintMemData = false) const;
22202221

22212222
/// Remove annotation with a given \p Index.
22222223
///

bolt/lib/Core/BinaryContext.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2044,7 +2044,7 @@ void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
20442044
if (MCSymbol *Label = MIB->getInstLabel(Instruction))
20452045
OS << " # Label: " << *Label;
20462046

2047-
MIB->printAnnotations(Instruction, OS);
2047+
MIB->printAnnotations(Instruction, OS, PrintMemData || opts::PrintMemData);
20482048

20492049
if (opts::PrintDebugInfo)
20502050
printDebugInfo(OS, Instruction, Function, DwCtx.get());

bolt/lib/Core/MCPlusBuilder.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -378,8 +378,8 @@ void MCPlusBuilder::stripAnnotations(MCInst &Inst, bool KeepTC) const {
378378
setTailCall(Inst);
379379
}
380380

381-
void MCPlusBuilder::printAnnotations(const MCInst &Inst,
382-
raw_ostream &OS) const {
381+
void MCPlusBuilder::printAnnotations(const MCInst &Inst, raw_ostream &OS,
382+
bool PrintMemData) const {
383383
std::optional<unsigned> FirstAnnotationOp = getFirstAnnotationOpIndex(Inst);
384384
if (!FirstAnnotationOp)
385385
return;
@@ -390,7 +390,11 @@ void MCPlusBuilder::printAnnotations(const MCInst &Inst,
390390
const int64_t Value = extractAnnotationValue(Imm);
391391
const auto *Annotation = reinterpret_cast<const MCAnnotation *>(Value);
392392
if (Index >= MCAnnotation::kGeneric) {
393-
OS << " # " << AnnotationNames[Index - MCAnnotation::kGeneric] << ": ";
393+
std::string AnnotationName =
394+
AnnotationNames[Index - MCAnnotation::kGeneric];
395+
if (!PrintMemData && AnnotationName == "MemoryAccessProfile")
396+
continue;
397+
OS << " # " << AnnotationName << ": ";
394398
Annotation->print(OS);
395399
}
396400
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Check that --print-mem-data option works properly in llvm-bolt
2+
3+
# RUN: split-file %s %t
4+
# RUN: %clang %cflags -fPIC -pie %t/main.s -o %t.exe -nostdlib -Wl,-q
5+
# RUN: llvm-bolt %t.exe -o %t.bolt --print-mem-data=true --print-cfg \
6+
# RUN: --data %t/fdata | FileCheck %s -check-prefix=CHECK-PRINT
7+
# RUN: llvm-bolt %t.exe -o %t.bolt --print-cfg \
8+
# RUN: --data %t/fdata | FileCheck %s -check-prefix=CHECK-DEFAULT
9+
10+
# CHECK-PRINT: ldr w2, [x1], #0x4 # MemoryAccessProfile: 7 total counts :
11+
# CHECK-PRINT-NEXT: { 0x123: 1 },
12+
# CHECK-PRINT-NEXT: { 0x456: 2 },
13+
# CHECK-PRINT-NEXT: { 0xabc: 4 }
14+
# CHECK-DEFAULT-NOT: MemoryAccessProfile
15+
16+
#--- main.s
17+
.text
18+
.align 4
19+
.global main
20+
.type main, %function
21+
main:
22+
sub sp, sp, #48
23+
add x1, sp, 8
24+
add x3, sp, 48
25+
mov w0, 0
26+
.L2:
27+
ldr w2, [x1], 4
28+
add w0, w0, w2
29+
cmp x1, x3
30+
bne .L2
31+
add sp, sp, 48
32+
ret
33+
.size main, .-main
34+
35+
# The three memory access records generated by the load at
36+
# offset 0x10 in main.
37+
#--- fdata
38+
4 main 10 4 otherSym 123 1
39+
4 main 10 4 otherSym 456 2
40+
4 main 10 4 otherSym abc 4

clang/cmake/modules/CMakeLists.txt

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,14 @@ include(FindPrefixFromConfig)
88
# the usual CMake convention seems to be ${Project}Targets.cmake.
99
set(CLANG_INSTALL_PACKAGE_DIR "${CMAKE_INSTALL_PACKAGEDIR}/clang" CACHE STRING
1010
"Path for CMake subdirectory for Clang (defaults to '${CMAKE_INSTALL_PACKAGEDIR}/clang')")
11-
# CMAKE_INSTALL_PACKAGEDIR might be absolute, so don't reuse below.
12-
set(clang_cmake_builddir "${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/cmake/clang")
1311

1412
# Keep this in sync with llvm/cmake/CMakeLists.txt!
1513
set(LLVM_INSTALL_PACKAGE_DIR "${CMAKE_INSTALL_PACKAGEDIR}/llvm" CACHE STRING
1614
"Path for CMake subdirectory for LLVM (defaults to '${CMAKE_INSTALL_PACKAGEDIR}/llvm')")
1715
# CMAKE_INSTALL_PACKAGEDIR might be absolute, so don't reuse below.
18-
string(REPLACE "${CMAKE_CFG_INTDIR}" "." llvm_cmake_builddir "${LLVM_LIBRARY_DIR}")
19-
set(llvm_cmake_builddir "${llvm_cmake_builddir}/cmake/llvm")
16+
string(REPLACE "${CMAKE_CFG_INTDIR}" "." llvm_builddir "${LLVM_LIBRARY_DIR}")
17+
set(llvm_cmake_builddir "${llvm_builddir}/cmake/llvm")
18+
set(clang_cmake_builddir "${llvm_builddir}/cmake/clang")
2019

2120
get_property(CLANG_EXPORTS GLOBAL PROPERTY CLANG_EXPORTS)
2221
export(TARGETS ${CLANG_EXPORTS} FILE ${clang_cmake_builddir}/ClangTargets.cmake)

clang/include/clang/Basic/arm_sme.td

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -156,16 +156,10 @@ let SMETargetGuard = "sme2p1" in {
156156
////////////////////////////////////////////////////////////////////////////////
157157
// SME - Counting elements in a streaming vector
158158

159-
multiclass ZACount<string n_suffix> {
160-
def NAME : SInst<"sv" # n_suffix, "nv", "", MergeNone,
161-
"aarch64_sme_" # n_suffix,
162-
[IsOverloadNone, IsStreamingCompatible]>;
163-
}
164-
165-
defm SVCNTSB : ZACount<"cntsb">;
166-
defm SVCNTSH : ZACount<"cntsh">;
167-
defm SVCNTSW : ZACount<"cntsw">;
168-
defm SVCNTSD : ZACount<"cntsd">;
159+
def SVCNTSB : SInst<"svcntsb", "nv", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>;
160+
def SVCNTSH : SInst<"svcntsh", "nv", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>;
161+
def SVCNTSW : SInst<"svcntsw", "nv", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>;
162+
def SVCNTSD : SInst<"svcntsd", "nv", "", MergeNone, "aarch64_sme_cntsd", [IsOverloadNone, IsStreamingCompatible]>;
169163

170164
////////////////////////////////////////////////////////////////////////////////
171165
// SME - ADDHA/ADDVA

clang/lib/AST/ASTImporter.cpp

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1745,15 +1745,13 @@ ExpectedType ASTNodeImporter::VisitTagType(const TagType *T) {
17451745
if (!ToDeclOrErr)
17461746
return ToDeclOrErr.takeError();
17471747

1748-
if (DeclForType->isUsed()) {
1749-
// If there is a definition of the 'OriginalDecl', it should be imported to
1750-
// have all information for the type in the "To" AST. (In some cases no
1751-
// other reference may exist to the definition decl and it would not be
1752-
// imported otherwise.)
1753-
Expected<TagDecl *> ToDefDeclOrErr = import(DeclForType->getDefinition());
1754-
if (!ToDefDeclOrErr)
1755-
return ToDefDeclOrErr.takeError();
1756-
}
1748+
// If there is a definition of the 'OriginalDecl', it should be imported to
1749+
// have all information for the type in the "To" AST. (In some cases no
1750+
// other reference may exist to the definition decl and it would not be
1751+
// imported otherwise.)
1752+
Expected<TagDecl *> ToDefDeclOrErr = import(DeclForType->getDefinition());
1753+
if (!ToDefDeclOrErr)
1754+
return ToDefDeclOrErr.takeError();
17571755

17581756
if (T->isCanonicalUnqualified())
17591757
return Importer.getToContext().getCanonicalTagType(*ToDeclOrErr);

clang/lib/CodeGen/TargetBuiltins/ARM.cpp

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4304,9 +4304,11 @@ Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
43044304
// size in bytes.
43054305
if (Ops.size() == 5) {
43064306
Function *StreamingVectorLength =
4307-
CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
4307+
CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd);
43084308
llvm::Value *StreamingVectorLengthCall =
4309-
Builder.CreateCall(StreamingVectorLength);
4309+
Builder.CreateMul(Builder.CreateCall(StreamingVectorLength),
4310+
llvm::ConstantInt::get(Int64Ty, 8), "svl",
4311+
/* HasNUW */ true, /* HasNSW */ true);
43104312
llvm::Value *Mulvl =
43114313
Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
43124314
// The type of the ptr parameter is void *, so use Int8Ty here.
@@ -4918,6 +4920,26 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
49184920
// Handle builtins which require their multi-vector operands to be swapped
49194921
swapCommutativeSMEOperands(BuiltinID, Ops);
49204922

4923+
auto isCntsBuiltin = [&]() {
4924+
switch (BuiltinID) {
4925+
default:
4926+
return 0;
4927+
case SME::BI__builtin_sme_svcntsb:
4928+
return 8;
4929+
case SME::BI__builtin_sme_svcntsh:
4930+
return 4;
4931+
case SME::BI__builtin_sme_svcntsw:
4932+
return 2;
4933+
}
4934+
};
4935+
4936+
if (auto Mul = isCntsBuiltin()) {
4937+
llvm::Value *Cntd =
4938+
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd));
4939+
return Builder.CreateMul(Cntd, llvm::ConstantInt::get(Int64Ty, Mul),
4940+
"mulsvl", /* HasNUW */ true, /* HasNSW */ true);
4941+
}
4942+
49214943
// Should not happen!
49224944
if (Builtin->LLVMIntrinsic == 0)
49234945
return nullptr;

clang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1975,9 +1975,10 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
19751975
}
19761976

19771977
const llvm::Triple::ArchType DebugEntryValueArchs[] = {
1978-
llvm::Triple::x86, llvm::Triple::x86_64, llvm::Triple::aarch64,
1979-
llvm::Triple::arm, llvm::Triple::armeb, llvm::Triple::mips,
1980-
llvm::Triple::mipsel, llvm::Triple::mips64, llvm::Triple::mips64el};
1978+
llvm::Triple::x86, llvm::Triple::x86_64, llvm::Triple::aarch64,
1979+
llvm::Triple::arm, llvm::Triple::armeb, llvm::Triple::mips,
1980+
llvm::Triple::mipsel, llvm::Triple::mips64, llvm::Triple::mips64el,
1981+
llvm::Triple::riscv32, llvm::Triple::riscv64};
19811982

19821983
if (Opts.OptimizationLevel > 0 && Opts.hasReducedDebugInfo() &&
19831984
llvm::is_contained(DebugEntryValueArchs, T.getArch()))

clang/lib/Headers/f16cintrin.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,14 @@
2020
#define __DEFAULT_FN_ATTRS256 \
2121
__attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256)))
2222

23+
#if defined(__cplusplus) && (__cplusplus >= 201103L)
24+
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
25+
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
26+
#else
27+
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
28+
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
29+
#endif
30+
2331
/* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,
2432
* but that's because icc can emulate these without f16c using a library call.
2533
* Since we don't do that let's leave these in f16cintrin.h.
@@ -35,7 +43,7 @@
3543
/// \param __a
3644
/// A 16-bit half-precision float value.
3745
/// \returns The converted 32-bit float value.
38-
static __inline float __DEFAULT_FN_ATTRS128
46+
static __inline float __DEFAULT_FN_ATTRS128_CONSTEXPR
3947
_cvtsh_ss(unsigned short __a)
4048
{
4149
return (float)__builtin_bit_cast(__fp16, __a);
@@ -104,7 +112,7 @@ _cvtsh_ss(unsigned short __a)
104112
/// A 128-bit vector containing 16-bit half-precision float values. The lower
105113
/// 64 bits are used in the conversion.
106114
/// \returns A 128-bit vector of [4 x float] containing converted float values.
107-
static __inline __m128 __DEFAULT_FN_ATTRS128
115+
static __inline __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
108116
_mm_cvtph_ps(__m128i __a)
109117
{
110118
typedef __fp16 __v4fp16 __attribute__((__vector_size__(8)));
@@ -151,7 +159,7 @@ _mm_cvtph_ps(__m128i __a)
151159
/// converted to 32-bit single-precision float values.
152160
/// \returns A vector of [8 x float] containing the converted 32-bit
153161
/// single-precision float values.
154-
static __inline __m256 __DEFAULT_FN_ATTRS256
162+
static __inline __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
155163
_mm256_cvtph_ps(__m128i __a)
156164
{
157165
typedef __fp16 __v8fp16 __attribute__((__vector_size__(16), __aligned__(16)));
@@ -161,5 +169,7 @@ _mm256_cvtph_ps(__m128i __a)
161169

162170
#undef __DEFAULT_FN_ATTRS128
163171
#undef __DEFAULT_FN_ATTRS256
172+
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
173+
#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
164174

165175
#endif /* __F16CINTRIN_H */

0 commit comments

Comments
 (0)