llvm
diff --git a/‎bolt/include/bolt/Core/MCPlusBuilder.h‎
Lines changed: 2 additions & 1 deletion b/‎bolt/include/bolt/Core/MCPlusBuilder.h‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎bolt/lib/Core/BinaryContext.cpp‎
Lines changed: 1 addition & 1 deletion b/‎bolt/lib/Core/BinaryContext.cpp‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎bolt/lib/Core/MCPlusBuilder.cpp‎
Lines changed: 7 additions & 3 deletions b/‎bolt/lib/Core/MCPlusBuilder.cpp‎
Lines changed: 7 additions & 3 deletions
diff --git a/‎bolt/test/AArch64/print-mem-data.test‎
Lines changed: 40 additions & 0 deletions b/‎bolt/test/AArch64/print-mem-data.test‎
Lines changed: 40 additions & 0 deletions
diff --git a/‎clang/cmake/modules/CMakeLists.txt‎
Lines changed: 3 additions & 4 deletions b/‎clang/cmake/modules/CMakeLists.txt‎
Lines changed: 3 additions & 4 deletions
diff --git a/‎clang/include/clang/Basic/arm_sme.td‎
Lines changed: 4 additions & 10 deletions b/‎clang/include/clang/Basic/arm_sme.td‎
Lines changed: 4 additions & 10 deletions
diff --git a/‎clang/lib/AST/ASTImporter.cpp‎
Lines changed: 7 additions & 9 deletions b/‎clang/lib/AST/ASTImporter.cpp‎
Lines changed: 7 additions & 9 deletions
diff --git a/‎clang/lib/CodeGen/TargetBuiltins/ARM.cpp‎
Lines changed: 24 additions & 2 deletions b/‎clang/lib/CodeGen/TargetBuiltins/ARM.cpp‎
Lines changed: 24 additions & 2 deletions
diff --git a/‎clang/lib/Frontend/CompilerInvocation.cpp‎
Lines changed: 4 additions & 3 deletions b/‎clang/lib/Frontend/CompilerInvocation.cpp‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎clang/lib/Headers/f16cintrin.h‎
Lines changed: 13 additions & 3 deletions b/‎clang/lib/Headers/f16cintrin.h‎
Lines changed: 13 additions & 3 deletions
@@ -2216,7 +2216,8 @@ class MCPlusBuilder {
   }
 
   /// Print each annotation attached to \p Inst.
-  void printAnnotations(const MCInst &Inst, raw_ostream &OS) const;
+  void printAnnotations(const MCInst &Inst, raw_ostream &OS,
+                        bool PrintMemData = false) const;
 
   /// Remove annotation with a given \p Index.
   ///
 
@@ -2044,7 +2044,7 @@ void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
   if (MCSymbol *Label = MIB->getInstLabel(Instruction))
     OS << " # Label: " << *Label;
 
-  MIB->printAnnotations(Instruction, OS);
+  MIB->printAnnotations(Instruction, OS, PrintMemData || opts::PrintMemData);
 
   if (opts::PrintDebugInfo)
     printDebugInfo(OS, Instruction, Function, DwCtx.get());
 
@@ -378,8 +378,8 @@ void MCPlusBuilder::stripAnnotations(MCInst &Inst, bool KeepTC) const {
     setTailCall(Inst);
 }
 
-void MCPlusBuilder::printAnnotations(const MCInst &Inst,
-                                     raw_ostream &OS) const {
+void MCPlusBuilder::printAnnotations(const MCInst &Inst, raw_ostream &OS,
+                                     bool PrintMemData) const {
   std::optional<unsigned> FirstAnnotationOp = getFirstAnnotationOpIndex(Inst);
   if (!FirstAnnotationOp)
     return;
@@ -390,7 +390,11 @@ void MCPlusBuilder::printAnnotations(const MCInst &Inst,
     const int64_t Value = extractAnnotationValue(Imm);
     const auto *Annotation = reinterpret_cast<const MCAnnotation *>(Value);
     if (Index >= MCAnnotation::kGeneric) {
-      OS << " # " << AnnotationNames[Index - MCAnnotation::kGeneric] << ": ";
+      std::string AnnotationName =
+          AnnotationNames[Index - MCAnnotation::kGeneric];
+      if (!PrintMemData && AnnotationName == "MemoryAccessProfile")
+        continue;
+      OS << " # " << AnnotationName << ": ";
       Annotation->print(OS);
     }
   }
 
@@ -0,0 +1,40 @@
+# Check that --print-mem-data option works properly in llvm-bolt
+
+# RUN: split-file %s %t
+# RUN: %clang %cflags -fPIC -pie %t/main.s -o %t.exe -nostdlib -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.bolt --print-mem-data=true --print-cfg \
+# RUN:   --data %t/fdata | FileCheck %s -check-prefix=CHECK-PRINT
+# RUN: llvm-bolt %t.exe -o %t.bolt --print-cfg \
+# RUN:   --data %t/fdata | FileCheck %s -check-prefix=CHECK-DEFAULT
+
+# CHECK-PRINT: ldr	w2, [x1], #0x4 # MemoryAccessProfile: 7 total counts :
+# CHECK-PRINT-NEXT: { 0x123: 1 },
+# CHECK-PRINT-NEXT: { 0x456: 2 },
+# CHECK-PRINT-NEXT: { 0xabc: 4 }
+# CHECK-DEFAULT-NOT: MemoryAccessProfile
+
+#--- main.s
+  .text
+  .align 4
+  .global main
+  .type	main, %function
+main:
+  sub sp, sp, #48
+  add x1, sp, 8
+  add x3, sp, 48
+  mov w0, 0
+.L2:
+  ldr w2, [x1], 4
+  add w0, w0, w2
+  cmp x1, x3
+  bne .L2
+  add sp, sp, 48
+  ret
+  .size main, .-main
+
+# The three memory access data generated by the load at
+# offset 0x10 in the main.
+#--- fdata
+4 main 10 4 otherSym 123 1
+4 main 10 4 otherSym 456 2
+4 main 10 4 otherSym abc 4
@@ -8,15 +8,14 @@ include(FindPrefixFromConfig)
 # the usual CMake convention seems to be ${Project}Targets.cmake.
 set(CLANG_INSTALL_PACKAGE_DIR "${CMAKE_INSTALL_PACKAGEDIR}/clang" CACHE STRING
   "Path for CMake subdirectory for Clang (defaults to '${CMAKE_INSTALL_PACKAGEDIR}/clang')")
-# CMAKE_INSTALL_PACKAGEDIR might be absolute, so don't reuse below.
-set(clang_cmake_builddir "${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/cmake/clang")
 
 # Keep this in sync with llvm/cmake/CMakeLists.txt!
 set(LLVM_INSTALL_PACKAGE_DIR "${CMAKE_INSTALL_PACKAGEDIR}/llvm" CACHE STRING
   "Path for CMake subdirectory for LLVM (defaults to '${CMAKE_INSTALL_PACKAGEDIR}/llvm')")
 # CMAKE_INSTALL_PACKAGEDIR might be absolute, so don't reuse below.
-string(REPLACE "${CMAKE_CFG_INTDIR}" "." llvm_cmake_builddir "${LLVM_LIBRARY_DIR}")
-set(llvm_cmake_builddir "${llvm_cmake_builddir}/cmake/llvm")
+string(REPLACE "${CMAKE_CFG_INTDIR}" "." llvm_builddir "${LLVM_LIBRARY_DIR}")
+set(llvm_cmake_builddir "${llvm_builddir}/cmake/llvm")
+set(clang_cmake_builddir "${llvm_builddir}/cmake/clang")
 
 get_property(CLANG_EXPORTS GLOBAL PROPERTY CLANG_EXPORTS)
 export(TARGETS ${CLANG_EXPORTS} FILE ${clang_cmake_builddir}/ClangTargets.cmake)
 
@@ -156,16 +156,10 @@ let SMETargetGuard = "sme2p1" in {
 ////////////////////////////////////////////////////////////////////////////////
 // SME - Counting elements in a streaming vector
 
-multiclass ZACount<string n_suffix> {
-  def NAME : SInst<"sv" # n_suffix, "nv", "", MergeNone,
-                    "aarch64_sme_" # n_suffix,
-                    [IsOverloadNone, IsStreamingCompatible]>;
-}
-
-defm SVCNTSB : ZACount<"cntsb">;
-defm SVCNTSH : ZACount<"cntsh">;
-defm SVCNTSW : ZACount<"cntsw">;
-defm SVCNTSD : ZACount<"cntsd">;
+def SVCNTSB : SInst<"svcntsb", "nv", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>;
+def SVCNTSH : SInst<"svcntsh", "nv", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>;
+def SVCNTSW : SInst<"svcntsw", "nv", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>;
+def SVCNTSD : SInst<"svcntsd", "nv", "", MergeNone, "aarch64_sme_cntsd", [IsOverloadNone, IsStreamingCompatible]>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // SME - ADDHA/ADDVA
 
@@ -1745,15 +1745,13 @@ ExpectedType ASTNodeImporter::VisitTagType(const TagType *T) {
   if (!ToDeclOrErr)
     return ToDeclOrErr.takeError();
 
-  if (DeclForType->isUsed()) {
-    // If there is a definition of the 'OriginalDecl', it should be imported to
-    // have all information for the type in the "To" AST. (In some cases no
-    // other reference may exist to the definition decl and it would not be
-    // imported otherwise.)
-    Expected<TagDecl *> ToDefDeclOrErr = import(DeclForType->getDefinition());
-    if (!ToDefDeclOrErr)
-      return ToDefDeclOrErr.takeError();
-  }
+  // If there is a definition of the 'OriginalDecl', it should be imported to
+  // have all information for the type in the "To" AST. (In some cases no
+  // other reference may exist to the definition decl and it would not be
+  // imported otherwise.)
+  Expected<TagDecl *> ToDefDeclOrErr = import(DeclForType->getDefinition());
+  if (!ToDefDeclOrErr)
+    return ToDefDeclOrErr.takeError();
 
   if (T->isCanonicalUnqualified())
     return Importer.getToContext().getCanonicalTagType(*ToDeclOrErr);
 
@@ -4304,9 +4304,11 @@ Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
   // size in bytes.
   if (Ops.size() == 5) {
     Function *StreamingVectorLength =
-        CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
+        CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd);
     llvm::Value *StreamingVectorLengthCall =
-        Builder.CreateCall(StreamingVectorLength);
+        Builder.CreateMul(Builder.CreateCall(StreamingVectorLength),
+                          llvm::ConstantInt::get(Int64Ty, 8), "svl",
+                          /* HasNUW */ true, /* HasNSW */ true);
     llvm::Value *Mulvl =
         Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
     // The type of the ptr parameter is void *, so use Int8Ty here.
@@ -4918,6 +4920,26 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
   // Handle builtins which require their multi-vector operands to be swapped
   swapCommutativeSMEOperands(BuiltinID, Ops);
 
+  auto isCntsBuiltin = [&]() {
+    switch (BuiltinID) {
+    default:
+      return 0;
+    case SME::BI__builtin_sme_svcntsb:
+      return 8;
+    case SME::BI__builtin_sme_svcntsh:
+      return 4;
+    case SME::BI__builtin_sme_svcntsw:
+      return 2;
+    }
+  };
+
+  if (auto Mul = isCntsBuiltin()) {
+    llvm::Value *Cntd =
+        Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsd));
+    return Builder.CreateMul(Cntd, llvm::ConstantInt::get(Int64Ty, Mul),
+                             "mulsvl", /* HasNUW */ true, /* HasNSW */ true);
+  }
+
   // Should not happen!
   if (Builtin->LLVMIntrinsic == 0)
     return nullptr;
 
@@ -1975,9 +1975,10 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
   }
 
   const llvm::Triple::ArchType DebugEntryValueArchs[] = {
-      llvm::Triple::x86, llvm::Triple::x86_64, llvm::Triple::aarch64,
-      llvm::Triple::arm, llvm::Triple::armeb, llvm::Triple::mips,
-      llvm::Triple::mipsel, llvm::Triple::mips64, llvm::Triple::mips64el};
+      llvm::Triple::x86,     llvm::Triple::x86_64, llvm::Triple::aarch64,
+      llvm::Triple::arm,     llvm::Triple::armeb,  llvm::Triple::mips,
+      llvm::Triple::mipsel,  llvm::Triple::mips64, llvm::Triple::mips64el,
+      llvm::Triple::riscv32, llvm::Triple::riscv64};
 
   if (Opts.OptimizationLevel > 0 && Opts.hasReducedDebugInfo() &&
       llvm::is_contained(DebugEntryValueArchs, T.getArch()))
 
@@ -20,6 +20,14 @@
 #define __DEFAULT_FN_ATTRS256 \
   __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256)))
 
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#else
+#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
+#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#endif
+
 /* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,
  * but that's because icc can emulate these without f16c using a library call.
  * Since we don't do that let's leave these in f16cintrin.h.
@@ -35,7 +43,7 @@
 /// \param __a
 ///    A 16-bit half-precision float value.
 /// \returns The converted 32-bit float value.
-static __inline float __DEFAULT_FN_ATTRS128
+static __inline float __DEFAULT_FN_ATTRS128_CONSTEXPR
 _cvtsh_ss(unsigned short __a)
 {
   return (float)__builtin_bit_cast(__fp16, __a);
@@ -104,7 +112,7 @@ _cvtsh_ss(unsigned short __a)
 ///    A 128-bit vector containing 16-bit half-precision float values. The lower
 ///    64 bits are used in the conversion.
 /// \returns A 128-bit vector of [4 x float] containing converted float values.
-static __inline __m128 __DEFAULT_FN_ATTRS128
+static __inline __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_cvtph_ps(__m128i __a)
 {
   typedef __fp16 __v4fp16 __attribute__((__vector_size__(8)));
@@ -151,7 +159,7 @@ _mm_cvtph_ps(__m128i __a)
 ///    converted to 32-bit single-precision float values.
 /// \returns A vector of [8 x float] containing the converted 32-bit
 ///    single-precision float values.
-static __inline __m256 __DEFAULT_FN_ATTRS256
+static __inline __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_cvtph_ps(__m128i __a)
 {
   typedef __fp16 __v8fp16 __attribute__((__vector_size__(16), __aligned__(16)));
@@ -161,5 +169,7 @@ _mm256_cvtph_ps(__m128i __a)
 
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
+#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
+#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
 
 #endif /* __F16CINTRIN_H */
Original file line number	Diff line number	Diff line change
`@@ -2216,7 +2216,8 @@ class MCPlusBuilder {`
`2216`	`2216`	`}`
`2217`	`2217`
`2218`	`2218`	`/// Print each annotation attached to \p Inst.`
`2219`		`- void printAnnotations(const MCInst &Inst, raw_ostream &OS) const;`
	`2219`	`+ void printAnnotations(const MCInst &Inst, raw_ostream &OS,`
	`2220`	`+ bool PrintMemData = false) const;`
`2220`	`2221`
`2221`	`2222`	`/// Remove annotation with a given \p Index.`
`2222`	`2223`	`///`