llvm
diff --git a/bolt/runtime/CMakeLists.txt b/bolt/runtime/CMakeLists.txt
Lines changed: 6 additions & 1 deletion
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
Lines changed: 2 additions & 0 deletions
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
Lines changed: 3 additions & 3 deletions
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
Lines changed: 5 additions & 1 deletion
diff --git a/clang/test/DebugInfo/KeyInstructions/coro-dwarf-key-instrs.cpp b/clang/test/DebugInfo/KeyInstructions/coro-dwarf-key-instrs.cpp
Lines changed: 81 additions & 0 deletions
diff --git a/clang/test/DebugInfo/KeyInstructions/flag.cpp b/clang/test/DebugInfo/KeyInstructions/flag.cpp
Lines changed: 4 additions & 0 deletions
diff --git a/flang/include/flang/Common/format.h b/flang/include/flang/Common/format.h
Lines changed: 73 additions & 5 deletions
diff --git a/flang/lib/Lower/Support/ReductionProcessor.cpp b/flang/lib/Lower/Support/ReductionProcessor.cpp
Lines changed: 22 additions & 7 deletions
@@ -35,7 +35,12 @@ set(BOLT_RT_FLAGS
   -fno-exceptions
   -fno-rtti
   -fno-stack-protector
-  -fPIC)
+  -fPIC
+  # Runtime currently assumes omitted frame pointers for functions marked __attribute((naked)).
+  # Protect against distros adding -fno-omit-frame-pointer and compiling with GCC.
+  # Refs: llvm/llvm-project#148595 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77882
+  -fomit-frame-pointer
+)
 if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
   set(BOLT_RT_FLAGS ${BOLT_RT_FLAGS} 
     -mno-sse 
 
@@ -376,6 +376,8 @@ New Compiler Flags
 
 - New option ``-ignore-pch`` added to disable precompiled headers. It overrides ``-emit-pch`` and ``-include-pch``. (#GH142409, `PCHDocs <https://clang.llvm.org/docs/UsersManual.html#ignoring-a-pch-file>`_).
 
+- New options ``-g[no-]key-instructions`` added, disabled by default. Reduces jumpiness of debug stepping for optimized code in some debuggers (not LLDB at this time). Not recommended for use without optimizations. DWARF only. Note both the positive and negative flags imply ``-g``.
+
 Deprecated Compiler Flags
 -------------------------
 
 
@@ -4737,9 +4737,9 @@ def gno_embed_source : Flag<["-"], "gno-embed-source">, Group<g_flags_Group>,
 defm key_instructions : BoolGOption<"key-instructions",
     CodeGenOpts<"DebugKeyInstructions">, DefaultFalse,
     NegFlag<SetFalse>, PosFlag<SetTrue, [], [],
-        "Enable Key Instructions, which reduces the jumpiness of optimized code stepping (DWARF only)."
-        " Requires LLVM built with LLVM_EXPERIMENTAL_KEY_INSTRUCTIONS.">,
-    BothFlags<[HelpHidden], [ClangOption, CLOption, CC1Option]>>;
+        "Enable Key Instructions, which reduces the jumpiness of debug stepping in optimized C/C++ code"
+        " in some debuggers. DWARF only. Implies -g.">,
+    BothFlags<[], [ClangOption, CLOption, CC1Option]>>;
 def headerpad__max__install__names : Joined<["-"], "headerpad_max_install_names">;
 def help : Flag<["-", "--"], "help">,
     Visibility<[ClangOption, CC1Option, CC1AsOption,
 
@@ -4683,6 +4683,7 @@ void CGDebugInfo::emitFunctionStart(GlobalDecl GD, SourceLocation Loc,
   llvm::DIFile *Unit = getOrCreateFile(Loc);
   llvm::DIScope *FDContext = Unit;
   llvm::DINodeArray TParamsArray;
+  bool KeyInstructions = CGM.getCodeGenOpts().DebugKeyInstructions;
   if (!HasDecl) {
     // Use llvm function name.
     LinkageName = Fn->getName();
@@ -4699,6 +4700,9 @@ void CGDebugInfo::emitFunctionStart(GlobalDecl GD, SourceLocation Loc,
     }
     collectFunctionDeclProps(GD, Unit, Name, LinkageName, FDContext,
                              TParamsArray, Flags);
+    // Disable KIs if this is a coroutine.
+    KeyInstructions =
+        KeyInstructions && !isa_and_present<CoroutineBodyStmt>(FD->getBody());
   } else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D)) {
     Name = getObjCMethodName(OMD);
     Flags |= llvm::DINode::FlagPrototyped;
@@ -4760,7 +4764,7 @@ void CGDebugInfo::emitFunctionStart(GlobalDecl GD, SourceLocation Loc,
   llvm::DISubprogram *SP = DBuilder.createFunction(
       FDContext, Name, LinkageName, Unit, LineNo, DIFnType, ScopeLine,
       FlagsForDef, SPFlagsForDef, TParamsArray.get(), Decl, nullptr,
-      Annotations, "", CGM.getCodeGenOpts().DebugKeyInstructions);
+      Annotations, "", KeyInstructions);
   Fn->setSubprogram(SP);
 
   // We might get here with a VarDecl in the case we're generating
 
@@ -0,0 +1,81 @@
+// RUN: %clang_cc1 -disable-llvm-optzns -std=c++20 \
+// RUN:            -triple=x86_64 -dwarf-version=4 -debug-info-kind=limited \
+// RUN:            -emit-llvm -o - %s -gkey-instructions | \
+// RUN:            FileCheck %s
+
+// Check that for the coroutine below, we mark the created DISubprogram as
+// not having key instructions. This will prevent AsmPrinter from trying to
+// instrument the linetable with key-instructions for source-locations in
+// the coroutine scope.
+//
+// This is a temporary workaround for key instructions: we can instrument
+// coroutine code in the future, but it hasn't been done yet.
+//
+// File contents copied from coro-dwarf.cpp.
+
+namespace std {
+template <typename... T> struct coroutine_traits;
+
+template <class Promise = void> struct coroutine_handle {
+  coroutine_handle() = default;
+  static coroutine_handle from_address(void *) noexcept;
+};
+template <> struct coroutine_handle<void> {
+  static coroutine_handle from_address(void *) noexcept;
+  coroutine_handle() = default;
+  template <class PromiseType>
+  coroutine_handle(coroutine_handle<PromiseType>) noexcept;
+};
+} // namespace std
+
+struct suspend_always {
+  bool await_ready() noexcept;
+  void await_suspend(std::coroutine_handle<>) noexcept;
+  void await_resume() noexcept;
+};
+
+template <typename... Args> struct std::coroutine_traits<void, Args...> {
+  struct promise_type {
+    void get_return_object() noexcept;
+    suspend_always initial_suspend() noexcept;
+    suspend_always final_suspend() noexcept;
+    void return_void() noexcept;
+    promise_type();
+    ~promise_type() noexcept;
+    void unhandled_exception() noexcept;
+  };
+};
+
+// TODO: Not supported yet
+struct CopyOnly {
+  int val;
+  CopyOnly(const CopyOnly &) noexcept;
+  CopyOnly(CopyOnly &&) = delete;
+  ~CopyOnly();
+};
+
+struct MoveOnly {
+  int val;
+  MoveOnly(const MoveOnly &) = delete;
+  MoveOnly(MoveOnly &&) noexcept;
+  ~MoveOnly();
+};
+
+struct MoveAndCopy {
+  int val;
+  MoveAndCopy(const MoveAndCopy &) noexcept;
+  MoveAndCopy(MoveAndCopy &&) noexcept;
+  ~MoveAndCopy();
+};
+
+void consume(int, int, int) noexcept;
+
+void f_coro(int val, MoveOnly moParam, MoveAndCopy mcParam) {
+  consume(val, moParam.val, mcParam.val);
+  co_return;
+}
+
+// CHECK: ![[SP:[0-9]+]] = distinct !DISubprogram(name: "f_coro", linkageName: "_Z6f_coroi8MoveOnly11MoveAndCopy"
+// CHECK-NOT: keyInstructions:
+// CHECK: !DIFile
+
@@ -3,6 +3,10 @@
 //// Default: Off.
 // RUN: %clang -### -target x86_64 -c -gdwarf %s 2>&1 | FileCheck %s --check-prefixes=NO-KEY-INSTRUCTIONS
 
+//// Help.
+// RUN: %clang --help | FileCheck %s --check-prefix=HELP
+// HELP: -gkey-instructions  Enable Key Instructions, which reduces the jumpiness of debug stepping in optimized C/C++ code in some debuggers. DWARF only. Implies -g.
+
 // KEY-INSTRUCTIONS: "-gkey-instructions"
 // NO-KEY-INSTRUCTIONS-NOT: key-instructions
 
 
@@ -12,6 +12,7 @@
 #include "Fortran-consts.h"
 #include "enum-set.h"
 #include <cstring>
+#include <limits>
 
 // Define a FormatValidator class template to validate a format expression
 // of a given CHAR type.  To enable use in runtime library code as well as
@@ -28,6 +29,71 @@
 
 namespace Fortran::common {
 
+// AddOverflow and MulOverflow are copied from
+// llvm/include/llvm/Support/MathExtras.h and specialised to int64_t.
+
+/// Adds X and Y and stores the two's complement truncated sum in Result.
+/// Returns true if the exact sum overflowed int64_t, false otherwise.
+static inline bool AddOverflow(int64_t X, int64_t Y, int64_t &Result) {
+#if __has_builtin(__builtin_add_overflow)
+  return __builtin_add_overflow(X, Y, &Result);
+#else
+  // Add as unsigned so that wraparound is well defined.
+  const uint64_t UX = static_cast<uint64_t>(X);
+  const uint64_t UY = static_cast<uint64_t>(Y);
+  const uint64_t UResult = UX + UY;
+
+  // Convert the wrapped sum back to signed.
+  Result = static_cast<int64_t>(UResult);
+
+  // Two positive operands overflowed if the sum is not positive.
+  if (X > 0 && Y > 0) {
+    return Result <= 0;
+  }
+  // Two negative operands overflowed if the sum is not negative.
+  if (X < 0 && Y < 0) {
+    return Result >= 0;
+  }
+  return false; // Mixed signs or a zero operand never overflow.
+#endif
+}
+
+/// Multiplies X and Y and stores the two's complement truncated product in
+/// Result. Returns true if the exact product overflowed int64_t.
+static inline bool MulOverflow(int64_t X, int64_t Y, int64_t &Result) {
+#if __has_builtin(__builtin_mul_overflow)
+  return __builtin_mul_overflow(X, Y, &Result);
+#else
+  // Multiply the absolute values as unsigned; the sign is reapplied below.
+  const uint64_t UX =
+      X < 0 ? (0 - static_cast<uint64_t>(X)) : static_cast<uint64_t>(X);
+  const uint64_t UY =
+      Y < 0 ? (0 - static_cast<uint64_t>(Y)) : static_cast<uint64_t>(Y);
+  const uint64_t UResult = UX * UY;
+
+  // The product is negative exactly when the operand signs differ.
+  const bool IsNegative = (X < 0) ^ (Y < 0);
+  Result = IsNegative ? (0 - UResult) : UResult;
+
+  // If either argument was 0, the result is 0 and no overflow occurs.
+  if (UX == 0 || UY == 0) {
+    return false;
+  }
+
+  // UX and UY are in [1, 2^n], where n is the number of value bits (63).
+  // Check how the max allowed absolute value (2^n for a negative product,
+  // 2^n - 1 for a positive one) divided by one argument compares to the other.
+  if (IsNegative) {
+    return UX > (static_cast<uint64_t>(std::numeric_limits<int64_t>::max()) +
+                    uint64_t(1)) /
+        UY;
+  } else {
+    return UX >
+        (static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) / UY;
+  }
+#endif
+}
+
 struct FormatMessage {
   const char *text; // message text; may have one %s argument
   const char *arg; // optional %s argument value
@@ -214,16 +280,18 @@ template <typename CHAR> void FormatValidator<CHAR>::NextToken() {
   case '7':
   case '8':
   case '9': {
-    int64_t lastValue;
     const CHAR *lastCursor;
     integerValue_ = 0;
     bool overflow{false};
     do {
-      lastValue = integerValue_;
       lastCursor = cursor_;
-      integerValue_ = 10 * integerValue_ + c - '0';
-      if (lastValue > integerValue_) {
-        overflow = true;
+      if (!overflow) {
+        overflow =
+            MulOverflow(static_cast<int64_t>(10), integerValue_, integerValue_);
+      }
+      if (!overflow) {
+        overflow = AddOverflow(
+            integerValue_, static_cast<int64_t>(c - '0'), integerValue_);
       }
       c = NextChar();
     } while (c >= '0' && c <= '9');
 
@@ -633,13 +633,25 @@ void ReductionProcessor::processReductionArguments(
     }
   }
 
-  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
-
   // Reduction variable processing common to both intrinsic operators and
   // procedure designators
   fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+  mlir::OpBuilder::InsertPoint dcIP;
+  constexpr bool isDoConcurrent =
+      std::is_same_v<OpType, fir::DeclareReductionOp>;
+
+  if (isDoConcurrent) {
+    dcIP = builder.saveInsertionPoint();
+    builder.setInsertionPoint(
+        builder.getRegion().getParentOfType<fir::DoConcurrentOp>());
+  }
+
   for (const semantics::Symbol *symbol : reductionSymbols) {
     mlir::Value symVal = converter.getSymbolAddress(*symbol);
+
+    if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
+      symVal = declOp.getBase();
+
     mlir::Type eleType;
     auto refType = mlir::dyn_cast_or_null<fir::ReferenceType>(symVal.getType());
     if (refType)
@@ -667,13 +679,13 @@ void ReductionProcessor::processReductionArguments(
       // boxed arrays are passed as values not by reference. Unfortunately,
       // we can't pass a box by value to omp.redution_declare, so turn it
       // into a reference
-
+      auto oldIP = builder.saveInsertionPoint();
+      builder.setInsertionPointToStart(builder.getAllocaBlock());
       auto alloca =
           builder.create<fir::AllocaOp>(currentLocation, symVal.getType());
+      builder.restoreInsertionPoint(oldIP);
       builder.create<fir::StoreOp>(currentLocation, symVal, alloca);
       symVal = alloca;
-    } else if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>()) {
-      symVal = declOp.getBase();
     }
 
     // this isn't the same as the by-val and by-ref passing later in the
@@ -693,7 +705,7 @@ void ReductionProcessor::processReductionArguments(
   unsigned idx = 0;
   for (auto [symVal, isByRef] : llvm::zip(reductionVars, reduceVarByRef)) {
     auto redType = mlir::cast<fir::ReferenceType>(symVal.getType());
-    const auto &kindMap = firOpBuilder.getKindMap();
+    const auto &kindMap = builder.getKindMap();
     std::string reductionName;
     ReductionIdentifier redId;
 
@@ -745,9 +757,12 @@ void ReductionProcessor::processReductionArguments(
     OpType decl = createDeclareReduction<OpType>(
         converter, reductionName, redId, redType, currentLocation, isByRef);
     reductionDeclSymbols.push_back(
-        mlir::SymbolRefAttr::get(firOpBuilder.getContext(), decl.getSymName()));
+        mlir::SymbolRefAttr::get(builder.getContext(), decl.getSymName()));
     ++idx;
   }
+
+  if (isDoConcurrent)
+    builder.restoreInsertionPoint(dcIP);
 }
 
 const semantics::SourceName