ROCm
diff --git a/‎bolt/lib/Core/BinaryEmitter.cpp‎
Lines changed: 6 additions & 20 deletions b/‎bolt/lib/Core/BinaryEmitter.cpp‎
Lines changed: 6 additions & 20 deletions
diff --git a/‎bolt/lib/Rewrite/RewriteInstance.cpp‎
Lines changed: 2 additions & 35 deletions b/‎bolt/lib/Rewrite/RewriteInstance.cpp‎
Lines changed: 2 additions & 35 deletions
diff --git a/‎clang/include/clang/AST/OpenACCClause.h‎
Lines changed: 44 additions & 0 deletions b/‎clang/include/clang/AST/OpenACCClause.h‎
Lines changed: 44 additions & 0 deletions
diff --git a/‎clang/include/clang/Basic/OpenACCClauses.def‎
Lines changed: 2 additions & 0 deletions b/‎clang/include/clang/Basic/OpenACCClauses.def‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎clang/include/clang/Basic/arm_sve.td‎
Lines changed: 31 additions & 0 deletions b/‎clang/include/clang/Basic/arm_sve.td‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎clang/include/clang/Sema/SemaOpenACC.h‎
Lines changed: 3 additions & 0 deletions b/‎clang/include/clang/Sema/SemaOpenACC.h‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎clang/lib/AST/OpenACCClause.cpp‎
Lines changed: 32 additions & 1 deletion b/‎clang/lib/AST/OpenACCClause.cpp‎
Lines changed: 32 additions & 1 deletion
diff --git a/‎clang/lib/AST/StmtProfile.cpp‎
Lines changed: 9 additions & 0 deletions b/‎clang/lib/AST/StmtProfile.cpp‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎clang/lib/AST/TextNodeDumper.cpp‎
Lines changed: 2 additions & 0 deletions b/‎clang/lib/AST/TextNodeDumper.cpp‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎clang/lib/CodeGen/BackendUtil.cpp‎
Lines changed: 3 additions & 3 deletions b/‎clang/lib/CodeGen/BackendUtil.cpp‎
Lines changed: 3 additions & 3 deletions
@@ -761,30 +761,16 @@ void BinaryEmitter::emitJumpTables(const BinaryFunction &BF) {
       continue;
     if (opts::PrintJumpTables)
       JT.print(BC.outs());
-    if (opts::JumpTables == JTS_BASIC && BC.HasRelocations) {
+    if (opts::JumpTables == JTS_BASIC) {
       JT.updateOriginal();
     } else {
       MCSection *HotSection, *ColdSection;
-      if (opts::JumpTables == JTS_BASIC) {
-        // In non-relocation mode we have to emit jump tables in local sections.
-        // This way we only overwrite them when the corresponding function is
-        // overwritten.
-        std::string Name = ".local." + JT.Labels[0]->getName().str();
-        std::replace(Name.begin(), Name.end(), '/', '.');
-        BinarySection &Section =
-            BC.registerOrUpdateSection(Name, ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
-        Section.setAnonymous(true);
-        JT.setOutputSection(Section);
-        HotSection = BC.getDataSection(Name);
-        ColdSection = HotSection;
+      if (BF.isSimple()) {
+        HotSection = ReadOnlySection;
+        ColdSection = ReadOnlyColdSection;
       } else {
-        if (BF.isSimple()) {
-          HotSection = ReadOnlySection;
-          ColdSection = ReadOnlyColdSection;
-        } else {
-          HotSection = BF.hasProfile() ? ReadOnlySection : ReadOnlyColdSection;
-          ColdSection = HotSection;
-        }
+        HotSection = BF.hasProfile() ? ReadOnlySection : ReadOnlyColdSection;
+        ColdSection = HotSection;
       }
       emitJumpTable(JT, HotSection, ColdSection);
     }
 
@@ -78,7 +78,6 @@ namespace opts {
 extern cl::list<std::string> HotTextMoveSections;
 extern cl::opt<bool> Hugify;
 extern cl::opt<bool> Instrument;
-extern cl::opt<JumpTableSupportLevel> JumpTables;
 extern cl::opt<bool> KeepNops;
 extern cl::opt<bool> Lite;
 extern cl::list<std::string> ReorderData;
@@ -3848,20 +3847,6 @@ void RewriteInstance::mapCodeSections(BOLTLinker::SectionMapper MapSection) {
     assert(Function.getImageSize() <= Function.getMaxSize() &&
            "Unexpected large function");
 
-    // Map jump tables if updating in-place.
-    if (opts::JumpTables == JTS_BASIC) {
-      for (auto &JTI : Function.JumpTables) {
-        JumpTable *JT = JTI.second;
-        BinarySection &Section = JT->getOutputSection();
-        Section.setOutputAddress(JT->getAddress());
-        Section.setOutputFileOffset(getFileOffsetForAddress(JT->getAddress()));
-        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: mapping JT " << Section.getName()
-                          << " to 0x" << Twine::utohexstr(JT->getAddress())
-                          << '\n');
-        MapSection(Section, JT->getAddress());
-      }
-    }
-
     if (!Function.isSplit())
       continue;
 
@@ -5644,26 +5629,8 @@ void RewriteInstance::rewriteFile() {
     if (Function->getImageAddress() == 0 || Function->getImageSize() == 0)
       continue;
 
-    if (Function->getImageSize() > Function->getMaxSize()) {
-      assert(!BC->isX86() && "Unexpected large function.");
-      if (opts::Verbosity >= 1)
-        BC->errs() << "BOLT-WARNING: new function size (0x"
-                   << Twine::utohexstr(Function->getImageSize())
-                   << ") is larger than maximum allowed size (0x"
-                   << Twine::utohexstr(Function->getMaxSize())
-                   << ") for function " << *Function << '\n';
-
-      // Remove jump table sections that this function owns in non-reloc mode
-      // because we don't want to write them anymore.
-      if (!BC->HasRelocations && opts::JumpTables == JTS_BASIC) {
-        for (auto &JTI : Function->JumpTables) {
-          JumpTable *JT = JTI.second;
-          BinarySection &Section = JT->getOutputSection();
-          BC->deregisterSection(Section);
-        }
-      }
-      continue;
-    }
+    assert(Function->getImageSize() <= Function->getMaxSize() &&
+           "Unexpected large function");
 
     const auto HasAddress = [](const FunctionFragment &FF) {
       return FF.empty() ||
 
@@ -98,6 +98,28 @@ class OpenACCFinalizeClause : public OpenACCClause {
   }
 };
 
+// Represents the 'if_present' clause.
+class OpenACCIfPresentClause : public OpenACCClause {
+protected:
+  OpenACCIfPresentClause(SourceLocation BeginLoc, SourceLocation EndLoc)
+      : OpenACCClause(OpenACCClauseKind::IfPresent, BeginLoc, EndLoc) {}
+
+public:
+  static bool classof(const OpenACCClause *C) {
+    return C->getClauseKind() == OpenACCClauseKind::IfPresent;
+  }
+
+  static OpenACCIfPresentClause *
+  Create(const ASTContext &Ctx, SourceLocation BeginLoc, SourceLocation EndLoc);
+
+  child_range children() {
+    return child_range(child_iterator(), child_iterator());
+  }
+  const_child_range children() const {
+    return const_child_range(const_child_iterator(), const_child_iterator());
+  }
+};
+
 // Represents the 'independent' clause.
 class OpenACCIndependentClause : public OpenACCClause {
 protected:
@@ -722,6 +744,28 @@ class OpenACCAttachClause final
          ArrayRef<Expr *> VarList, SourceLocation EndLoc);
 };
 
+class OpenACCDetachClause final
+    : public OpenACCClauseWithVarList,
+      public llvm::TrailingObjects<OpenACCDetachClause, Expr *> {
+
+  OpenACCDetachClause(SourceLocation BeginLoc, SourceLocation LParenLoc,
+                      ArrayRef<Expr *> VarList, SourceLocation EndLoc)
+      : OpenACCClauseWithVarList(OpenACCClauseKind::Detach, BeginLoc, LParenLoc,
+                                 EndLoc) {
+    std::uninitialized_copy(VarList.begin(), VarList.end(),
+                            getTrailingObjects<Expr *>());
+    setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
+  }
+
+public:
+  static bool classof(const OpenACCClause *C) {
+    return C->getClauseKind() == OpenACCClauseKind::Detach;
+  }
+  static OpenACCDetachClause *
+  Create(const ASTContext &C, SourceLocation BeginLoc, SourceLocation LParenLoc,
+         ArrayRef<Expr *> VarList, SourceLocation EndLoc);
+};
+
 class OpenACCNoCreateClause final
     : public OpenACCClauseWithVarList,
       public llvm::TrailingObjects<OpenACCNoCreateClause, Expr *> {
 
@@ -38,13 +38,15 @@ VISIT_CLAUSE(Create)
 CLAUSE_ALIAS(PCreate, Create, true)
 CLAUSE_ALIAS(PresentOrCreate, Create, true)
 VISIT_CLAUSE(Default)
+VISIT_CLAUSE(Detach)
 VISIT_CLAUSE(DevicePtr)
 VISIT_CLAUSE(DeviceType)
 CLAUSE_ALIAS(DType, DeviceType, false)
 VISIT_CLAUSE(Finalize)
 VISIT_CLAUSE(FirstPrivate)
 VISIT_CLAUSE(Gang)
 VISIT_CLAUSE(If)
+VISIT_CLAUSE(IfPresent)
 VISIT_CLAUSE(Independent)
 VISIT_CLAUSE(NoCreate)
 VISIT_CLAUSE(NumGangs)
 
@@ -2495,3 +2495,34 @@ let SVETargetGuard = "sve2,fp8dot4", SMETargetGuard ="sme,ssve-fp8dot4" in {
   def SVFDOT_LANE_4WAY :  SInst<"svdot_lane[_f32_mf8]_fpm", "dd~~i>", "f", MergeNone, "aarch64_sve_fp8_fdot_lane", [VerifyRuntimeMode, SetsFPMR], [ImmCheck<3, ImmCheck0_3>]>;
 }
 
+let SVETargetGuard = "sve2,fp8fma", SMETargetGuard = "sme,ssve-fp8fma" in {
+  // 8-bit floating-point multiply-add long to half-precision (bottom)
+  def SVFMLALB   : SInst<"svmlalb[_f16_mf8]_fpm",   "dd~~>", "h", MergeNone, "aarch64_sve_fp8_fmlalb", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVFMLALB_N : SInst<"svmlalb[_n_f16_mf8]_fpm", "dd~!>", "h", MergeNone, "aarch64_sve_fp8_fmlalb", [VerifyRuntimeMode, SetsFPMR]>;
+
+  // 8-bit floating-point multiply-add long to ha_fpmlf-precision (bottom, indexed)
+  def SVFMLALB_LANE : SInst<"svmlalb_lane[_f16_mf8]_fpm", "dd~~i>", "h", MergeNone, "aarch64_sve_fp8_fmlalb_lane", [VerifyRuntimeMode, SetsFPMR], [ImmCheck<3, ImmCheck0_15>]>;
+
+  // 8-bit floating-point multiply-add long to half-precision (top)
+  def SVFMLALT   : SInst<"svmlalt[_f16_mf8]_fpm",   "dd~~>", "h", MergeNone, "aarch64_sve_fp8_fmlalt", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVFMLALT_N : SInst<"svmlalt[_n_f16_mf8]_fpm", "dd~!>", "h", MergeNone, "aarch64_sve_fp8_fmlalt", [VerifyRuntimeMode, SetsFPMR]>;
+
+  // 8-bit floating-point multiply-add long to half-precision (top, indexed)
+  def SVFMLALT_LANE : SInst<"svmlalt_lane[_f16_mf8]_fpm", "dd~~i>", "h", MergeNone, "aarch64_sve_fp8_fmlalt_lane", [VerifyRuntimeMode, SetsFPMR], [ImmCheck<3, ImmCheck0_15>]>;
+
+  // 8-bit floating-point multiply-add long long to single-precision (all top/bottom variants)
+  def SVFMLALLBB   : SInst<"svmlallbb[_f32_mf8]_fpm",   "dd~~>", "f", MergeNone, "aarch64_sve_fp8_fmlallbb", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVFMLALLBB_N : SInst<"svmlallbb[_n_f32_mf8]_fpm", "dd~!>", "f", MergeNone, "aarch64_sve_fp8_fmlallbb", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVFMLALLBT   : SInst<"svmlallbt[_f32_mf8]_fpm",   "dd~~>", "f", MergeNone, "aarch64_sve_fp8_fmlallbt", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVFMLALLBT_N : SInst<"svmlallbt[_n_f32_mf8]_fpm", "dd~!>", "f", MergeNone, "aarch64_sve_fp8_fmlallbt", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVFMLALLTB   : SInst<"svmlalltb[_f32_mf8]_fpm",   "dd~~>", "f", MergeNone, "aarch64_sve_fp8_fmlalltb", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVFMLALLTB_N : SInst<"svmlalltb[_n_f32_mf8]_fpm", "dd~!>", "f", MergeNone, "aarch64_sve_fp8_fmlalltb", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVFMLALLTT   : SInst<"svmlalltt[_f32_mf8]_fpm",   "dd~~>", "f", MergeNone, "aarch64_sve_fp8_fmlalltt", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVFMLALLTT_N : SInst<"svmlalltt[_n_f32_mf8]_fpm", "dd~!>", "f", MergeNone, "aarch64_sve_fp8_fmlalltt", [VerifyRuntimeMode, SetsFPMR]>;
+
+  // 8-bit floating-point multiply-add long long to single-precision (indexed, all top/bottom variants)
+  def SVFMLALLBB_LANE : SInst<"svmlallbb_lane[_f32_mf8]_fpm", "dd~~i>", "f", MergeNone, "aarch64_sve_fp8_fmlallbb_lane", [VerifyRuntimeMode, SetsFPMR], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVFMLALLBT_LANE : SInst<"svmlallbt_lane[_f32_mf8]_fpm", "dd~~i>", "f", MergeNone, "aarch64_sve_fp8_fmlallbt_lane", [VerifyRuntimeMode, SetsFPMR], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVFMLALLTB_LANE : SInst<"svmlalltb_lane[_f32_mf8]_fpm", "dd~~i>", "f", MergeNone, "aarch64_sve_fp8_fmlalltb_lane", [VerifyRuntimeMode, SetsFPMR], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVFMLALLTT_LANE : SInst<"svmlalltt_lane[_f32_mf8]_fpm", "dd~~i>", "f", MergeNone, "aarch64_sve_fp8_fmlalltt_lane", [VerifyRuntimeMode, SetsFPMR], [ImmCheck<3, ImmCheck0_7>]>;
+}
@@ -399,6 +399,7 @@ class SemaOpenACC : public SemaBase {
               ClauseKind == OpenACCClauseKind::PCreate ||
               ClauseKind == OpenACCClauseKind::PresentOrCreate ||
               ClauseKind == OpenACCClauseKind::Attach ||
+              ClauseKind == OpenACCClauseKind::Detach ||
               ClauseKind == OpenACCClauseKind::DevicePtr ||
               ClauseKind == OpenACCClauseKind::Reduction ||
               ClauseKind == OpenACCClauseKind::FirstPrivate) &&
@@ -535,6 +536,7 @@ class SemaOpenACC : public SemaBase {
               ClauseKind == OpenACCClauseKind::PCreate ||
               ClauseKind == OpenACCClauseKind::PresentOrCreate ||
               ClauseKind == OpenACCClauseKind::Attach ||
+              ClauseKind == OpenACCClauseKind::Detach ||
               ClauseKind == OpenACCClauseKind::DevicePtr ||
               ClauseKind == OpenACCClauseKind::FirstPrivate) &&
              "Parsed clause kind does not have a var-list");
@@ -571,6 +573,7 @@ class SemaOpenACC : public SemaBase {
               ClauseKind == OpenACCClauseKind::PCreate ||
               ClauseKind == OpenACCClauseKind::PresentOrCreate ||
               ClauseKind == OpenACCClauseKind::Attach ||
+              ClauseKind == OpenACCClauseKind::Detach ||
               ClauseKind == OpenACCClauseKind::DevicePtr ||
               ClauseKind == OpenACCClauseKind::FirstPrivate) &&
              "Parsed clause kind does not have a var-list");
 
@@ -33,7 +33,8 @@ bool OpenACCClauseWithVarList::classof(const OpenACCClause *C) {
          OpenACCFirstPrivateClause::classof(C) ||
          OpenACCDevicePtrClause::classof(C) ||
          OpenACCDevicePtrClause::classof(C) ||
-         OpenACCAttachClause::classof(C) || OpenACCNoCreateClause::classof(C) ||
+         OpenACCDetachClause::classof(C) || OpenACCAttachClause::classof(C) ||
+         OpenACCNoCreateClause::classof(C) ||
          OpenACCPresentClause::classof(C) || OpenACCCopyClause::classof(C) ||
          OpenACCCopyInClause::classof(C) || OpenACCCopyOutClause::classof(C) ||
          OpenACCReductionClause::classof(C) || OpenACCCreateClause::classof(C);
@@ -277,6 +278,16 @@ OpenACCAttachClause *OpenACCAttachClause::Create(const ASTContext &C,
   return new (Mem) OpenACCAttachClause(BeginLoc, LParenLoc, VarList, EndLoc);
 }
 
+OpenACCDetachClause *OpenACCDetachClause::Create(const ASTContext &C,
+                                                 SourceLocation BeginLoc,
+                                                 SourceLocation LParenLoc,
+                                                 ArrayRef<Expr *> VarList,
+                                                 SourceLocation EndLoc) {
+  void *Mem =
+      C.Allocate(OpenACCDetachClause::totalSizeToAlloc<Expr *>(VarList.size()));
+  return new (Mem) OpenACCDetachClause(BeginLoc, LParenLoc, VarList, EndLoc);
+}
+
 OpenACCDevicePtrClause *OpenACCDevicePtrClause::Create(const ASTContext &C,
                                                        SourceLocation BeginLoc,
                                                        SourceLocation LParenLoc,
@@ -452,6 +463,14 @@ OpenACCFinalizeClause *OpenACCFinalizeClause::Create(const ASTContext &C,
   return new (Mem) OpenACCFinalizeClause(BeginLoc, EndLoc);
 }
 
+OpenACCIfPresentClause *OpenACCIfPresentClause::Create(const ASTContext &C,
+                                                       SourceLocation BeginLoc,
+                                                       SourceLocation EndLoc) {
+  void *Mem = C.Allocate(sizeof(OpenACCIfPresentClause),
+                         alignof(OpenACCIfPresentClause));
+  return new (Mem) OpenACCIfPresentClause(BeginLoc, EndLoc);
+}
+
 //===----------------------------------------------------------------------===//
 //  OpenACC clauses printing methods
 //===----------------------------------------------------------------------===//
@@ -538,6 +557,13 @@ void OpenACCClausePrinter::VisitAttachClause(const OpenACCAttachClause &C) {
   OS << ")";
 }
 
+void OpenACCClausePrinter::VisitDetachClause(const OpenACCDetachClause &C) {
+  OS << "detach(";
+  llvm::interleaveComma(C.getVarList(), OS,
+                        [&](const Expr *E) { printExpr(E); });
+  OS << ")";
+}
+
 void OpenACCClausePrinter::VisitDevicePtrClause(
     const OpenACCDevicePtrClause &C) {
   OS << "deviceptr(";
@@ -697,3 +723,8 @@ void OpenACCClausePrinter::VisitVectorClause(const OpenACCVectorClause &C) {
 void OpenACCClausePrinter::VisitFinalizeClause(const OpenACCFinalizeClause &C) {
   OS << "finalize";
 }
+
+void OpenACCClausePrinter::VisitIfPresentClause(
+    const OpenACCIfPresentClause &C) {
+  OS << "if_present";
+}
@@ -2561,6 +2561,9 @@ void OpenACCClauseProfiler::VisitSelfClause(const OpenACCSelfClause &Clause) {
 void OpenACCClauseProfiler::VisitFinalizeClause(
     const OpenACCFinalizeClause &Clause) {}
 
+void OpenACCClauseProfiler::VisitIfPresentClause(
+    const OpenACCIfPresentClause &Clause) {}
+
 void OpenACCClauseProfiler::VisitNumGangsClause(
     const OpenACCNumGangsClause &Clause) {
   for (auto *E : Clause.getIntExprs())
@@ -2602,6 +2605,12 @@ void OpenACCClauseProfiler::VisitAttachClause(
     Profiler.VisitStmt(E);
 }
 
+void OpenACCClauseProfiler::VisitDetachClause(
+    const OpenACCDetachClause &Clause) {
+  for (auto *E : Clause.getVarList())
+    Profiler.VisitStmt(E);
+}
+
 void OpenACCClauseProfiler::VisitDevicePtrClause(
     const OpenACCDevicePtrClause &Clause) {
   for (auto *E : Clause.getVarList())
 
@@ -409,7 +409,9 @@ void TextNodeDumper::Visit(const OpenACCClause *C) {
     case OpenACCClauseKind::PCopy:
     case OpenACCClauseKind::PresentOrCopy:
     case OpenACCClauseKind::If:
+    case OpenACCClauseKind::IfPresent:
     case OpenACCClauseKind::Independent:
+    case OpenACCClauseKind::Detach:
     case OpenACCClauseKind::DevicePtr:
     case OpenACCClauseKind::Finalize:
     case OpenACCClauseKind::FirstPrivate:
 
@@ -738,6 +738,9 @@ static void addSanitizers(const Triple &TargetTriple,
     if (LangOpts.Sanitize.has(SanitizerKind::NumericalStability))
       MPM.addPass(NumericalStabilitySanitizerPass());
 
+    if (LangOpts.Sanitize.has(SanitizerKind::Realtime))
+      MPM.addPass(RealtimeSanitizerPass());
+
     auto ASanPass = [&](SanitizerMask Mask, bool CompileKernel) {
       if (LangOpts.Sanitize.has(Mask)) {
         bool UseGlobalGC = asanUseGlobalsGC(TargetTriple, CodeGenOpts);
@@ -1023,9 +1026,6 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
             FPM.addPass(BoundsCheckingPass());
           });
 
-    if (LangOpts.Sanitize.has(SanitizerKind::Realtime))
-      MPM.addPass(RealtimeSanitizerPass());
-
     // Don't add sanitizers if we are here from ThinLTO PostLink. That already
     // done on PreLink stage.
     if (!IsThinLTOPostLink) {