16 changes: 16 additions & 0 deletions include/onnxruntime/core/common/common.h
@@ -66,6 +66,22 @@ using TimePoint = std::chrono::high_resolution_clock::time_point;
 #define ORT_ATTRIBUTE_UNUSED
 #endif
 
+// ORT_CONSTINIT
+//
+// C++20 constinit keyword ensures that a variable is initialized at compile time
+// and can be safely used in static initialization contexts
+#if defined(__cpp_constinit) && __cpp_constinit >= 201907L
+#define ORT_CONSTINIT constinit
+#elif defined(__clang__) && defined(__has_cpp_attribute)
+#if __has_cpp_attribute(clang::require_constant_initialization)
+#define ORT_CONSTINIT [[clang::require_constant_initialization]]
+#endif
+#endif
+
+#ifndef ORT_CONSTINIT
+#define ORT_CONSTINIT
Member:
Overall, this might work, but we need to resolve the non-C++20 case, because without constinit, static mutexes are worse than function-local statics.

Member Author:
Currently only the macOS platform has this problem, and the macOS build already uses C++20.
I will continue working on upgrading all pipelines to C++20, but I would prefer to get this PR merged before that work is done, since a lot of users are waiting for it. The constinit keyword is a sanity check; it should not affect functionality.

Contributor:

Instead of an empty definition, could we use #ifdefs in the files? If constinit is available, use it, with an #ifdef around the file-scope declaration; otherwise, keep an #ifdef around the existing function-scope declaration.

Member Author:

Then it would behave very differently across platforms, since function-local statics are destroyed earlier than global variables, and that would increase the complexity further.

Member (@yuslepukhin, Oct 10, 2025):
The ideal way to deal with this is to move the mutex into the structure it is trying to protect, rather than making it static. I realize that may not be possible in every case, but I can see that it is possible in some.

Member Author:

For example, in onnxruntime/core/providers/shared_library/provider_bridge_provider.cc, the lifetime of the mutex is shorter than that of the object it protects. So the current code is clearly wrong.

Contributor:

> Then it would behave very differently across platforms, since function-local statics are destroyed earlier than global variables, and that would increase the complexity further.

Isn't the problem we're trying to address limited to macOS, or is that not the case?

For macOS we have C++20. If we use the #ifdefs, the current behavior on non-C++20 builds is unchanged, and we migrate to constinit automatically as soon as we build with C++20, which should be the safe long-term solution.

Isn't that more predictable than changing function-scope mutexes to file-scope for non-C++20 builds?

+#endif
 
 #ifdef ORT_NO_EXCEPTIONS
 // Print the given final message, the message must be a null terminated char*
 // ORT will abort after printing the message.
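For readers unfamiliar with the keyword, a brief illustrative sketch (editorial, not part of the PR): constinit makes the compiler reject any global whose initialization is not a constant expression, and because constant initialization happens before all dynamic initialization, such an object is also destroyed after every dynamically initialized static object. std::mutex has a constexpr default constructor, so a file-scope mutex qualifies.

```cpp
#include <cstdlib>
#include <mutex>

// OK: std::mutex has a constexpr default constructor, so this is
// constant-initialized. Constant initialization happens before any dynamic
// initialization, so this mutex also outlives every dynamically initialized
// static object during shutdown.
constinit std::mutex g_mutex;

// Error if uncommented: std::rand() is not a constant expression, so
// constinit rejects the declaration at compile time instead of silently
// falling back to runtime (dynamic) initialization.
// constinit int g_seed = std::rand();

// The hazard the PR is working around: a function-local static is constructed
// on first use, so it can be constructed after -- and therefore destroyed
// before -- another static object whose destructor still tries to lock it.
std::mutex& LazyMutex() {
  static std::mutex m;
  return m;
}

int main() {
  std::lock_guard<std::mutex> lock(g_mutex);
  LazyMutex();
  return 0;
}
```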
5 changes: 3 additions & 2 deletions onnxruntime/core/common/logging/logging.cc
@@ -64,9 +64,10 @@ LoggingManager* LoggingManager::GetDefaultInstance() {
 #pragma warning(disable : 26426)
 #endif
 
+ORT_CONSTINIT static std::mutex default_logger_mutex;
+
 static std::mutex& DefaultLoggerMutex() noexcept {
-  static std::mutex mutex;
-  return mutex;
+  return default_logger_mutex;
 }
 
 Logger* LoggingManager::s_default_logger_ = nullptr;
5 changes: 4 additions & 1 deletion onnxruntime/core/framework/model_metadef_id_generator.cc
@@ -7,11 +7,14 @@
 #include "core/framework/murmurhash3.h"
 
 namespace onnxruntime {
+namespace {
+ORT_CONSTINIT static std::mutex mutex;
+}
+
 int ModelMetadefIdGenerator::GenerateId(const onnxruntime::GraphViewer& graph_viewer,
                                         HashValue& model_hash) const {
   // if the EP is shared across multiple sessions there's a very small potential for concurrency issues.
   // use a lock when generating an id to be paranoid
-  static std::mutex mutex;
   std::lock_guard<std::mutex> lock(mutex);
   model_hash = 0;
@@ -290,9 +290,12 @@ TensorrtLogger& GetTensorrtLogger(bool verbose_log) {
   return trt_logger;
 }
 
+namespace {
+ORT_CONSTINIT std::mutex trt_api_lock;
+}  // namespace
+
 std::unique_lock<std::mutex> NvExecutionProvider::GetApiLock() const {
-  static std::mutex singleton;
-  return std::unique_lock<std::mutex>(singleton);
+  return std::unique_lock<std::mutex>(trt_api_lock);
 }
 
 /*
@@ -21,6 +21,11 @@
 #endif
 
 namespace onnxruntime {
+
+namespace {
+ORT_CONSTINIT static std::mutex trt_custom_op_mutex;
+}  // namespace
+
 extern TensorrtLogger& GetTensorrtLogger(bool verbose);
 
 /*
@@ -41,8 +46,7 @@ extern TensorrtLogger& GetTensorrtLogger(bool verbose);
 common::Status CreateTensorRTCustomOpDomainList(std::vector<OrtCustomOpDomain*>& domain_list, const std::string extra_plugin_lib_paths) {
   static std::unique_ptr<OrtCustomOpDomain> custom_op_domain = std::make_unique<OrtCustomOpDomain>();
   static std::vector<std::unique_ptr<TensorRTCustomOp>> created_custom_op_list;
-  static std::mutex mutex;
-  std::lock_guard<std::mutex> lock(mutex);
+  std::lock_guard<std::mutex> lock(trt_custom_op_mutex);
   if (custom_op_domain->domain_ != "" && custom_op_domain->custom_ops_.size() > 0) {
     domain_list.push_back(custom_op_domain.get());
     return Status::OK();
7 changes: 5 additions & 2 deletions onnxruntime/core/providers/qnn/builder/qnn_utils.cc
@@ -790,16 +790,19 @@ Status GetQnnDataType(const bool is_quantized_tensor, const ONNX_NAMESPACE::Type
   return Status::OK();
 }
 
+namespace {
+ORT_CONSTINIT static std::mutex counter_mutex;
+}  // namespace
+
 std::string GetUniqueName(const std::string& base, std::string_view suffix) {
   std::string name = base;
   if (!suffix.empty()) {
     name += suffix;
   }
   {
-    static std::unordered_map<std::string, int> counter;
-    static std::mutex counter_mutex;
     std::lock_guard<std::mutex> lock(counter_mutex);
+
+    static std::unordered_map<std::string, int> counter;
     int& count = counter[name];
     if (count++ > 0) {
       return name + "_" + std::to_string(count);
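For clarity, the counter logic behaves as in the self-contained sketch below, which mirrors the diff; the tail of GetUniqueName is truncated above, so the final `return name;` for the first-use path is an assumption.

```cpp
#include <iostream>
#include <mutex>
#include <string>
#include <string_view>
#include <unordered_map>

namespace {
std::mutex counter_mutex;  // ORT_CONSTINIT in the actual patch

std::string GetUniqueName(const std::string& base, std::string_view suffix) {
  std::string name = base;
  if (!suffix.empty()) {
    name += suffix;
  }
  std::lock_guard<std::mutex> lock(counter_mutex);
  static std::unordered_map<std::string, int> counter;
  int& count = counter[name];
  if (count++ > 0) {
    return name + "_" + std::to_string(count);  // second use of "x" yields "x_2"
  }
  return name;  // assumed: first use of a name returns it unchanged
}
}  // namespace

int main() {
  std::cout << GetUniqueName("conv", "") << "\n";    // conv
  std::cout << GetUniqueName("conv", "") << "\n";    // conv_2
  std::cout << GetUniqueName("conv", "_w") << "\n";  // conv_w
  return 0;
}
```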
8 changes: 6 additions & 2 deletions onnxruntime/core/providers/qnn/ort_api.cc
@@ -10,11 +10,15 @@
 namespace onnxruntime {
 
 #if BUILD_QNN_EP_STATIC_LIB
+
+namespace {
+ORT_CONSTINIT static std::mutex run_on_unload_mutex;
+}  // namespace
+
 static std::unique_ptr<std::vector<std::function<void()>>> s_run_on_unload_;
 
 void RunOnUnload(std::function<void()> function) {
-  static std::mutex mutex;
-  std::lock_guard<std::mutex> guard(mutex);
+  std::lock_guard<std::mutex> guard(run_on_unload_mutex);
   if (!s_run_on_unload_) {
     s_run_on_unload_ = std::make_unique<std::vector<std::function<void()>>>();
   }
onnxruntime/core/providers/shared_library/provider_bridge_provider.cc
@@ -81,6 +81,44 @@
 #endif
 
 namespace onnxruntime {
+
+namespace {
+
+class OnUnloadManager {
+ public:
+  ~OnUnloadManager() {
+    std::unique_ptr<std::vector<std::function<void()>>> funcs;
+    {
+      std::lock_guard<std::mutex> guard(mutex_);
+      funcs = std::move(run_on_unload_functions_);
+    }
+
+    if (!funcs) {
+      return;
+    }
+
+    for (auto& function : *funcs) {
+      function();
+    }
+  }
+
+  void Add(std::function<void()> function) {
+    std::lock_guard<std::mutex> guard(mutex_);
+    if (!run_on_unload_functions_) {
+      run_on_unload_functions_ = std::make_unique<std::vector<std::function<void()>>>();
+    }
+    run_on_unload_functions_->push_back(std::move(function));
+  }
+
+ private:
+  std::mutex mutex_;
+  std::unique_ptr<std::vector<std::function<void()>>> run_on_unload_functions_;
+};
+
+static OnUnloadManager g_on_unload_manager;
+
+}  // namespace
+
 #if defined(_MSC_VER) && !defined(__clang__)
 #pragma warning(push)
 // "Global initializer calls a non-constexpr function."
@@ -91,30 +129,11 @@ ProviderHostCPU& g_host_cpu = g_host->GetProviderHostCPU();
 #if defined(_MSC_VER) && !defined(__clang__)
 #pragma warning(pop)
 #endif
-static std::unique_ptr<std::vector<std::function<void()>>> s_run_on_unload_;
-
 void RunOnUnload(std::function<void()> function) {
-  static std::mutex mutex;
-  std::lock_guard<std::mutex> guard{mutex};
-  if (!s_run_on_unload_)
-    s_run_on_unload_ = std::make_unique<std::vector<std::function<void()>>>();
-  s_run_on_unload_->push_back(std::move(function));
+  g_on_unload_manager.Add(std::move(function));
 }
 
-// This object is destroyed as part of the DLL unloading code and handles running all of the RunOnLoad functions
-struct OnUnload {
-  ~OnUnload() {
-    if (!s_run_on_unload_)
-      return;
-
-    for (auto& function : *s_run_on_unload_)
-      function();
-
-    s_run_on_unload_.reset();
-  }
-
-} g_on_unload;
-
 void* CPUAllocator::Alloc(size_t size) { return g_host->CPUAllocator__Alloc(this, size); }
 void CPUAllocator::Free(void* p) { g_host->CPUAllocator__Free(this, p); }
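This change implements the reviewer's suggestion above: the mutex now lives inside the object it protects, so the lock and the callback list share one lifetime and cannot be torn down independently. A condensed, standalone sketch of the same pattern (names mirror the diff; the demo in main() is illustrative only):

```cpp
#include <functional>
#include <iostream>
#include <mutex>
#include <utility>
#include <vector>

namespace {
// Condensed OnUnloadManager: the mutex is a member, so it is destroyed
// together with the data it guards, never before it.
class OnUnloadManager {
 public:
  ~OnUnloadManager() {
    for (auto& fn : callbacks_) fn();  // runs during static destruction
  }
  void Add(std::function<void()> fn) {
    std::lock_guard<std::mutex> guard(mutex_);
    callbacks_.push_back(std::move(fn));
  }

 private:
  std::mutex mutex_;
  std::vector<std::function<void()>> callbacks_;
};

OnUnloadManager g_on_unload_manager;
}  // namespace

void RunOnUnload(std::function<void()> fn) { g_on_unload_manager.Add(std::move(fn)); }

int main() {
  RunOnUnload([] { std::cout << "cleanup runs after main returns\n"; });
  return 0;  // g_on_unload_manager is destroyed afterwards and fires the callback
}
```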
@@ -452,9 +452,12 @@ TensorrtLogger& GetTensorrtLogger(bool verbose_log) {
   return trt_logger;
 }
 
+namespace {
+ORT_CONSTINIT std::mutex trt_api_lock;
+}  // namespace
+
 std::unique_lock<std::mutex> TensorrtExecutionProvider::GetApiLock() const {
-  static std::mutex singleton;
-  return std::unique_lock<std::mutex>(singleton);
+  return std::unique_lock<std::mutex>(trt_api_lock);
 }
 
 /*
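The GetApiLock() idiom used in both TRT providers returns a std::unique_lock by value, handing lock ownership to the caller. A minimal standalone sketch under assumed names (Provider and api_lock are illustrative):

```cpp
#include <iostream>
#include <mutex>

namespace {
std::mutex api_lock;  // ORT_CONSTINIT in the actual patch
}  // namespace

class Provider {
 public:
  // unique_lock is movable, so the caller receives a live, locked handle and
  // releases it automatically when the handle goes out of scope.
  std::unique_lock<std::mutex> GetApiLock() const {
    return std::unique_lock<std::mutex>(api_lock);
  }
};

int main() {
  Provider p;
  {
    auto lock = p.GetApiLock();  // serialized region begins
    std::cout << "API calls that must not run concurrently go here\n";
  }  // lock released here
  return 0;
}
```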
onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.cc
@@ -27,6 +27,39 @@
 #define ORT_DEF2STR(x) ORT_DEF2STR_HELPER(x)
 
 namespace onnxruntime {
+
+namespace {
+class TrtCustomOpDomainStateLocker;
+
+struct TrtCustomOpDomainState {
+  friend class TrtCustomOpDomainStateLocker;
+
+ private:
+  std::mutex mutex;
+
+ public:
+  std::unique_ptr<OrtCustomOpDomain> custom_op_domain{std::make_unique<OrtCustomOpDomain>()};
+  std::vector<std::unique_ptr<TensorRTCustomOp>> created_custom_op_list;
+  bool is_loaded{false};
+};
+
+class TrtCustomOpDomainStateLocker {
+ public:
+  explicit TrtCustomOpDomainStateLocker(TrtCustomOpDomainState& state) : state_{state}, lock_{state_.mutex} {}
+  TrtCustomOpDomainState* operator->() { return &state_; }
+  const TrtCustomOpDomainState* operator->() const { return &state_; }
+
+ private:
+  TrtCustomOpDomainState& state_;
+  std::lock_guard<std::mutex> lock_;
+};
+
+TrtCustomOpDomainState& GetTrtCustomOpDomainState() {
+  static TrtCustomOpDomainState state;
+  return state;
+}
+}  // namespace
+
 extern TensorrtLogger& GetTensorrtLogger(bool verbose);

[cpplint] onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.cc:41: Add #include <memory> for unique_ptr<> [build/include_what_you_use] [4]
[cpplint] onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.cc:42: Add #include <vector> for vector<> [build/include_what_you_use] [4]

 /*
@@ -45,21 +78,17 @@
  * So, TensorRTCustomOp uses variadic inputs/outputs to pass ONNX graph validation.
  */
 common::Status CreateTensorRTCustomOpDomainList(std::vector<OrtCustomOpDomain*>& domain_list, const std::string extra_plugin_lib_paths) {
-  static std::unique_ptr<OrtCustomOpDomain> custom_op_domain = std::make_unique<OrtCustomOpDomain>();
-  static std::vector<std::unique_ptr<TensorRTCustomOp>> created_custom_op_list;
-  static std::mutex mutex;
-  std::lock_guard<std::mutex> lock(mutex);
-  if (custom_op_domain->domain_ != "" && custom_op_domain->custom_ops_.size() > 0) {
-    domain_list.push_back(custom_op_domain.get());
+  TrtCustomOpDomainStateLocker state(GetTrtCustomOpDomainState());
+  if (state->custom_op_domain->domain_ != "" && state->custom_op_domain->custom_ops_.size() > 0) {
+    domain_list.push_back(state->custom_op_domain.get());
     return Status::OK();
   }
 
   // Load any extra TRT plugin library if any.
   // When the TRT plugin library is loaded, the global static object is created and the plugin is registered to TRT registry.
   // This is done through macro, for example, REGISTER_TENSORRT_PLUGIN(VisionTransformerPluginCreator).
   // extra_plugin_lib_paths has the format of "path_1;path_2....;path_n"
-  static bool is_loaded = false;
-  if (!extra_plugin_lib_paths.empty() && !is_loaded) {
+  if (!extra_plugin_lib_paths.empty() && !state->is_loaded) {
     std::stringstream extra_plugin_libs(extra_plugin_lib_paths);
     std::string lib;
     while (std::getline(extra_plugin_libs, lib, ';')) {
@@ -70,7 +99,7 @@
         LOGS_DEFAULT(WARNING) << "[TensorRT EP]" << status.ToString();
       }
     }
-    is_loaded = true;
+    state->is_loaded = true;
   }
 
   try {
@@ -133,14 +162,14 @@
         continue;
       }
 
-      created_custom_op_list.push_back(std::make_unique<TensorRTCustomOp>(onnxruntime::kTensorrtExecutionProvider, nullptr));  // Make sure TensorRTCustomOp object won't be cleaned up
-      created_custom_op_list.back().get()->SetName(plugin_name);
-      custom_op_domain->custom_ops_.push_back(created_custom_op_list.back().get());
+      state->created_custom_op_list.push_back(std::make_unique<TensorRTCustomOp>(onnxruntime::kTensorrtExecutionProvider, nullptr));  // Make sure TensorRTCustomOp object won't be cleaned up
+      state->created_custom_op_list.back().get()->SetName(plugin_name);
+      state->custom_op_domain->custom_ops_.push_back(state->created_custom_op_list.back().get());
       registered_plugin_names.insert(plugin_name);
     }
 
-    custom_op_domain->domain_ = "trt.plugins";
-    domain_list.push_back(custom_op_domain.get());
+    state->custom_op_domain->domain_ = "trt.plugins";
+    domain_list.push_back(state->custom_op_domain.get());
   } catch (const std::exception&) {
     LOGS_DEFAULT(WARNING) << "[TensorRT EP] Failed to get TRT plugins from TRT plugin registration. Therefore, TRT EP can't create custom ops for TRT plugins";
   }
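The TrtCustomOpDomainStateLocker introduced above is a small "locking accessor": constructing it acquires the state's own mutex, and every access through operator-> then happens under that lock, with the state and its mutex sharing one lifetime. A stripped-down standalone sketch of the same pattern (State, StateLocker, and value are illustrative names):

```cpp
#include <iostream>
#include <mutex>

struct State {
  friend class StateLocker;

 private:
  std::mutex mutex;  // owned by the data it guards, as in the PR

 public:
  int value{0};
};

class StateLocker {
 public:
  explicit StateLocker(State& state) : state_{state}, lock_{state_.mutex} {}
  State* operator->() { return &state_; }

 private:
  State& state_;
  std::lock_guard<std::mutex> lock_;  // held for the locker's whole lifetime
};

State& GetState() {
  static State state;  // a function-local static is fine here: it owns its own mutex
  return state;
}

int main() {
  StateLocker state(GetState());
  state->value += 1;  // every -> access happens under the lock
  std::cout << state->value << "\n";
  return 0;
}
```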