diff --git a/include/onnxruntime/core/common/common.h b/include/onnxruntime/core/common/common.h
index 820d140ccaabc..5f55b1ddb967c 100644
--- a/include/onnxruntime/core/common/common.h
+++ b/include/onnxruntime/core/common/common.h
@@ -66,6 +66,22 @@ using TimePoint = std::chrono::high_resolution_clock::time_point;
 #define ORT_ATTRIBUTE_UNUSED
 #endif
 
+// ORT_CONSTINIT
+//
+// C++20 constinit keyword ensures that a variable is initialized at compile time
+// and can be safely used in static initialization contexts
+#if defined(__cpp_constinit) && __cpp_constinit >= 201907L
+#define ORT_CONSTINIT constinit
+#elif defined(__clang__) && defined(__has_cpp_attribute)
+#if __has_cpp_attribute(clang::require_constant_initialization)
+#define ORT_CONSTINIT [[clang::require_constant_initialization]]
+#endif
+#endif
+
+#ifndef ORT_CONSTINIT
+#define ORT_CONSTINIT
+#endif
+
 #ifdef ORT_NO_EXCEPTIONS
 // Print the given final message, the message must be a null terminated char*
 // ORT will abort after printing the message.
diff --git a/onnxruntime/core/common/logging/logging.cc b/onnxruntime/core/common/logging/logging.cc
index a79e7300cffce..bb82c607b9544 100644
--- a/onnxruntime/core/common/logging/logging.cc
+++ b/onnxruntime/core/common/logging/logging.cc
@@ -64,9 +64,10 @@ LoggingManager* LoggingManager::GetDefaultInstance() {
 #pragma warning(disable : 26426)
 #endif
 
+ORT_CONSTINIT static std::mutex default_logger_mutex;
+
 static std::mutex& DefaultLoggerMutex() noexcept {
-  static std::mutex mutex;
-  return mutex;
+  return default_logger_mutex;
 }
 
 Logger* LoggingManager::s_default_logger_ = nullptr;
diff --git a/onnxruntime/core/framework/model_metadef_id_generator.cc b/onnxruntime/core/framework/model_metadef_id_generator.cc
index 2d55aa8360bd2..ea07179c28970 100644
--- a/onnxruntime/core/framework/model_metadef_id_generator.cc
+++ b/onnxruntime/core/framework/model_metadef_id_generator.cc
@@ -7,11 +7,14 @@
 #include "core/framework/murmurhash3.h"
 
 namespace onnxruntime {
+namespace {
+ORT_CONSTINIT static std::mutex mutex;
+}
+
 int ModelMetadefIdGenerator::GenerateId(const onnxruntime::GraphViewer& graph_viewer, HashValue& model_hash) const {
   // if the EP is shared across multiple sessions there's a very small potential for concurrency issues.
   // use a lock when generating an id to be paranoid
-  static std::mutex mutex;
   std::lock_guard lock(mutex);
   model_hash = 0;
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
index 62210d65848d1..9b7a359493bb1 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
@@ -290,9 +290,12 @@ TensorrtLogger& GetTensorrtLogger(bool verbose_log) {
   return trt_logger;
 }
 
+namespace {
+ORT_CONSTINIT std::mutex trt_api_lock;
+}  // namespace
+
 std::unique_lock<std::mutex> NvExecutionProvider::GetApiLock() const {
-  static std::mutex singleton;
-  return std::unique_lock<std::mutex>(singleton);
+  return std::unique_lock<std::mutex>(trt_api_lock);
 }
 
 /*
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc
index c8df7c9437adf..fd0fc621de283 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc
@@ -21,6 +21,11 @@
 #endif
 
 namespace onnxruntime {
+
+namespace {
+ORT_CONSTINIT static std::mutex trt_custom_op_mutex;
+}  // namespace
+
 extern TensorrtLogger& GetTensorrtLogger(bool verbose);
 
 /*
@@ -41,8 +46,7 @@ extern TensorrtLogger& GetTensorrtLogger(bool verbose);
 common::Status CreateTensorRTCustomOpDomainList(std::vector<OrtCustomOpDomain*>& domain_list, const std::string extra_plugin_lib_paths) {
   static std::unique_ptr<OrtCustomOpDomain> custom_op_domain = std::make_unique<OrtCustomOpDomain>();
   static std::vector<std::unique_ptr<TensorRTCustomOp>> created_custom_op_list;
-  static std::mutex mutex;
-  std::lock_guard lock(mutex);
+  std::lock_guard lock(trt_custom_op_mutex);
   if (custom_op_domain->domain_ != "" && custom_op_domain->custom_ops_.size() > 0) {
     domain_list.push_back(custom_op_domain.get());
     return Status::OK();
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc
index afa5e3bdbb6d1..0c1e8fe7eeb4e 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_utils.cc
+++ b/onnxruntime/core/providers/qnn/builder/qnn_utils.cc
@@ -790,16 +790,19 @@ Status GetQnnDataType(const bool is_quantized_tensor, const ONNX_NAMESPACE::Type
   return Status::OK();
 }
 
+namespace {
+ORT_CONSTINIT static std::mutex counter_mutex;
+}  // namespace
+
 std::string GetUniqueName(const std::string& base, std::string_view suffix) {
   std::string name = base;
   if (!suffix.empty()) {
     name += suffix;
   }
 
   {
-    static std::unordered_map<std::string, int> counter;
-    static std::mutex counter_mutex;
     std::lock_guard lock(counter_mutex);
+    static std::unordered_map<std::string, int> counter;
     int& count = counter[name];
     if (count++ > 0) {
       return name + "_" + std::to_string(count);
diff --git a/onnxruntime/core/providers/qnn/ort_api.cc b/onnxruntime/core/providers/qnn/ort_api.cc
index aec09d043d2bc..682673c39d12f 100644
--- a/onnxruntime/core/providers/qnn/ort_api.cc
+++ b/onnxruntime/core/providers/qnn/ort_api.cc
@@ -10,11 +10,15 @@
 namespace onnxruntime {
 
 #if BUILD_QNN_EP_STATIC_LIB
+
+namespace {
+ORT_CONSTINIT static std::mutex run_on_unload_mutex;
+}  // namespace
+
 static std::unique_ptr<std::vector<std::function<void()>>> s_run_on_unload_;
 
 void RunOnUnload(std::function<void()> function) {
-  static std::mutex mutex;
-  std::lock_guard guard(mutex);
+  std::lock_guard guard(run_on_unload_mutex);
   if (!s_run_on_unload_) {
     s_run_on_unload_ = std::make_unique<std::vector<std::function<void()>>>();
   }
diff --git a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc
index 0e5df0026d2c0..57db52e32c888 100644
--- a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc
+++ b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc
@@ -81,6 +81,44 @@ void operator delete(void* p, size_t /*size*/) noexcept { return Provider_GetHos
 #endif
 
 namespace onnxruntime {
+
+namespace {
+
+class OnUnloadManager {
+ public:
+  ~OnUnloadManager() {
+    std::unique_ptr<std::vector<std::function<void()>>> funcs;
+    {
+      std::lock_guard guard(mutex_);
+      funcs = std::move(run_on_unload_functions_);
+    }
+
+    if (!funcs) {
+      return;
+    }
+
+    for (auto& function : *funcs) {
+      function();
+    }
+  }
+
+  void Add(std::function<void()> function) {
+    std::lock_guard guard(mutex_);
+    if (!run_on_unload_functions_) {
+      run_on_unload_functions_ = std::make_unique<std::vector<std::function<void()>>>();
+    }
+    run_on_unload_functions_->push_back(std::move(function));
+  }
+
+ private:
+  std::mutex mutex_;
+  std::unique_ptr<std::vector<std::function<void()>>> run_on_unload_functions_;
+};
+
+static OnUnloadManager g_on_unload_manager;
+
+}  // namespace
+
 #if defined(_MSC_VER) && !defined(__clang__)
 #pragma warning(push)
 // "Global initializer calls a non-constexpr function."
@@ -91,30 +129,11 @@
 ProviderHostCPU& g_host_cpu = g_host->GetProviderHostCPU();
 #if defined(_MSC_VER) && !defined(__clang__)
 #pragma warning(pop)
 #endif
-static std::unique_ptr<std::vector<std::function<void()>>> s_run_on_unload_;
 
 void RunOnUnload(std::function<void()> function) {
-  static std::mutex mutex;
-  std::lock_guard guard{mutex};
-  if (!s_run_on_unload_)
-    s_run_on_unload_ = std::make_unique<std::vector<std::function<void()>>>();
-  s_run_on_unload_->push_back(std::move(function));
+  g_on_unload_manager.Add(std::move(function));
 }
 
-// This object is destroyed as part of the DLL unloading code and handles running all of the RunOnLoad functions
-struct OnUnload {
-  ~OnUnload() {
-    if (!s_run_on_unload_)
-      return;
-
-    for (auto& function : *s_run_on_unload_)
-      function();
-
-    s_run_on_unload_.reset();
-  }
-
-} g_on_unload;
-
 void* CPUAllocator::Alloc(size_t size) { return g_host->CPUAllocator__Alloc(this, size); }
 void CPUAllocator::Free(void* p) { g_host->CPUAllocator__Free(this, p); }
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
index 508d932459bf9..045ff92e851c7 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@@ -452,9 +452,12 @@ TensorrtLogger& GetTensorrtLogger(bool verbose_log) {
   return trt_logger;
 }
 
+namespace {
+ORT_CONSTINIT std::mutex trt_api_lock;
+}  // namespace
+
 std::unique_lock<std::mutex> TensorrtExecutionProvider::GetApiLock() const {
-  static std::mutex singleton;
-  return std::unique_lock<std::mutex>(singleton);
+  return std::unique_lock<std::mutex>(trt_api_lock);
 }
 
 /*
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.cc
index 1e9fafe8aa323..cc1ab77d3e16e 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.cc
@@ -27,6 +27,39 @@
 #define ORT_DEF2STR(x) ORT_DEF2STR_HELPER(x)
 
 namespace onnxruntime {
+
+namespace {
+class TrtCustomOpDomainStateLocker;
+
+struct TrtCustomOpDomainState {
+  friend class TrtCustomOpDomainStateLocker;
+
+ private:
+  std::mutex mutex;
+
+ public:
+  std::unique_ptr<OrtCustomOpDomain> custom_op_domain{std::make_unique<OrtCustomOpDomain>()};
+  std::vector<std::unique_ptr<TensorRTCustomOp>> created_custom_op_list;
+  bool is_loaded{false};
+};
+
+class TrtCustomOpDomainStateLocker {
+ public:
+  explicit TrtCustomOpDomainStateLocker(TrtCustomOpDomainState& state) : state_{state}, lock_{state_.mutex} {}
+  TrtCustomOpDomainState* operator->() { return &state_; }
+  const TrtCustomOpDomainState* operator->() const { return &state_; }
+
+ private:
+  TrtCustomOpDomainState& state_;
+  std::lock_guard<std::mutex> lock_;
+};
+
+TrtCustomOpDomainState& GetTrtCustomOpDomainState() {
+  static TrtCustomOpDomainState state;
+  return state;
+}
+}  // namespace
+
 extern TensorrtLogger& GetTensorrtLogger(bool verbose);
 
 /*
@@ -45,12 +78,9 @@ extern TensorrtLogger& GetTensorrtLogger(bool verbose);
 * So, TensorRTCustomOp uses variadic inputs/outputs to pass ONNX graph validation.
 */
 common::Status CreateTensorRTCustomOpDomainList(std::vector<OrtCustomOpDomain*>& domain_list, const std::string extra_plugin_lib_paths) {
-  static std::unique_ptr<OrtCustomOpDomain> custom_op_domain = std::make_unique<OrtCustomOpDomain>();
-  static std::vector<std::unique_ptr<TensorRTCustomOp>> created_custom_op_list;
-  static std::mutex mutex;
-  std::lock_guard lock(mutex);
-  if (custom_op_domain->domain_ != "" && custom_op_domain->custom_ops_.size() > 0) {
-    domain_list.push_back(custom_op_domain.get());
+  TrtCustomOpDomainStateLocker state(GetTrtCustomOpDomainState());
+  if (state->custom_op_domain->domain_ != "" && state->custom_op_domain->custom_ops_.size() > 0) {
+    domain_list.push_back(state->custom_op_domain.get());
     return Status::OK();
   }
 
@@ -58,8 +88,7 @@ common::Status CreateTensorRTCustomOpDomainList(std::vector<OrtCustomOpDomain*>&
   // When the TRT plugin library is loaded, the global static object is created and the plugin is registered to TRT registry.
   // This is done through macro, for example, REGISTER_TENSORRT_PLUGIN(VisionTransformerPluginCreator).
   // extra_plugin_lib_paths has the format of "path_1;path_2....;path_n"
-  static bool is_loaded = false;
-  if (!extra_plugin_lib_paths.empty() && !is_loaded) {
+  if (!extra_plugin_lib_paths.empty() && !state->is_loaded) {
     std::stringstream extra_plugin_libs(extra_plugin_lib_paths);
     std::string lib;
     while (std::getline(extra_plugin_libs, lib, ';')) {
@@ -70,7 +99,7 @@ common::Status CreateTensorRTCustomOpDomainList(std::vector<OrtCustomOpDomain*>&
         LOGS_DEFAULT(WARNING) << "[TensorRT EP]" << status.ToString();
       }
     }
-    is_loaded = true;
+    state->is_loaded = true;
   }
 
   try {
@@ -133,14 +162,14 @@ common::Status CreateTensorRTCustomOpDomainList(std::vector<OrtCustomOpDomain*>&
         continue;
       }
 
-      created_custom_op_list.push_back(std::make_unique<TensorRTCustomOp>(onnxruntime::kTensorrtExecutionProvider, nullptr));  // Make sure TensorRTCustomOp object won't be cleaned up
-      created_custom_op_list.back().get()->SetName(plugin_name);
-      custom_op_domain->custom_ops_.push_back(created_custom_op_list.back().get());
+      state->created_custom_op_list.push_back(std::make_unique<TensorRTCustomOp>(onnxruntime::kTensorrtExecutionProvider, nullptr));  // Make sure TensorRTCustomOp object won't be cleaned up
+      state->created_custom_op_list.back().get()->SetName(plugin_name);
+      state->custom_op_domain->custom_ops_.push_back(state->created_custom_op_list.back().get());
       registered_plugin_names.insert(plugin_name);
     }
 
-    custom_op_domain->domain_ = "trt.plugins";
-    domain_list.push_back(custom_op_domain.get());
+    state->custom_op_domain->domain_ = "trt.plugins";
+    domain_list.push_back(state->custom_op_domain.get());
   } catch (const std::exception&) {
     LOGS_DEFAULT(WARNING) << "[TensorRT EP] Failed to get TRT plugins from TRT plugin registration. Therefore, TRT EP can't create custom ops for TRT plugins";
   }
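A minimal sketch of the ORT_CONSTINIT pattern the patch applies; it is illustrative only and not part of the diff, and the names g_example_mutex and NextId are made up. With C++20 constinit (or Clang's [[clang::require_constant_initialization]]), a namespace-scope std::mutex is guaranteed to be constant-initialized, so it can be used from other static initializers and avoids the per-call "magic static" guard check that a function-local static std::mutex incurs; on compilers with neither feature, ORT_CONSTINIT expands to nothing and the declaration still compiles, just without the compile-time check.

#include <mutex>

#include "core/common/common.h"  // defines ORT_CONSTINIT per the common.h hunk above

namespace {
// Constant-initialized (std::mutex has a constexpr default constructor):
// no static-initialization-order issues and no per-call guard check,
// unlike a function-local static mutex.
ORT_CONSTINIT std::mutex g_example_mutex;
}  // namespace

int NextId() {
  std::lock_guard<std::mutex> lock(g_example_mutex);
  static int next = 0;  // still lazily initialized, but only ever touched under the lock
  return ++next;
}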