From d22afbe06883f6406b7360011538f893199e0f13 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Tue, 21 Jan 2025 11:43:33 +0000 Subject: [PATCH 1/6] Add support for ArenaCfg configuration options --- src/onnxruntime.cc | 108 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 107 insertions(+), 1 deletion(-) diff --git a/src/onnxruntime.cc b/src/onnxruntime.cc index acdb81e..29a4caa 100644 --- a/src/onnxruntime.cc +++ b/src/onnxruntime.cc @@ -302,6 +302,30 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model) } } + // Set use_device_allocator_for_initializers + { + triton::common::TritonJson::Value params; + if (ModelConfig().Find("parameters", &params)) { + triton::common::TritonJson::Value json_value; + const char* use_device_allocator_for_initializers_key = + "session.use_device_allocator_for_initializers"; + if (params.Find(use_device_allocator_for_initializers_key, &json_value)) { + std::string string_value; + THROW_IF_BACKEND_MODEL_ERROR( + json_value.MemberAsString("string_value", &string_value)); + + LOG_MESSAGE( + TRITONSERVER_LOG_VERBOSE, + (std::string("Configuring ") + + use_device_allocator_for_initializers_key + " to " + string_value) + .c_str()); + THROW_IF_BACKEND_MODEL_ORT_ERROR(ort_api->AddSessionConfigEntry( + soptions, use_device_allocator_for_initializers_key, + string_value.c_str())); + } + } + } + // memory configs // enable/disable mem arena { @@ -762,8 +786,90 @@ ModelState::LoadModel( rel_cuda_options(cuda_options, ort_api->ReleaseCUDAProviderOptions); cuda_options_map["device_id"] = std::to_string(instance_group_device_id); cuda_options_map["has_user_compute_stream"] = stream != nullptr ? 
"1" : "0"; + + // Memory arena config + OrtArenaCfg* arena_cfg = nullptr; + { + triton::common::TritonJson::Value params; + if (model_config_.Find("parameters", &params)) { + triton::common::TritonJson::Value json_value; + std::vector<const char*> keys; + std::vector<size_t> values; + if (params.Find("max_mem", &json_value)) { + std::string string_value; + THROW_IF_BACKEND_MODEL_ERROR( + json_value.MemberAsString("string_value", &string_value)); + keys.push_back("max_mem"); + size_t value; + RETURN_IF_ERROR(ParseUnsignedLongLongValue(string_value, &value)); + values.push_back(value); + } + if (params.Find("arena_extend_strategy", &json_value)) { + std::string string_value; + THROW_IF_BACKEND_MODEL_ERROR( + json_value.MemberAsString("string_value", &string_value)); + keys.push_back("arena_extend_strategy"); + size_t value; + RETURN_IF_ERROR(ParseUnsignedLongLongValue(string_value, &value)); + values.push_back(value); + } + if (params.Find("initial_chunk_size_bytes", &json_value)) { + std::string string_value; + THROW_IF_BACKEND_MODEL_ERROR( + json_value.MemberAsString("string_value", &string_value)); + keys.push_back("initial_chunk_size_bytes"); + size_t value; + RETURN_IF_ERROR(ParseUnsignedLongLongValue(string_value, &value)); + values.push_back(value); + } + if (params.Find("initial_growth_chunk_size_bytes", &json_value)) { + std::string string_value; + THROW_IF_BACKEND_MODEL_ERROR( + json_value.MemberAsString("string_value", &string_value)); + keys.push_back("initial_growth_chunk_size_bytes"); + size_t value; + RETURN_IF_ERROR(ParseUnsignedLongLongValue(string_value, &value)); + values.push_back(value); + } + if (params.Find("max_dead_bytes_per_chunk", &json_value)) { + std::string string_value; + THROW_IF_BACKEND_MODEL_ERROR( + json_value.MemberAsString("string_value", &string_value)); + keys.push_back("max_dead_bytes_per_chunk"); + size_t value; + RETURN_IF_ERROR(ParseUnsignedLongLongValue(string_value, &value)); + values.push_back(value); + } + if 
(params.Find("max_power_of_two_extend_bytes", &json_value)) { + std::string string_value; + THROW_IF_BACKEND_MODEL_ERROR( + json_value.MemberAsString("string_value", &string_value)); + keys.push_back("max_power_of_two_extend_bytes"); + size_t value; + RETURN_IF_ERROR(ParseUnsignedLongLongValue(string_value, &value)); + values.push_back(value); + } + if (!keys.empty()) { + RETURN_IF_ORT_ERROR(ort_api->CreateArenaCfgV2( + keys.data(), values.data(), keys.size(), &arena_cfg)); + + std::ostringstream oss; + for (size_t i = 0; i < keys.size(); ++i) { + oss << keys[i] << "=" << values[i] << ", "; + } + LOG_MESSAGE( + TRITONSERVER_LOG_VERBOSE, + (std::string("Updated arena config options: ") + oss.str()) + .c_str()); + } + } + } + std::unique_ptr<OrtArenaCfg, decltype(ort_api->ReleaseArenaCfg)> + rel_arena_cfg(arena_cfg, ort_api->ReleaseArenaCfg); RETURN_IF_ORT_ERROR(ort_api->UpdateCUDAProviderOptionsWithValue( - rel_cuda_options.get(), "default_memory_arena_cfg", nullptr)); + rel_cuda_options.get(), "default_memory_arena_cfg", + rel_arena_cfg.get())); + { // Parse CUDA EP configurations directly from the parameters field. // This is deprecated with adding support for CUDA EP in the From 7d3934ae2ab1d7d99c4cf27d63c5e417d17c364e Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Tue, 21 Jan 2025 14:04:19 +0000 Subject: [PATCH 2/6] Undo ArenaCfg --- src/onnxruntime.cc | 84 +--------------------------------------------- 1 file changed, 1 insertion(+), 83 deletions(-) diff --git a/src/onnxruntime.cc b/src/onnxruntime.cc index 29a4caa..02fa220 100644 --- a/src/onnxruntime.cc +++ b/src/onnxruntime.cc @@ -786,90 +786,8 @@ ModelState::LoadModel( rel_cuda_options(cuda_options, ort_api->ReleaseCUDAProviderOptions); cuda_options_map["device_id"] = std::to_string(instance_group_device_id); cuda_options_map["has_user_compute_stream"] = stream != nullptr ? 
"1" : "0"; - - // Memory arena config - OrtArenaCfg* arena_cfg = nullptr; - { - triton::common::TritonJson::Value params; - if (model_config_.Find("parameters", &params)) { - triton::common::TritonJson::Value json_value; - std::vector<const char*> keys; - std::vector<size_t> values; - if (params.Find("max_mem", &json_value)) { - std::string string_value; - THROW_IF_BACKEND_MODEL_ERROR( - json_value.MemberAsString("string_value", &string_value)); - keys.push_back("max_mem"); - size_t value; - RETURN_IF_ERROR(ParseUnsignedLongLongValue(string_value, &value)); - values.push_back(value); - } - if (params.Find("arena_extend_strategy", &json_value)) { - std::string string_value; - THROW_IF_BACKEND_MODEL_ERROR( - json_value.MemberAsString("string_value", &string_value)); - keys.push_back("arena_extend_strategy"); - size_t value; - RETURN_IF_ERROR(ParseUnsignedLongLongValue(string_value, &value)); - values.push_back(value); - } - if (params.Find("initial_chunk_size_bytes", &json_value)) { - std::string string_value; - THROW_IF_BACKEND_MODEL_ERROR( - json_value.MemberAsString("string_value", &string_value)); - keys.push_back("initial_chunk_size_bytes"); - size_t value; - RETURN_IF_ERROR(ParseUnsignedLongLongValue(string_value, &value)); - values.push_back(value); - } - if (params.Find("initial_growth_chunk_size_bytes", &json_value)) { - std::string string_value; - THROW_IF_BACKEND_MODEL_ERROR( - json_value.MemberAsString("string_value", &string_value)); - keys.push_back("initial_growth_chunk_size_bytes"); - size_t value; - RETURN_IF_ERROR(ParseUnsignedLongLongValue(string_value, &value)); - values.push_back(value); - } - if (params.Find("max_dead_bytes_per_chunk", &json_value)) { - std::string string_value; - THROW_IF_BACKEND_MODEL_ERROR( - json_value.MemberAsString("string_value", &string_value)); - keys.push_back("max_dead_bytes_per_chunk"); - size_t value; - RETURN_IF_ERROR(ParseUnsignedLongLongValue(string_value, &value)); - values.push_back(value); - } - if 
(params.Find("max_power_of_two_extend_bytes", &json_value)) { - std::string string_value; - THROW_IF_BACKEND_MODEL_ERROR( - json_value.MemberAsString("string_value", &string_value)); - keys.push_back("max_power_of_two_extend_bytes"); - size_t value; - RETURN_IF_ERROR(ParseUnsignedLongLongValue(string_value, &value)); - values.push_back(value); - } - if (!keys.empty()) { - RETURN_IF_ORT_ERROR(ort_api->CreateArenaCfgV2( - keys.data(), values.data(), keys.size(), &arena_cfg)); - - std::ostringstream oss; - for (size_t i = 0; i < keys.size(); ++i) { - oss << keys[i] << "=" << values[i] << ", "; - } - LOG_MESSAGE( - TRITONSERVER_LOG_VERBOSE, - (std::string("Updated arena config options: ") + oss.str()) - .c_str()); - } - } - } - std::unique_ptr<OrtArenaCfg, decltype(ort_api->ReleaseArenaCfg)> - rel_arena_cfg(arena_cfg, ort_api->ReleaseArenaCfg); RETURN_IF_ORT_ERROR(ort_api->UpdateCUDAProviderOptionsWithValue( - rel_cuda_options.get(), "default_memory_arena_cfg", - rel_arena_cfg.get())); - + rel_cuda_options.get(), "default_memory_arena_cfg", nullptr)); { // Parse CUDA EP configurations directly from the parameters field. 
// This is deprecated with adding support for CUDA EP in the From bd59969b2827ea694bcc068b33d535883d67b127 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Wed, 22 Jan 2025 07:21:05 +0000 Subject: [PATCH 3/6] Update --- src/onnxruntime.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/onnxruntime.cc b/src/onnxruntime.cc index 02fa220..dd12906 100644 --- a/src/onnxruntime.cc +++ b/src/onnxruntime.cc @@ -316,8 +316,9 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model) LOG_MESSAGE( TRITONSERVER_LOG_VERBOSE, - (std::string("Configuring ") + - use_device_allocator_for_initializers_key + " to " + string_value) + (std::string("Configuring '") + + use_device_allocator_for_initializers_key + "' to '" + + string_value + "' for '" + Name() + "'") .c_str()); THROW_IF_BACKEND_MODEL_ORT_ERROR(ort_api->AddSessionConfigEntry( soptions, use_device_allocator_for_initializers_key, From 013d274504599ff36f0a9518322adb5b760c03d1 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Wed, 22 Jan 2025 14:29:35 +0530 Subject: [PATCH 4/6] Update readme --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index e61c31f..4331ae9 100644 --- a/README.md +++ b/README.md @@ -283,6 +283,7 @@ for more information. * `memory.enable_memory_arena_shrinkage`: See [this](https://github.com/microsoft/onnxruntime/blob/master/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h) for more information. +* `session.use_device_allocator_for_initializers`: Use "1" to enable using the device allocator for allocating initialized tensor memory and "0" to disable it. The default is "0". See [this](https://onnxruntime.ai/docs/get-started/with-c.html) for more information. 
### Command line options From 1706eafb1776dae05ac5d8933a616b4ae7d1fd7c Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Wed, 22 Jan 2025 14:48:59 +0530 Subject: [PATCH 5/6] Update --- src/onnxruntime.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/onnxruntime.cc b/src/onnxruntime.cc index dd12906..b4091e6 100644 --- a/src/onnxruntime.cc +++ b/src/onnxruntime.cc @@ -302,7 +302,7 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model) } } - // Set use_device_allocator_for_initializers + // Enable/disable use_device_allocator_for_initializers { triton::common::TritonJson::Value params; if (ModelConfig().Find("parameters", &params)) { From b051a83a0d7c75f8742fb785c0bf6e30ffe08ae8 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Wed, 22 Jan 2025 16:38:10 +0530 Subject: [PATCH 6/6] Update copyright --- README.md | 2 +- src/onnxruntime.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4331ae9..ef3efd1 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@