diff --git a/onnxruntime/test/ep_weight_sharing_ctx_gen/README.md b/onnxruntime/test/ep_weight_sharing_ctx_gen/README.md index 66b8467bda335..51a405613bea1 100644 --- a/onnxruntime/test/ep_weight_sharing_ctx_gen/README.md +++ b/onnxruntime/test/ep_weight_sharing_ctx_gen/README.md @@ -1,5 +1,8 @@ # ONNXRuntime EP Context Model Generation with Weight Sharing +> [!NOTE] +> This tool is deprecated. Please use the public ONNX Runtime Python APIs to compile models with resource sharing. Refer to the example Python script at the end of this document. + [EP context with weight sharing design doc](https://onnxruntime.ai/docs/execution-providers/EP-Context-Design.html#epcontext-with-weight-sharing) OnnxRuntime provides the ep_weight_sharing_ctx_gen tool to automate the weight-sharing workflow. This tool handles the entire process. This tool is specifically designed for weight sharing scenarios, streamlining the EPContext model generation process. @@ -13,6 +16,23 @@ Example: ./ep_weight_sharing_ctx_gen -e qnn -i "soc_model|60 htp_graph_finalizat Options: -e [qnn|tensorrt|openvino|vitisai]: Specifies the compile based provider 'qnn', 'tensorrt', 'openvino', 'vitisai'. Default: 'qnn'. + -p [plugin_ep_config_json_file]: Specify JSON configuration file for a plugin EP. Takes precedence over the '-e' and '-i' options. + + Example JSON configuration that selects plugin EP devices via name: + { + "ep_library_registration_name": "example_plugin_ep", + "ep_library_path": "example_plugin_ep.dll", + "selected_ep_name": "example_plugin_ep", + "default_ep_options": { "key": "value" } + } + + Example JSON configuration that selects plugin EP devices via index: + { + "ep_library_registration_name": "example_plugin_ep", + "ep_library_path": "example_plugin_ep.dll", + "selected_ep_device_indices": [ 0 ], + "default_ep_options": { "key": "value" } + } -v: Show verbose information. 
import os

import onnxruntime


def main():
    """Compile two models with an example plugin EP while sharing resources (e.g., weights).

    Raises:
        RuntimeError: if no EP device is reported for the registered EP name.
    """
    ep_name = "example_ep"
    ep_lib_path = "example_plugin_ep.dll"

    onnxruntime.register_execution_provider_library(ep_name, os.path.realpath(ep_lib_path))
    try:
        # Find one or more EP devices that correspond to the EP of interest.
        # In this example, we pick the first one.
        ep_device = next((d for d in onnxruntime.get_ep_devices() if d.ep_name == ep_name), None)
        if ep_device is None:
            # Fail with a clear message instead of handing `None` to add_provider_for_devices().
            raise RuntimeError(f"No EP device found for EP '{ep_name}'")

        # These are the names/paths to the input and output models.
        input_models = ["model_0.onnx", "model_1.onnx"]
        output_models = ["model_0_ctx.onnx", "model_1_ctx.onnx"]

        num_models = len(input_models)
        session_options = onnxruntime.SessionOptions()
        provider_options = {}  # Empty for this example

        # Set option that tells EP to share resources (e.g., weights) across sessions.
        session_options.add_session_config_entry("ep.share_ep_contexts", "1")
        session_options.add_provider_for_devices([ep_device], provider_options)

        # Compile individual models
        for i, (input_model, output_model) in enumerate(zip(input_models, output_models)):
            if i == num_models - 1:
                # Tell EP that this is the last compiling session that will be sharing resources.
                session_options.add_session_config_entry("ep.stop_share_ep_contexts", "1")

            model_compiler = onnxruntime.ModelCompiler(
                session_options,
                input_model,
                embed_compiled_data_into_model=False,
            )
            model_compiler.compile_to_file(output_model)
    finally:
        # Unregister the EP library even when device lookup or compilation fails.
        onnxruntime.unregister_execution_provider_library(ep_name)


if __name__ == "__main__":
    main()
Takes precedence over the '-e' and '-i' options.\n" + "\n" + "\t Example JSON configuration that selects plugin EP devices via EP name:\n" + "\t {\n" + "\t \"ep_library_registration_name\": \"example_plugin_ep\",\n" + "\t \"ep_library_path\": \"example_plugin_ep.dll\",\n" + "\t \"selected_ep_name\": \"example_plugin_ep\",\n" + "\t \"default_ep_options\": { \"key\": \"value\" }\n" + "\t }\n" + "\n" + "\t Example JSON configuration that selects plugin EP devices via index:\n" + "\t {\n" + "\t \"ep_library_registration_name\": \"example_plugin_ep\",\n" + "\t \"ep_library_path\": \"example_plugin_ep.dll\",\n" + "\t \"selected_ep_device_indices\": [ 0 ],\n" + "\t \"default_ep_options\": { \"key\": \"value\" }\n" + "\t }\n" "\t-v: Show verbose information.\n" "\t-C: Specify session configuration entries as key-value pairs: -C \"| |\" \n" "\t Refer to onnxruntime_session_options_config_keys.h for valid keys and values. \n" @@ -58,6 +77,7 @@ namespace qnnctxgen { "\n" "\t-h: help\n"); } + #ifdef _WIN32 static const ORTCHAR_T* delimiter = L","; #else @@ -110,9 +130,63 @@ static bool ParseSessionConfigs(const std::string& configs_string, return true; } +static bool ParsePluginEpConfig(const std::string& json_file_path, PluginEpConfig& config_out) { + using json = nlohmann::json; + bool success = true; + + ORT_TRY { + std::ifstream ifs{json_file_path}; + if (!ifs) { + std::cerr << "ERROR: Failed to open plugin EP configuration file at path: " + << json_file_path.c_str() << std::endl; + return false; + } + + std::string content(std::istreambuf_iterator{ifs}, + std::istreambuf_iterator{}); + PluginEpConfig config{}; + const auto parsed_json = json::parse(content); + + // required keys + parsed_json.at("ep_library_registration_name").get_to(config.ep_library_registration_name); + parsed_json.at("ep_library_path").get_to(config.ep_library_path); + + // optional keys + config.default_ep_options = parsed_json.value("default_ep_options", {}); + config.selected_ep_name = 
parsed_json.value("selected_ep_name", {}); + config.selected_ep_device_indices = + parsed_json.value("selected_ep_device_indices", {}); + + if (config.selected_ep_name.empty() == config.selected_ep_device_indices.empty()) { + std::cerr << "ERROR: Plugin EP configuration must specify exactly one of 'selected_ep_name' " + << "or 'selected_ep_device_indices'" << std::endl; + return false; + } + + config_out = std::move(config); + return success; + } + ORT_CATCH(const json::exception& e) { + ORT_HANDLE_EXCEPTION([&]() { + std::string kExampleValidJsonStr = + "{\n" + " \"ep_library_registration_name\": \"example_plugin_ep\",\n" + " \"ep_library_path\": \"/path/to/example_plugin_ep.dll\",\n" + " \"selected_ep_name\": \"example_plugin_ep\"\n" + "}"; + + success = false; + std::cerr << "ERROR: JSON parse error: " << e.what() << std::endl; + std::cerr << "This is an example valid JSON configuration:\n" + << kExampleValidJsonStr.c_str() << std::endl; + }); + } + return success; +} + /*static*/ bool CommandLineParser::ParseArguments(TestConfig& test_config, int argc, ORTCHAR_T* argv[]) { int ch; - while ((ch = getopt(argc, argv, ORT_TSTR("e:o:u:i:C:vh"))) != -1) { + while ((ch = getopt(argc, argv, ORT_TSTR("e:p:o:u:i:C:vh"))) != -1) { switch (ch) { case 'e': if (!CompareCString(optarg, ORT_TSTR("qnn"))) { @@ -128,6 +202,20 @@ static bool ParseSessionConfigs(const std::string& configs_string, return false; } break; + case 'p': { +#ifdef _MSC_VER + std::string plugin_ep_config_file_path = ToUTF8String(optarg); +#else + std::string plugin_ep_config_file_path = optarg; +#endif + PluginEpConfig plugin_ep_config{}; + if (!ParsePluginEpConfig(plugin_ep_config_file_path, plugin_ep_config)) { + return false; + } + + test_config.machine_config.plugin_ep_config = std::move(plugin_ep_config); + break; + } case 'v': test_config.run_config.f_verbose = true; break; @@ -202,6 +290,11 @@ static bool ParseSessionConfigs(const std::string& configs_string, argc -= optind; argv += optind; + if 
(argc == 0) { + std::cerr << "ERROR: Did not specify model paths" << std::endl; + return false; + } + ParsePaths(argv[0], test_config.model_file_paths); return true; diff --git a/onnxruntime/test/ep_weight_sharing_ctx_gen/example_plugin_ep_config.json b/onnxruntime/test/ep_weight_sharing_ctx_gen/example_plugin_ep_config.json new file mode 100644 index 0000000000000..f8967d1831582 --- /dev/null +++ b/onnxruntime/test/ep_weight_sharing_ctx_gen/example_plugin_ep_config.json @@ -0,0 +1,6 @@ +{ + "ep_library_registration_name": "example_plugin_ep", + "ep_library_path": "example_plugin_ep.dll", + "selected_ep_name": "example_plugin_ep", + "default_ep_options": { "option_key": "option_value" } +} diff --git a/onnxruntime/test/ep_weight_sharing_ctx_gen/main.cc b/onnxruntime/test/ep_weight_sharing_ctx_gen/main.cc index 18abe1eb131d8..3f2cda26fe9df 100644 --- a/onnxruntime/test/ep_weight_sharing_ctx_gen/main.cc +++ b/onnxruntime/test/ep_weight_sharing_ctx_gen/main.cc @@ -10,6 +10,7 @@ // onnx dependencies #include "onnx/onnx_pb.h" +#include #include using namespace onnxruntime; @@ -81,6 +82,72 @@ static void UpdateEpContextModel(const std::vector> } } +using PluginEpLibraryRegistrationHandle = std::unique_ptr>; + +static PluginEpLibraryRegistrationHandle RegisterPluginEpLibrary(Ort::Env& env, + const std::string& ep_library_registration_name, + const std::basic_string& ep_library_path) { + env.RegisterExecutionProviderLibrary(ep_library_registration_name.c_str(), ep_library_path); + + auto unregister_ep_library = [&env, registration_name = ep_library_registration_name](void* p) { + if (p == nullptr) { + return; + } + + ORT_TRY { + env.UnregisterExecutionProviderLibrary(registration_name.c_str()); + } + ORT_CATCH(const Ort::Exception& e) { + ORT_HANDLE_EXCEPTION([&]() { + std::cerr << "Failed to unregister EP library with name '" << registration_name << "': " + << e.what() << std::endl; + }); + } + }; + + // Set `handle_value` to something not equal to nullptr. 
The particular value doesn't really matter. + // We are just using the unique_ptr deleter to unregister the EP library. + void* const handle_value = reinterpret_cast(0x1); + return PluginEpLibraryRegistrationHandle{handle_value, unregister_ep_library}; +} + +static bool SetPluginEpSessionOptions(Ort::Env& env, Ort::SessionOptions& session_options, + const qnnctxgen::PluginEpConfig& config, + PluginEpLibraryRegistrationHandle& plugin_ep_library_registration_handle) { + auto lib_registration_handle = RegisterPluginEpLibrary(env, config.ep_library_registration_name, + ToPathString(config.ep_library_path)); + + std::vector ep_devices = env.GetEpDevices(); + std::vector selected_ep_devices{}; + + if (!config.selected_ep_device_indices.empty()) { + for (const auto idx : config.selected_ep_device_indices) { + if (idx >= ep_devices.size()) { + std::cerr << "ERROR: Selected EP device index is out of range (max is " << ep_devices.size() - 1 << "): " + << idx << std::endl; + return false; + } + + selected_ep_devices.push_back(ep_devices[idx]); + } + } else { + std::copy_if(ep_devices.begin(), ep_devices.end(), std::back_inserter(selected_ep_devices), + [&selected_ep_name = std::as_const(config.selected_ep_name)](Ort::ConstEpDevice ep_device) { + return ep_device.EpName() == selected_ep_name; + }); + } + + if (selected_ep_devices.empty()) { + std::cerr << "ERROR: No EP devices were selected" << std::endl; + return false; + } + + session_options.AppendExecutionProvider_V2(env, selected_ep_devices, config.default_ep_options); + plugin_ep_library_registration_handle = std::move(lib_registration_handle); + + return true; +} + #ifdef _WIN32 int real_main(int argc, wchar_t* argv[]) { #else @@ -98,6 +165,7 @@ int real_main(int argc, char* argv[]) { Ort::Env env(logging_level, "ep_weight_sharing"); ORT_TRY { + PluginEpLibraryRegistrationHandle plugin_ep_library_registration_handle{}; Ort::SessionOptions so; so.SetLogId("ep_weight_sharing_ctx_gen_session_logger"); // Set default 
session option to dump EPContext model with non-embed mode @@ -136,7 +204,14 @@ int real_main(int argc, char* argv[]) { // The context binary file generated later includes all graphs from previous models { std::string provider_name_ = test_config.machine_config.provider_type_name; - if (provider_name_ == onnxruntime::kQnnExecutionProvider) { + + if (const auto& plugin_ep_config = test_config.machine_config.plugin_ep_config; plugin_ep_config.has_value()) { + if (!SetPluginEpSessionOptions(env, so, *plugin_ep_config, plugin_ep_library_registration_handle)) { + std::cerr << "ERROR: Failed to initialize session for plugin EP " + << test_config.machine_config.plugin_ep_config->ep_library_path << std::endl; + return 1; + } + } else if (provider_name_ == onnxruntime::kQnnExecutionProvider) { #ifdef USE_QNN so.AppendExecutionProvider("QNN", provider_options); #else diff --git a/onnxruntime/test/ep_weight_sharing_ctx_gen/test_configuration.h b/onnxruntime/test/ep_weight_sharing_ctx_gen/test_configuration.h index 198d03211f561..6dfb7b60ddc27 100644 --- a/onnxruntime/test/ep_weight_sharing_ctx_gen/test_configuration.h +++ b/onnxruntime/test/ep_weight_sharing_ctx_gen/test_configuration.h @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -14,8 +15,25 @@ namespace onnxruntime { namespace qnnctxgen { +// Configuration for initializing the dynamic plugin EP infrastructure. +struct PluginEpConfig { + std::string ep_library_registration_name{}; + std::string ep_library_path{}; + + // Note: Exactly one of `selected_ep_name` or `selected_ep_device_indices` should be set. + // An empty value for either means it is unset. + + // Specifies the EP devices matching this EP name as the selected EP devices. + std::string selected_ep_name{}; + // Specifies the selected EP devices by their indices. 
def get_shared_library_filename_for_platform(base_name):
    """Return the shared library file name for `base_name` on the current platform.

    The result is chosen from `sys.platform`:
      - starts with "win":    "<base_name>.dll"
      - starts with "darwin": "lib<base_name>.dylib"
      - anything else:        "lib<base_name>.so" (assumed to be Linux)
    """
    platform = sys.platform

    if platform.startswith("win"):
        return base_name + ".dll"

    # Non-Windows platforms use the "lib" prefix; only the extension differs.
    extension = ".dylib" if platform.startswith("darwin") else ".so"
    return "lib" + base_name + extension
FileNotFoundError: + self.skipTest(f"Skipping test because EP library '{ep_lib_path}' cannot be found") + + ep_name = "example_ep" + self.register_execution_provider_library(ep_name, os.path.realpath(ep_lib_path)) + + ep_device = next((d for d in onnxrt.get_ep_devices() if d.ep_name == ep_name), None) + self.assertIsNotNone(ep_device) + + input_models = [get_name("add_mul_add.onnx"), get_name("mul_1.onnx")] + output_models = [ + os.path.join(self._tmp_dir_path, "output_model_0_ctx.onnx"), + os.path.join(self._tmp_dir_path, "output_model_1_ctx.onnx"), + ] + + num_models = len(input_models) + session_options = onnxrt.SessionOptions() + + # Set option that tells EP to share resources (e.g., weights) across sessions. The example plugin EP + # doesn't actually do anything special, but we do this to test the API + session_options.add_session_config_entry("ep.share_ep_contexts", "1") + session_options.add_provider_for_devices([ep_device], {}) + + # Compile individual models + for i in range(num_models): + if i == num_models - 1: + # Tell EP that this is the last session that will be sharing resources. + session_options.add_session_config_entry("ep.stop_share_ep_contexts", "1") + + model_compiler = onnxrt.ModelCompiler( + session_options, + input_models[i], + embed_compiled_data_into_model=False, + ) + model_compiler.compile_to_file(output_models[i]) + self.assertTrue(os.path.exists(output_models[i])) + + self.unregister_execution_provider_library(ep_name) + def test_compile_with_ep_selection_delegate(self): """ Tests compiling a model (to/from files) using an EP selection delegate callback.