Skip to content

Commit d951954

Browse files
authored
CVS-175504: Additional single bin simplifications + fixes for bi-directional compatibility (#851)
* Modify shared context lifetime
* Provide more helpful error message when failing to deserialize bin
* Remove unused clear functions
* Remove unused variable
1 parent 75c11ae commit d951954

File tree

11 files changed

+69
-129
lines changed

11 files changed

+69
-129
lines changed

onnxruntime/core/providers/openvino/backend_manager.cc

Lines changed: 10 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -43,30 +43,18 @@ static bool ShouldExportEpContext(const SessionContext& session_context, const S
4343
}
4444

4545
BackendManager::BackendManager(SessionContext& session_context,
46-
SharedContextManager& shared_context_manager,
46+
SharedContext& shared_context,
4747
const onnxruntime::Node& fused_node,
4848
const onnxruntime::GraphViewer& subgraph,
4949
const logging::Logger& logger,
5050
EPCtxHandler& ep_ctx_handle) : ep_ctx_handle_(ep_ctx_handle),
5151
session_context_(session_context),
52-
shared_context_manager_(shared_context_manager) {
52+
shared_context_(shared_context) {
5353
subgraph_context_.is_ep_ctx_graph = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(subgraph);
5454
// If the graph contains an OVIR-wrapped node, we check whether it has a matching XML file name attribute
5555
subgraph_context_.is_ep_ctx_ovir_encapsulated = ep_ctx_handle_.CheckEPCacheContextAttribute(subgraph,
5656
session_context_.onnx_model_path_name.filename().replace_extension("xml").string());
5757

58-
if (subgraph_context_.is_ep_ctx_graph && !subgraph_context_.is_ep_ctx_ovir_encapsulated) {
59-
shared_context_ = ep_ctx_handle.GetSharedContextForEpContextSubgraph(subgraph,
60-
session_context_.GetModelPath());
61-
} else if (session_context_.so_context_enable && session_context_.so_share_ep_contexts) {
62-
shared_context_ = shared_context_manager_.GetOrCreateActiveSharedContext(session_context_.GetOutputBinPath());
63-
} else {
64-
// Creating a shared context to satisfy backend. It won't be used for weight sharing.
65-
// Don't make it the active share context since we don't actually want to share it.
66-
shared_context_ = shared_context_manager_.GetOrCreateSharedContext(session_context_.GetOutputBinPath());
67-
}
68-
ORT_ENFORCE(shared_context_, "Could not create a shared context.");
69-
7058
subgraph_context_.model_precision = [&](const GraphViewer& graph_viewer) {
7159
// return empty if graph has no inputs or if types are not one of FP32/FP16
7260
// else assume the type of the first input
@@ -138,7 +126,7 @@ BackendManager::BackendManager(SessionContext& session_context,
138126
concrete_backend_ = BackendFactory::MakeBackend(model_proto,
139127
session_context_,
140128
subgraph_context_,
141-
*shared_context_,
129+
shared_context_,
142130
model_stream);
143131
} catch (std::string const& msg) {
144132
ORT_THROW(msg);
@@ -161,13 +149,13 @@ BackendManager::BackendManager(SessionContext& session_context,
161149
concrete_backend_ = BackendFactory::MakeBackend(model_proto,
162150
session_context_,
163151
subgraph_context_,
164-
*shared_context_,
152+
shared_context_,
165153
model_stream);
166154
}
167155

168156
if (ShouldExportEpContext(session_context_, subgraph_context_)) {
169157
if (concrete_backend_) {
170-
shared_context_->AddNativeBlob(subgraph_context_.subgraph_name, concrete_backend_->GetOVCompiledModel());
158+
shared_context_.AddNativeBlob(subgraph_context_.subgraph_name, concrete_backend_->GetOVCompiledModel());
171159
} else {
172160
ORT_THROW(
173161
"Exporting dynamically compiled models at runtime is not supported. "
@@ -193,19 +181,11 @@ void BackendManager::TryExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphVi
193181
if (session_context_.so_context_embed_mode) { // Internal blob
194182
if (include_embed_data) {
195183
std::stringstream ss;
196-
shared_context_->Serialize(ss);
184+
shared_context_.Serialize(ss);
197185
model_blob_str = std::move(ss).str();
198186
}
199187
} else { // External blob
200-
// Build name by combining EpCtx model name (if available) and subgraph name. Model
201-
// name is not available in when creating a session from memory
202-
auto name = session_context_.so_context_file_path.stem().string();
203-
if (name.empty() && !graph_body_viewer.ModelPath().empty()) {
204-
name = graph_body_viewer.ModelPath().stem().string();
205-
}
206-
ORT_ENFORCE(!name.empty());
207-
208-
model_blob_str = shared_context_->GetBinPath().filename().string();
188+
model_blob_str = shared_context_.GetBinPath().filename().string();
209189
}
210190

211191
auto status = ep_ctx_handle_.AddOVEPCtxNodeToGraph(graph_body_viewer,
@@ -521,7 +501,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
521501
if ((session_context_.device_type.find("NPU") != std::string::npos) &&
522502
(enable_ovep_qdq_optimizer || session_context_.so_share_ep_contexts)) {
523503
std::unique_ptr<onnxruntime::Model> model;
524-
Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, enable_ovep_qdq_optimizer, model, *shared_context_);
504+
Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, enable_ovep_qdq_optimizer, model, shared_context_);
525505
auto model_proto = model->ToProto();
526506
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
527507
print_model_proto_duration();
@@ -788,7 +768,7 @@ void BackendManager::Compute(OrtKernelContext* context) {
788768
dynamic_backend = BackendFactory::MakeBackend(modelproto_with_concrete_shapes,
789769
session_context_,
790770
subgraph_context_,
791-
*shared_context_,
771+
shared_context_,
792772
model_stream);
793773
} catch (const OnnxRuntimeException& ex) {
794774
// Build option disables fallback to CPU on compilation failures with NPU.
@@ -808,7 +788,7 @@ void BackendManager::Compute(OrtKernelContext* context) {
808788
dynamic_backend = BackendFactory::MakeBackend(modelproto_with_concrete_shapes,
809789
session_context_,
810790
subgraph_context_,
811-
*shared_context_,
791+
shared_context_,
812792
model_stream);
813793
} catch (std::string const& msg) {
814794
ORT_THROW(msg);

onnxruntime/core/providers/openvino/backend_manager.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ namespace openvino_ep {
2020
class BackendManager {
2121
public:
2222
BackendManager(SessionContext& session_context,
23-
SharedContextManager& shared_context_manager,
23+
SharedContext& shared_context,
2424
const onnxruntime::Node& fused_node,
2525
const onnxruntime::GraphViewer& subgraph,
2626
const logging::Logger& logger,
@@ -59,8 +59,7 @@ class BackendManager {
5959
SubGraphContext subgraph_context_;
6060
EPCtxHandler& ep_ctx_handle_;
6161
SessionContext& session_context_;
62-
SharedContextManager& shared_context_manager_;
63-
std::shared_ptr<SharedContext> shared_context_;
62+
SharedContext& shared_context_;
6463
};
6564

6665
} // namespace openvino_ep

onnxruntime/core/providers/openvino/contexts.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,11 +97,12 @@ struct SessionContext : ProviderInfo {
9797
return onnx_model_path_name.empty() ? so_context_file_path : onnx_model_path_name;
9898
}
9999

100-
const std::filesystem::path GetOutputBinPath() const {
101-
std::filesystem::path bin_file_name = so_context_file_path;
102-
if (bin_file_name.empty()) {
103-
bin_file_name = onnx_model_path_name;
104-
}
100+
const std::filesystem::path& GetOutputModelPath() const {
101+
return so_context_file_path.empty() ? onnx_model_path_name : so_context_file_path;
102+
}
103+
104+
std::filesystem::path GetOutputBinPath() const {
105+
const auto& bin_file_name = GetOutputModelPath();
105106
if (bin_file_name.empty()) {
106107
return {};
107108
}

onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc

Lines changed: 16 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -93,29 +93,6 @@ Status EPCtxHandler::AddOVEPCtxNodeToGraph(const GraphViewer& graph_viewer,
9393
return Status::OK();
9494
}
9595

96-
std::shared_ptr<SharedContext> EPCtxHandler::GetSharedContextForEpContextSubgraph(const GraphViewer& subgraph_view, const std::filesystem::path& ep_context_path) const {
97-
if (!CheckForOVEPCtxNodeInGraph(subgraph_view)) {
98-
return nullptr;
99-
}
100-
101-
auto first_index = *subgraph_view.GetNodesInTopologicalOrder().begin();
102-
auto node = subgraph_view.GetNode(first_index);
103-
ORT_ENFORCE(node != nullptr);
104-
auto& attrs = node->GetAttributes();
105-
ORT_ENFORCE(attrs.count(EP_CACHE_CONTEXT) == 1);
106-
const auto& ep_cache_context = attrs.at(EP_CACHE_CONTEXT).s();
107-
108-
ORT_ENFORCE(attrs.count(EMBED_MODE) == 1);
109-
bool embed_mode = static_cast<bool>(attrs.at(EMBED_MODE).i());
110-
111-
std::filesystem::path bin_path{};
112-
if (!embed_mode) {
113-
bin_path = ep_context_path.parent_path() / ep_cache_context;
114-
}
115-
116-
return shared_context_manager_->GetOrCreateSharedContext(bin_path);
117-
}
118-
11996
std::unique_ptr<ModelBlobWrapper> EPCtxHandler::GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& graph_viewer) const {
12097
auto first_index = *graph_viewer.GetNodesInTopologicalOrder().begin();
12198
auto node = graph_viewer.GetNode(first_index);
@@ -218,10 +195,12 @@ bool EPCtxHandler::CheckEPCacheContextAttribute(const GraphViewer& graph_viewer,
218195
return false;
219196
}
220197

221-
void EPCtxHandler::Initialize(const std::vector<IExecutionProvider::FusedNodeAndGraph>& fused_nodes, const std::filesystem::path& ep_context_dir) {
198+
std::shared_ptr<SharedContext> EPCtxHandler::Initialize(const std::vector<IExecutionProvider::FusedNodeAndGraph>& fused_nodes, const SessionContext& session_context) {
222199
bool has_embed_nodes = false;
223200
bool has_non_embed_nodes = false;
224201
bool has_main_context = false;
202+
203+
std::shared_ptr<SharedContext> shared_context{};
225204
for (const auto& fused_node_graph : fused_nodes) {
226205
const GraphViewer& graph_viewer = fused_node_graph.filtered_graph;
227206

@@ -241,28 +220,29 @@ void EPCtxHandler::Initialize(const std::vector<IExecutionProvider::FusedNodeAnd
241220
if (attrs.count(EMBED_MODE) == 1) {
242221
embed_mode = static_cast<bool>(attrs.at(EMBED_MODE).i());
243222
}
244-
has_embed_nodes |= embed_mode;
245-
has_non_embed_nodes |= !embed_mode;
246223

247224
bool main_context = true;
248225
if (attrs.count(MAIN_CONTEXT) == 1) {
249226
main_context = static_cast<bool>(attrs.at(MAIN_CONTEXT).i());
250227
}
228+
251229
has_main_context |= main_context;
230+
has_embed_nodes |= embed_mode;
231+
has_non_embed_nodes |= !embed_mode;
252232

253233
const std::string& ep_cache_context = attrs.at(EP_CACHE_CONTEXT).s();
254234
if (embed_mode) {
255235
std::filesystem::path dummy_path{};
256-
auto shared_context = shared_context_manager_->GetOrCreateSharedContext(dummy_path);
236+
shared_context = shared_context_manager_->GetOrCreateSharedContext(dummy_path);
257237
if (main_context) {
258238
ORT_ENFORCE(!ep_cache_context.empty(), "Embedded EP context is indicated but EP_CACHE_CONTEXT attribute is empty.");
259239
std::istringstream ss(ep_cache_context);
260240
shared_context->Deserialize(ss);
261241
}
262242
} else {
263-
std::filesystem::path ep_context_path = ep_context_dir / ep_cache_context;
243+
std::filesystem::path ep_context_path = session_context.GetOutputModelPath().parent_path() / ep_cache_context;
264244
if (ep_context_path.extension() != ".xml") {
265-
auto shared_context = shared_context_manager_->GetOrCreateSharedContext(ep_context_path);
245+
shared_context = shared_context_manager_->GetOrCreateSharedContext(ep_context_path);
266246
shared_context->Deserialize();
267247
}
268248
}
@@ -272,6 +252,13 @@ void EPCtxHandler::Initialize(const std::vector<IExecutionProvider::FusedNodeAnd
272252
"Mixed embed and non-embed EP context nodes are not supported in a single model.");
273253
ORT_ENFORCE(!(has_embed_nodes && !has_main_context),
274254
"Expected at least one main context node when embedded EP context nodes are present.");
255+
256+
// No ep context nodes found - create a shared context that can hold native blobs or shared weights.
257+
if (!shared_context) {
258+
shared_context = shared_context_manager_->GetOrCreateActiveSharedContext(session_context.GetOutputBinPath());
259+
}
260+
261+
return shared_context;
275262
}
276263

277264
} // namespace openvino_ep

onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,12 @@
99

1010
#include "core/providers/shared_library/provider_api.h"
1111
#include "core/framework/execution_provider.h"
12-
#include "ov_bin_manager.h"
1312
#include "ov_shared_context.h"
13+
#include "contexts.h"
1414

1515
namespace onnxruntime {
1616
namespace openvino_ep {
1717

18-
class SharedBinManager;
19-
2018
struct ModelBlobWrapper {
2119
ModelBlobWrapper(std::unique_ptr<std::istream> stream, const ov::Tensor& tensor) : stream_(std::move(stream)), tensor_(tensor) {}
2220
std::unique_ptr<std::istream> stream_;
@@ -38,7 +36,6 @@ class EPCtxHandler {
3836
EPCtxHandler(std::string ov_sdk_version, const logging::Logger& logger, std::shared_ptr<SharedContextManager> shared_context_manager);
3937
EPCtxHandler(const EPCtxHandler&) = delete; // No copy constructor
4038
bool CheckForOVEPCtxNodeInGraph(const GraphViewer& subgraph_view) const;
41-
std::shared_ptr<SharedContext> GetSharedContextForEpContextSubgraph(const GraphViewer& subgraph_view, const std::filesystem::path& ep_context_path) const;
4239
bool CheckForOVEPCtxNode(const Node& node) const;
4340
Status AddOVEPCtxNodeToGraph(const GraphViewer& subgraph_view,
4441
const std::string& graph_name,
@@ -47,7 +44,7 @@ class EPCtxHandler {
4744
std::unique_ptr<ModelBlobWrapper> GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& subgraph_view) const;
4845
InlinedVector<const Node*> GetEPCtxNodes() const;
4946
bool CheckEPCacheContextAttribute(const GraphViewer& subgraph_view, const std::string& target_attr_extn) const;
50-
void Initialize(const std::vector<IExecutionProvider::FusedNodeAndGraph>& fused_nodes, const std::filesystem::path& ep_context_path);
47+
std::shared_ptr<SharedContext> Initialize(const std::vector<IExecutionProvider::FusedNodeAndGraph>& fused_nodes, const SessionContext& session_context);
5148

5249
private:
5350
const std::string openvino_sdk_version_;

onnxruntime/core/providers/openvino/openvino_execution_provider.cc

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -110,22 +110,17 @@ common::Status OpenVINOExecutionProvider::Compile(
110110
std::string("Invalid EP context configuration: ") + kOrtSessionOptionEpContextEmbedMode + " must be 0 if " + kOrtSessionOptionShareEpContexts + " is 1.");
111111
}
112112

113-
bool is_epctx_model = false;
114113
if (!fused_nodes.empty()) {
115114
// Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext
116115
const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get();
117116
session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string();
118117
session_context_.onnx_opset_version =
119118
graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain);
120-
121-
// OVIR wrapped in epctx should be treated as source but this code does not
122-
// This corner case is not in use and will be addressed in a future commit
123-
is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0);
124119
}
125120

126-
if (is_epctx_model) {
127-
ep_ctx_handle_.Initialize(fused_nodes, session_context_.GetOutputBinPath().parent_path());
128-
}
121+
shared_context_ = ep_ctx_handle_.Initialize(fused_nodes, session_context_);
122+
ORT_ENFORCE(shared_context_,
123+
"Failed to create or retrieve SharedContext");
129124

130125
struct OpenVINOEPFunctionState {
131126
AllocateFunc allocate_func = nullptr;
@@ -145,7 +140,7 @@ common::Status OpenVINOExecutionProvider::Compile(
145140
// For original model, check if the user wants to export a model with pre-compiled blob
146141

147142
auto& backend_manager = backend_managers_.emplace_back(session_context_,
148-
*shared_context_manager_,
143+
*shared_context_,
149144
fused_node,
150145
graph_body_viewer,
151146
logger,
@@ -199,11 +194,9 @@ common::Status OpenVINOExecutionProvider::Compile(
199194

200195
// A bit clunky; ideally we should try to fold this into the EP context handler
201196
if (!session_context_.so_context_embed_mode) {
202-
auto shared_context = shared_context_manager_->GetOrCreateActiveSharedContext(session_context_.GetOutputBinPath());
203-
shared_context->Serialize();
197+
shared_context_->Serialize();
204198
if (session_context_.so_stop_share_ep_contexts) {
205199
shared_context_manager_->ClearActiveSharedContext();
206-
shared_context->Clear();
207200
}
208201
}
209202
}

onnxruntime/core/providers/openvino/openvino_execution_provider.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ class OpenVINOExecutionProvider : public IExecutionProvider {
8181
SessionContext session_context_;
8282
std::shared_ptr<OVCore> ov_core_;
8383
std::shared_ptr<SharedContextManager> shared_context_manager_;
84+
std::shared_ptr<SharedContext> shared_context_;
8485

8586
std::list<BackendManager> backend_managers_; // EP session owns the backend objects
8687
EPCtxHandler ep_ctx_handle_;

0 commit comments

Comments
 (0)