diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 3426a2781bbc6..b0d574874e505 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -96,7 +96,7 @@ BackendManager::BackendManager(SessionContext& session_context, ptr_stream_t model_stream; std::unique_ptr model_proto; if (subgraph_context_.is_ep_ctx_graph) { - if (!session_context_.reshape.empty()) { + if (!session_context_.reshape.empty() && !subgraph_context_.is_ep_ctx_ovir_encapsulated) { std::string exception_str = "[OpenVINO-EP] Bounded dynamic model execution using provider option reshape_input is not supported for OVEP EPContext model"; ORT_THROW(exception_str); diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index d7fc0553fb1d4..451d2b3ae266c 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -63,7 +63,8 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr hw_target, device_config, enable_causallm, - model_file_path()); + model_file_path(), + session_context_); } else { // If the blob is held in an EPContext node, then skip FE+Compile // and directly move on to creating a backend with the executable blob diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc index 23be3447b8799..4c5d8ba5ad1c1 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.cc +++ b/onnxruntime/core/providers/openvino/ov_interface.cc @@ -226,7 +226,8 @@ OVExeNetwork OVCore::ImportEPCtxOVIREncapsulation(std::istream& model_stream, std::string& hw_target, const ov::AnyMap& device_config, bool enable_causallm, - std::filesystem::path model_file_path) { + std::filesystem::path model_file_path, + const SessionContext& session_context) { return OvExceptionBoundary([&]() { OVExeNetwork exe; @@ -259,6 +260,11 @@ OVExeNetwork OVCore::ImportEPCtxOVIREncapsulation(std::istream& model_stream, // Load the model explicitly with XML contents std::shared_ptr model = core.read_model(xml_file_path.string()); + if (!session_context.reshape.empty()) { + LOGS_DEFAULT(INFO) << log_tag << "Reshaping OV-IR model to specified shape"; + model->reshape(session_context.reshape); + } + if (enable_causallm) { exe = OVCore::Get()->StatefulCompileModel(model, hw_target, device_config); } else { diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h index 8fc28b8885e5d..aa4b3fbe64898 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.h +++ b/onnxruntime/core/providers/openvino/ov_interface.h @@ -39,6 +39,7 @@ class OVCore; class OVInferRequest; class OVExeNetwork; struct ModelBlobWrapper; +struct SessionContext; typedef ov::Tensor OVTensor; typedef ov::ProfilingInfo OVProfilingInfo; @@ -77,7 +78,8 @@ struct OVCore : WeakSingleton { std::string& hw_target, const ov::AnyMap& device_config, bool enable_causallm, - std::filesystem::path model_file_path); + std::filesystem::path model_file_path, + const SessionContext& session_context); std::vector GetAvailableDevices() const; std::vector GetAvailableDevices(const std::string& device_type) const;