Skip to content

Commit 528e9c1

Browse files
authored
[intel_npu] Optimize import_model path when compiled_blob prop is not empty (#30297)
### Details:
- *If an application passes the `ov::hint::compiled_blob(ov::Tensor)` property to `core.import_model`, it currently also has to copy the blob into the `std::stringstream` it supplies. If that extra copy is skipped, an exception is thrown, because operations such as `stream.read` cannot be performed on an empty stream.*
- To let applications pass an empty `std::stringstream` and thereby avoid the extra blob copy, the logic that extracts the `ov::Tensor` from the `ov::hint::compiled_blob` property is moved to the beginning of the `import_model` function, and the internal `std::streambuf` of the `std::istream` is replaced with one of type `ov::SharedStreamBuffer`.
- 3 new tests were added to cover these changes:
  - `OVHoldersTestOnImportedNetworkNPU.CanInferAfterCompiledBlobPropTensorIsDestroyed`
  - `OVCompiledGraphImportExportTestNPU.CanImportModelWithEmptyIStreamAndCompiledBlobProp`
  - `OVCompiledGraphImportExportTestNPU.CanImportModelWithApplicationHeaderAndCompiledBlobProp`

### Tickets:
- *157192*
1 parent da6bd94 commit 528e9c1

File tree

7 files changed

+227
-36
lines changed

7 files changed

+227
-36
lines changed

src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ size_t Graph::export_blob(std::ostream& stream) const {
3535
std::vector<uint8_t> blob;
3636

3737
if (_blobIsReleased) {
38-
OPENVINO_THROW("Model was imported (not compiled) by the plugin. Model export is forbidden in this case!");
38+
OPENVINO_THROW("Model was optimized away. Try importing it using `ov::hint::compiled_blob` property to extend "
39+
"its lifetime.");
3940
}
4041

4142
if (_blobPtr == nullptr) { // when compiling the model using Compiler in Driver, the blob is handled by the driver

src/plugins/intel_npu/src/plugin/src/plugin.cpp

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -523,9 +523,30 @@ ov::SoPtr<ov::IRemoteContext> Plugin::get_default_context(const ov::AnyMap&) con
523523
return std::make_shared<RemoteContextImpl>(_backend);
524524
}
525525

526-
std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, const ov::AnyMap& properties) const {
526+
std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& origStream, const ov::AnyMap& properties) const {
527527
OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model");
528528

529+
ov::AnyMap npu_plugin_properties = properties;
530+
std::shared_ptr<ov::AlignedBuffer> modelBuffer;
531+
ov::SharedStreamBuffer buffer = {nullptr, 0};
532+
std::istream stream{origStream.rdbuf()};
533+
// ov::hint::compiled_blob has no corresponding "Config" implementation thus we need to remove it from the
534+
// list of properties
535+
if (auto blob_it = npu_plugin_properties.find(ov::hint::compiled_blob.name());
536+
blob_it != npu_plugin_properties.end()) {
537+
auto compiled_blob = blob_it->second.as<ov::Tensor>();
538+
modelBuffer = std::make_shared<ov::SharedBuffer<ov::Tensor>>(reinterpret_cast<char*>(compiled_blob.data()),
539+
compiled_blob.get_byte_size(),
540+
compiled_blob);
541+
buffer = ov::SharedStreamBuffer(reinterpret_cast<char*>(compiled_blob.data()), compiled_blob.get_byte_size());
542+
stream.rdbuf(&buffer);
543+
if (auto loadedFromCache = npu_plugin_properties.find(ov::loaded_from_cache.name());
544+
loadedFromCache != npu_plugin_properties.end() && loadedFromCache->second.as<bool>() != false) {
545+
stream.seekg(origStream.tellg(), std::ios::cur); // skip OV header in case of cached blob
546+
}
547+
npu_plugin_properties.erase(blob_it);
548+
}
549+
529550
// If was exported via NPUW
530551
auto stream_start_pos = stream.tellg();
531552
ov::npuw::s11n::IndicatorType serialization_indicator;
@@ -538,25 +559,12 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
538559
stream.seekg(-stream.tellg() + stream_start_pos, std::ios::cur);
539560

540561
// Drop NPUW properties if there are any
541-
ov::AnyMap npu_plugin_properties;
542562
for (auto it = properties.begin(); it != properties.end(); ++it) {
543-
if (it->first.find("NPUW") == it->first.npos) {
544-
npu_plugin_properties.insert(*it);
563+
if (it->first.find("NPUW") != it->first.npos) {
564+
// Erase by key: `it` iterates over `properties`, so handing it to erase() on the separate `npu_plugin_properties` map would be undefined behavior.
npu_plugin_properties.erase(it->first);
545565
}
546566
}
547567

548-
std::shared_ptr<ov::AlignedBuffer> modelBuffer;
549-
// ov::hint::compiled_blob has no corresponding "Config" implementation thus we need to remove it from the
550-
// list of properties
551-
if (auto blob_it = npu_plugin_properties.find(ov::hint::compiled_blob.name());
552-
blob_it != npu_plugin_properties.end()) {
553-
auto compiled_blob = blob_it->second.as<ov::Tensor>();
554-
modelBuffer = std::make_shared<ov::SharedBuffer<ov::Tensor>>(reinterpret_cast<char*>(compiled_blob.data()),
555-
compiled_blob.get_byte_size(),
556-
compiled_blob);
557-
npu_plugin_properties.erase(blob_it);
558-
}
559-
560568
CompilerAdapterFactory compilerAdapterFactory;
561569
auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, properties));
562570

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// Copyright (C) 2018-2025 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
5+
#include "overload/compiled_model/import_export.hpp"
6+
7+
#include <gtest/gtest.h>
8+
9+
#include <map>
10+
#include <string>
11+
12+
#include "common/npu_test_env_cfg.hpp"
13+
14+
using namespace ov::test::behavior;
15+
16+
const std::vector<ov::AnyMap> compiledModelConfigs = {{}};
17+
18+
INSTANTIATE_TEST_SUITE_P(
19+
smoke_Behavior_NPU,
20+
OVCompiledGraphImportExportTestNPU,
21+
::testing::Combine(::testing::Values(ov::element::f16 /* not used in internal import_export tests so far */),
22+
::testing::Values(ov::test::utils::DEVICE_NPU),
23+
::testing::ValuesIn(compiledModelConfigs)),
24+
ov::test::utils::appendPlatformTypeTestName<OVCompiledGraphImportExportTestNPU>);
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// Copyright (C) 2018-2025 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
5+
#include "overload/ov_plugin/life_time.hpp"
6+
7+
#include "common/npu_test_env_cfg.hpp"
8+
#include "common/utils.hpp"
9+
#include "intel_npu/config/options.hpp"
10+
11+
using namespace ov::test::behavior;
12+
13+
namespace {
14+
15+
const std::vector<ov::AnyMap> configs = {{}};
16+
17+
INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests,
18+
OVHoldersTestNPU,
19+
::testing::Combine(::testing::Values(ov::test::utils::DEVICE_NPU),
20+
::testing::ValuesIn(configs)),
21+
ov::test::utils::appendPlatformTypeTestName<OVHoldersTestNPU>);
22+
23+
INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests,
24+
OVHoldersTestOnImportedNetworkNPU,
25+
::testing::Combine(::testing::Values(ov::test::utils::DEVICE_NPU),
26+
::testing::ValuesIn(configs)),
27+
ov::test::utils::appendPlatformTypeTestName<OVHoldersTestOnImportedNetworkNPU>);
28+
29+
} // namespace
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
// Copyright (C) 2018-2025 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
5+
#include "behavior/compiled_model/import_export.hpp"
6+
#include "common_test_utils/subgraph_builders/conv_pool_relu.hpp"
7+
#include "intel_npu/npu_private_properties.hpp"
8+
#include "openvino/runtime/make_tensor.hpp"
9+
10+
namespace ov {
11+
12+
namespace test {
13+
14+
namespace behavior {
15+
16+
using OVCompiledGraphImportExportTestNPU = OVCompiledGraphImportExportTest;
17+
18+
TEST_P(OVCompiledGraphImportExportTestNPU, CanImportModelWithEmptyIStreamAndCompiledBlobProp) {
19+
ov::Core core;
20+
std::shared_ptr<std::string> strSO;
21+
{
22+
std::stringstream sstream;
23+
auto model = ov::test::utils::make_conv_pool_relu();
24+
core.compile_model(model, target_device, configuration).export_model(sstream);
25+
strSO = std::make_shared<std::string>(sstream.str());
26+
}
27+
auto tensor = ov::Tensor(ov::element::u8, ov::Shape{strSO->size()}, strSO->data());
28+
auto impl = ov::get_tensor_impl(tensor);
29+
impl._so = strSO;
30+
tensor = ov::make_tensor(impl);
31+
configuration.emplace(ov::hint::compiled_blob(tensor));
32+
std::ifstream emptyIFileStream;
33+
std::fstream emptyFileStream;
34+
std::istringstream emptyIStringStream;
35+
std::stringstream emptyStringStream;
36+
OV_ASSERT_NO_THROW(auto compiledModel = core.import_model(emptyIFileStream, target_device, configuration));
37+
OV_ASSERT_NO_THROW(auto compiledModel = core.import_model(emptyFileStream, target_device, configuration));
38+
OV_ASSERT_NO_THROW(auto compiledModel = core.import_model(emptyIStringStream, target_device, configuration));
39+
OV_ASSERT_NO_THROW(auto compiledModel = core.import_model(emptyStringStream, target_device, configuration));
40+
configuration.erase(ov::hint::compiled_blob.name()); // cleanup
41+
}
42+
43+
TEST_P(OVCompiledGraphImportExportTestNPU, CanImportModelWithApplicationHeaderAndCompiledBlobProp) {
44+
ov::Core core;
45+
const std::string_view headerView("<dummy_application_header>");
46+
const std::string_view suffixView("<dummy_application_suffix>");
47+
std::stringstream sstream;
48+
49+
sstream.write(headerView.data(), headerView.size());
50+
{
51+
auto model = ov::test::utils::make_conv_pool_relu();
52+
core.compile_model(model, target_device, configuration).export_model(sstream);
53+
}
54+
55+
// header tests, stream won't work if not handled by OV caching mechanism
56+
{
57+
auto strSO = std::make_shared<std::string>(sstream.str());
58+
auto tensor = ov::Tensor(ov::element::u8, ov::Shape{strSO->size()}, strSO->data());
59+
auto impl = ov::get_tensor_impl(tensor);
60+
impl._so = strSO;
61+
tensor = ov::make_tensor(impl);
62+
configuration.emplace(ov::hint::compiled_blob(tensor));
63+
sstream.seekg(headerView.size(), std::ios::beg); // skip header
64+
OV_EXPECT_THROW(
65+
auto compiledModel = core.import_model(sstream, target_device, configuration),
66+
ov::Exception,
67+
testing::HasSubstr("metadata")); // OVNPU suffix can be parsed from metadata, but not correct version
68+
configuration.erase(ov::hint::compiled_blob.name()); // cleanup
69+
}
70+
71+
// header tests, stream won't impact import_model if application manages ov::Tensor offset
72+
{
73+
auto strSO = std::make_shared<std::string>(sstream.str());
74+
auto tensor = ov::Tensor(ov::element::u8,
75+
ov::Shape{strSO->size() - headerView.size()},
76+
strSO->data() + headerView.size());
77+
auto impl = ov::get_tensor_impl(tensor);
78+
impl._so = strSO;
79+
tensor = ov::make_tensor(impl);
80+
configuration.emplace(ov::hint::compiled_blob(tensor));
81+
// header is no longer skipped by stream
82+
OV_ASSERT_NO_THROW(auto compiledModel = core.import_model(sstream, target_device, configuration));
83+
configuration.erase(ov::hint::compiled_blob.name()); // cleanup
84+
}
85+
86+
// suffix tests, stream won't impact import_model if application manages ov::Tensor size
87+
{
88+
sstream.write(suffixView.data(), suffixView.size());
89+
auto strSO = std::make_shared<std::string>(sstream.str());
90+
auto tensor = ov::Tensor(ov::element::u8,
91+
ov::Shape{strSO->size() - headerView.size() - suffixView.size()},
92+
strSO->data() + headerView.size());
93+
auto impl = ov::get_tensor_impl(tensor);
94+
impl._so = strSO;
95+
tensor = ov::make_tensor(impl);
96+
configuration.emplace(ov::hint::compiled_blob(tensor));
97+
OV_ASSERT_NO_THROW(auto compiledModel = core.import_model(sstream, target_device, configuration));
98+
configuration.erase(ov::hint::compiled_blob.name()); // cleanup
99+
}
100+
}
101+
102+
} // namespace behavior
103+
104+
} // namespace test
105+
106+
} // namespace ov

src/plugins/intel_npu/tests/functional/internal/overload/ov_plugin/life_time.hpp

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
#pragma once
66

77
#include "behavior/ov_plugin/life_time.hpp"
8+
#include "common/npu_test_env_cfg.hpp"
89
#include "common_test_utils/subgraph_builders/conv_pool_relu.hpp"
10+
#include "openvino/runtime/make_tensor.hpp"
911

1012
using CompilationParams = std::tuple<std::string, // Device name
1113
ov::AnyMap // Config
@@ -63,8 +65,10 @@ class OVHoldersTestNPU : public OVPluginTestBase, public testing::WithParamInter
6365

6466
#define EXPECT_NO_CRASH(_statement) EXPECT_EXIT(_statement; exit(0), testing::ExitedWithCode(0), "")
6567

66-
static void release_order_test(std::vector<std::size_t> order, const std::string& deviceName,
67-
std::shared_ptr<ov::Model> function, ov::AnyMap configuration) {
68+
static void release_order_test(std::vector<std::size_t> order,
69+
const std::string& deviceName,
70+
std::shared_ptr<ov::Model> function,
71+
ov::AnyMap configuration) {
6872
ov::AnyVector objects;
6973
{
7074
ov::Core core = createCoreWithTemplate();
@@ -90,7 +94,7 @@ TEST_P(OVHoldersTestNPU, Orders) {
9094
order_str << objects.at(i) << " ";
9195
}
9296
EXPECT_NO_CRASH(release_order_test(order, target_device, function, configuration))
93-
<< "for order: " << order_str.str();
97+
<< "for order: " << order_str.str();
9498
} while (std::next_permutation(order.begin(), order.end()));
9599
}
96100

@@ -151,9 +155,8 @@ TEST_P(OVHoldersTestNPU, LoadedRemoteContext) {
151155
}
152156
}
153157

154-
class OVHoldersTestOnImportedNetworkNPU :
155-
public OVPluginTestBase,
156-
public testing::WithParamInterface<CompilationParams> {
158+
class OVHoldersTestOnImportedNetworkNPU : public OVPluginTestBase,
159+
public testing::WithParamInterface<CompilationParams> {
157160
protected:
158161
ov::AnyMap configuration;
159162
std::string deathTestStyle;
@@ -221,6 +224,39 @@ TEST_P(OVHoldersTestOnImportedNetworkNPU, CreateRequestWithCoreRemoved) {
221224
auto request = compiled_model.create_infer_request();
222225
}
223226

227+
TEST_P(OVHoldersTestOnImportedNetworkNPU, CanInferAfterCompiledBlobPropTensorIsDestroyed) {
228+
ov::Core core = createCoreWithTemplate();
229+
230+
for (size_t i = 0; i < 2; ++i) {
231+
ov::CompiledModel compiled_model;
232+
if (i != 0) {
233+
configuration.emplace(ov::intel_npu::defer_weights_load(true));
234+
}
235+
{
236+
std::stringstream sstream;
237+
core.compile_model(function, target_device, configuration).export_model(sstream);
238+
auto strSO = std::make_shared<std::string>(sstream.str());
239+
auto tensor = ov::Tensor(ov::element::u8, ov::Shape{strSO->size()}, strSO->data());
240+
auto impl = ov::get_tensor_impl(tensor);
241+
impl._so = strSO;
242+
tensor = ov::make_tensor(impl);
243+
configuration.emplace(ov::hint::compiled_blob(tensor));
244+
compiled_model = core.import_model(sstream, target_device, configuration);
245+
configuration.erase(ov::hint::compiled_blob.name()); // cleanup
246+
}
247+
248+
// check if the shared object (strSO destroyed above) persists in compiled_model
249+
std::ostringstream sstream;
250+
ov::InferRequest inferRequest;
251+
OV_ASSERT_NO_THROW(compiled_model.export_model(sstream));
252+
EXPECT_TRUE(sstream.tellp() > 0);
253+
OV_ASSERT_NO_THROW(inferRequest = compiled_model.create_infer_request());
254+
compiled_model = {}; // dtor of compiled model won't affect created infer request
255+
OV_ASSERT_NO_THROW(inferRequest.infer());
256+
}
257+
configuration.erase(ov::intel_npu::defer_weights_load.name()); // cleanup
258+
}
259+
224260
} // namespace behavior
225261
} // namespace test
226262
} // namespace ov

src/plugins/intel_npu/tests/functional/shared_tests_instances/behavior/ov_plugin/life_time.cpp

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
#include "common/npu_test_env_cfg.hpp"
88
#include "common/utils.hpp"
99
#include "intel_npu/config/options.hpp"
10-
#include "overload/ov_plugin/life_time.hpp"
1110

1211
using namespace ov::test::behavior;
1312

@@ -37,18 +36,6 @@ INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests,
3736
::testing::Values(ov::test::utils::DEVICE_NPU),
3837
getTestCaseName);
3938

40-
INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests,
41-
OVHoldersTestNPU,
42-
::testing::Combine(::testing::Values(ov::test::utils::DEVICE_NPU),
43-
::testing::ValuesIn(configs)),
44-
OVHoldersTestNPU::getTestCaseName);
45-
46-
INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests,
47-
OVHoldersTestOnImportedNetworkNPU,
48-
::testing::Combine(::testing::Values(ov::test::utils::DEVICE_NPU),
49-
::testing::ValuesIn(configs)),
50-
OVHoldersTestOnImportedNetworkNPU::getTestCaseName);
51-
5239
INSTANTIATE_TEST_SUITE_P(compatibility_smoke_VirtualPlugin_BehaviorTests,
5340
OVHoldersTestWithConfig,
5441
::testing::ValuesIn(device_names_and_priorities),

0 commit comments

Comments
 (0)