Skip to content

Commit f3a9edf

Browse files
authored
[CPU] Weightless cache support (#32143)
### Details:
 - *Weightless cache support in the CPU plugin*
 - *...*

### Tickets:
 - *161826*
1 parent 67fddb4 commit f3a9edf

File tree

29 files changed

+921
-190
lines changed

29 files changed

+921
-190
lines changed

samples/cpp/benchmark_app/main.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,11 @@ int main(int argc, char* argv[]) {
606606
if (is_virtual_device(device)) {
607607
device_nstreams.erase(device);
608608
}
609+
610+
if (!FLAGS_cache_dir.empty()) {
611+
// Choose between better model compilation time and cache file size.
612+
device_config[ov::cache_mode.name()] = ov::CacheMode::OPTIMIZE_SPEED;
613+
}
609614
}
610615
auto result = std::find_if(config.begin(), config.end(), [&](const std::pair<std::string, ov::AnyMap>& item) {
611616
return device_name.find(item.first) == 0;

src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@
5050
#include "openvino/pass/pattern/op/or.hpp"
5151
#include "openvino/pass/pattern/op/wrap_type.hpp"
5252
#include "transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.hpp"
53-
#include "transformations/convert_precision.hpp"
5453
#include "transformations/fp16_compression/mark_floatpoint_range.hpp"
5554
#include "transformations/rt_info/disable_fp16_compression.hpp"
5655
#include "transformations/utils/utils.hpp"

src/core/CMakeLists.txt

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,7 @@ endif()
122122
# some sources are located in openvino_core, while headers are in openvino_transformations
123123
file(GLOB_RECURSE smart_reshape_srcs ${CMAKE_CURRENT_SOURCE_DIR}/src/pass/smart_reshape/*.cpp)
124124
file(GLOB_RECURSE rt_info_srcs ${CMAKE_CURRENT_SOURCE_DIR}/src/pass/rt_info/*.cpp)
125-
set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/src/pass/convert_precision.cpp"
126-
"${CMAKE_CURRENT_SOURCE_DIR}/src/pass/convert_fp32_to_fp16.cpp"
127-
"${CMAKE_CURRENT_SOURCE_DIR}/src/pass/init_node_info.cpp"
125+
set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/src/pass/convert_fp32_to_fp16.cpp"
128126
"${CMAKE_CURRENT_SOURCE_DIR}/src/pass/serialize.cpp"
129127
"${CMAKE_CURRENT_SOURCE_DIR}/src/op/type_relaxed.cpp"
130128
"${CMAKE_CURRENT_SOURCE_DIR}/src/preprocess/preprocess_steps_impl.cpp"

src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ class OPENVINO_API WeightlessCacheAttribute : public RuntimeAttribute {
3535

3636
bool is_copyable() const override;
3737

38+
bool visit_attributes(AttributeVisitor& visitor) override;
39+
3840
size_t original_size;
3941
size_t bin_offset;
4042
ov::element::Type original_dtype;

src/core/dev_api/openvino/xml_util/xml_serialize_util.hpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,7 @@ class OPENVINO_API XmlSerializer : public ov::AttributeVisitor {
7676
virtual void append_rt_info(pugi::xml_node& node, ov::RTMap& attributes);
7777
virtual bool append_rt_attribute(pugi::xml_node& node, const ov::RuntimeAttribute& attribute);
7878
virtual bool append_node_attributes(ov::Node& node);
79-
virtual util::ConstantWriter& get_constant_write_handler() const {
80-
return m_constant_node_write_handler;
81-
}
79+
virtual util::ConstantWriter& get_constant_write_handler();
8280

8381
public:
8482
XmlSerializer(pugi::xml_node& data,

src/core/src/op/util/weightless_caching_attributes.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (C) 2024 Intel Corporation
1+
// Copyright (C) 2018-2025 Intel Corporation
22
// SPDX-License-Identifier: Apache-2.0
33
//
44

@@ -11,6 +11,13 @@ bool ov::WeightlessCacheAttribute::is_copyable() const {
1111
return false;
1212
}
1313

14+
bool ov::WeightlessCacheAttribute::visit_attributes(AttributeVisitor& visitor) {
15+
visitor.on_attribute("original_dtype", original_dtype);
16+
visitor.on_attribute("bin_offset", bin_offset);
17+
visitor.on_attribute("original_size", original_size);
18+
return true;
19+
}
20+
1421
OPENVINO_API void ov::copy_weightless_cache_attr(const std::shared_ptr<ov::Node>& from,
1522
const std::shared_ptr<ov::Node>& to) {
1623
const auto& rt_info = from->get_rt_info();

src/core/src/xml_util/xml_serialize_util.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1107,9 +1107,9 @@ bool XmlSerializer::append_node_attributes(ov::Node& node) {
11071107
return node.visit_attributes(*this);
11081108
}
11091109

1110-
// util::ConstantWriter& XmlSerializer::get_constant_write_handler() {
1111-
// return m_constant_node_write_handler.get();
1112-
// }
1110+
util::ConstantWriter& XmlSerializer::get_constant_write_handler() {
1111+
return m_constant_node_write_handler.get();
1112+
}
11131113

11141114
std::string get_ir_precision_name(const element::Type& precision) {
11151115
switch (precision) {

src/core/xml_util/include/openvino/xml_util/xml_deserialize_util.hpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,20 @@
2121
namespace ov::util {
2222
struct GenericLayerParams;
2323

24+
template <class T>
25+
void str_to_container(const std::string& value, T& res) {
26+
std::stringstream ss(value);
27+
std::string field;
28+
while (getline(ss, field, ',')) {
29+
if (field.empty())
30+
OPENVINO_THROW("Cannot get vector of parameters! \"", value, "\" is incorrect");
31+
std::stringstream fs(field);
32+
typename T::value_type val;
33+
fs >> val;
34+
res.insert(res.end(), val);
35+
}
36+
}
37+
2438
class XmlDeserializer : public ov::AttributeVisitor {
2539
public:
2640
explicit XmlDeserializer(const pugi::xml_node& node,
@@ -53,6 +67,9 @@ class XmlDeserializer : public ov::AttributeVisitor {
5367
virtual void set_constant_num_buffer(ov::AttributeAdapter<std::shared_ptr<ov::AlignedBuffer>>& adapter);
5468

5569
const pugi::xml_node& get_node() const;
70+
const std::shared_ptr<ov::AlignedBuffer>& get_weights() const {
71+
return m_weights;
72+
}
5673

5774
private:
5875
struct IoMap {

src/core/xml_util/src/xml_deserialize_util.cpp

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -42,26 +42,12 @@ bool getStrAttribute(const pugi::xml_node& node, const std::string& name, std::s
4242
return true;
4343
}
4444

45-
template <class T>
46-
void str_to_container(const std::string& value, T& res) {
47-
std::stringstream ss(value);
48-
std::string field;
49-
while (getline(ss, field, ',')) {
50-
if (field.empty())
51-
OPENVINO_THROW("Cannot get vector of parameters! \"", value, "\" is incorrect");
52-
std::stringstream fs(field);
53-
typename T::value_type val;
54-
fs >> val;
55-
res.insert(res.end(), val);
56-
}
57-
}
58-
5945
template <class T>
6046
bool getParameters(const pugi::xml_node& node, const std::string& name, std::vector<T>& value) {
6147
std::string param;
6248
if (!getStrAttribute(node, name, param))
6349
return false;
64-
str_to_container(param, value);
50+
ov::util::str_to_container(param, value);
6551
return true;
6652
}
6753

src/inference/src/dev/core_impl.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "openvino/runtime/threading/executor_manager.hpp"
3131
#include "openvino/util/common_util.hpp"
3232
#include "openvino/util/file_util.hpp"
33+
#include "openvino/util/log.hpp"
3334
#include "openvino/util/shared_object.hpp"
3435
#include "openvino/util/variant_visitor.hpp"
3536
#include "openvino/util/xml_parse_utils.hpp"
@@ -851,6 +852,16 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::shared_ptr<
851852
const auto compiled_config = create_compile_config(plugin, parsed._config);
852853
cache_content.blobId = ModelCache::compute_hash(model, cache_content.modelPath, compiled_config);
853854
cache_content.model = model;
855+
856+
const auto& cache_mode_it = config.find(cache_mode.name());
857+
if (cache_mode_it != config.end() && cache_mode_it->second == CacheMode::OPTIMIZE_SIZE) {
858+
const auto& rt_info = model->get_rt_info();
859+
auto weights_path = rt_info.find("__weights_path");
860+
if (weights_path != rt_info.end()) {
861+
parsed._config[ov::weights_path.name()] = weights_path->second;
862+
}
863+
}
864+
854865
const auto lock = cacheGuard.get_hash_lock(cache_content.blobId);
855866
res = load_model_from_cache(cache_content, plugin, parsed._config, {}, [&]() {
856867
return compile_model_and_cache(plugin, model, parsed._config, {}, cache_content);
@@ -1594,10 +1605,6 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
15941605
update_config[ov::hint::model.name()] = cacheContent.model;
15951606
}
15961607

1597-
if (util::contains(plugin.get_property(ov::supported_properties), ov::hint::model) &&
1598-
cacheContent.model) {
1599-
update_config[ov::hint::model.name()] = cacheContent.model;
1600-
}
16011608
if (util::contains(plugin.get_property(ov::supported_properties), ov::weights_path)) {
16021609
util::Path weights_path;
16031610

@@ -1606,7 +1613,6 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
16061613
weights_path = path_hint->second.as<std::string>();
16071614
} else if (weights_path = extract_weight_path(header.get_runtime_info()); weights_path.empty()) {
16081615
weights_path = cacheContent.modelPath;
1609-
weights_path.replace_extension(".bin");
16101616
}
16111617
weights_path.replace_extension(".bin");
16121618

@@ -1638,9 +1644,11 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
16381644
// throw;
16391645
}
16401646

1641-
// fallback scenario
1642-
if (!compiled_model)
1647+
// Fallback scenario
1648+
if (!compiled_model) {
1649+
OPENVINO_WARN("Could not load model from cache.");
16431650
compiled_model = compile_model_lambda();
1651+
}
16441652

16451653
return compiled_model;
16461654
}

0 commit comments

Comments
 (0)