Skip to content

Commit 9e83aea

Browse files
[NPU] Acknowledge OV model compression in weightless compilation (#34609)
### Details: - OV model compression may produce models with multiple constants pointing to the same binary data. If this is the case, rely on the compiler to provide the actual constant description (shape and precision), keeping the plugin code simple. Additionally, introduce several checks to ensure that the buffer sizes are the same. They are relatively cheap computationally, yet help ensure that model invariants hold. - Changes and description by @andrey-golubev, with a small addition by me. ### Tickets: - *EISW-204968* ### AI Assistance: - *AI assistance used: no* - *If yes, summarize how AI was used and what human validation was performed (build/tests/manual checks).* --------- Co-authored-by: Golubev, Andrey <andrey.golubev@intel.com>
1 parent 68322ce commit 9e83aea

File tree

2 files changed

+39
-7
lines changed

2 files changed

+39
-7
lines changed

src/plugins/intel_npu/src/compiler_adapter/src/model_serializer.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,9 @@ void storeWeightsPointerAttribute(const std::shared_ptr<ov::Model>& model) {
185185
* @param model Both source and target.
186186
*/
187187
void storeWeightlessCacheAttribute(const std::shared_ptr<ov::Model>& model) {
188+
std::unordered_map<size_t, size_t> wca_offset_to_size;
188189
size_t constantId = 0;
190+
189191
for (auto&& node : model->get_ordered_ops()) {
190192
if (ov::is_type<ov::op::v0::Constant>(node)) {
191193
ov::RTMap& runtimeInfoMap = node->get_rt_info();
@@ -195,6 +197,16 @@ void storeWeightlessCacheAttribute(const std::shared_ptr<ov::Model>& model) {
195197
const std::string constantIdString = std::to_string(constantId++);
196198
if (weightlessCacheAttrIt != runtimeInfoMap.end()) {
197199
auto& weightlessCacheAttr = weightlessCacheAttrIt->second.as<ov::WeightlessCacheAttribute>();
200+
201+
if (!wca_offset_to_size.count(weightlessCacheAttr.bin_offset)) {
202+
wca_offset_to_size[weightlessCacheAttr.bin_offset] = weightlessCacheAttr.original_size;
203+
} else {
204+
OPENVINO_ASSERT(
205+
wca_offset_to_size.at(weightlessCacheAttr.bin_offset) == weightlessCacheAttr.original_size,
206+
"The WeightlessCacheAttribute of at least two Constant nodes use the same offset, but "
207+
"different sizes");
208+
}
209+
198210
model->set_rt_info(weightlessCacheAttr.bin_offset, "ws_bin_offset_" + constantIdString);
199211
model->set_rt_info(weightlessCacheAttr.original_size, "ws_original_size_" + constantIdString);
200212
model->set_rt_info(weightlessCacheAttr.original_dtype, "ws_original_dtype_" + constantIdString);

src/plugins/intel_npu/src/compiler_adapter/src/weightless_graph.cpp

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,17 @@ std::unordered_map<size_t, std::shared_ptr<ov::op::v0::Constant>> get_all_consta
4141
const auto& weightlessCacheAttrIt = runtimeInfoMap.find(ov::WeightlessCacheAttribute::get_type_info_static());
4242
if (weightlessCacheAttrIt != runtimeInfoMap.end()) {
4343
auto& weightlessCacheAttr = weightlessCacheAttrIt->second.as<ov::WeightlessCacheAttribute>();
44-
constants[weightlessCacheAttr.bin_offset] = constantNode;
44+
45+
auto& constant = constants[weightlessCacheAttr.bin_offset];
46+
if (constant != nullptr) {
47+
// if multiple constants point to the same buffer, ensure that
48+
// their binary sizes are the same
49+
OPENVINO_ASSERT(constant->get_byte_size() == constantNode->get_byte_size(),
50+
"Found ov::Constant that points to the common buffer but has mismatching byte size. "
51+
"This may indicate a bug in OV model compression.");
52+
continue;
53+
}
54+
constant = constantNode;
4555
}
4656
}
4757

@@ -381,23 +391,33 @@ WeightlessGraph::InputData WeightlessGraph::allocate_inputs(
381391
auto currentInputBufferLocation =
382392
static_cast<unsigned char*>(const_cast<void*>(initInputsAllocatedTensor->data(ov::element::Type_t::u8))) +
383393
offset;
394+
const auto tensorShapeFromCompiler = descriptor.shapeFromCompiler.to_shape();
384395
const size_t currentInputSize =
385-
ov::util::get_memory_size(descriptor.precision, shape_size(descriptor.shapeFromCompiler.to_shape()));
396+
ov::util::get_memory_size(descriptor.precision, shape_size(tensorShapeFromCompiler));
386397

387-
std::shared_ptr<ov::op::v0::Constant> constant;
388398
const size_t id = std::stoi(descriptor.nameFromCompiler);
389-
OPENVINO_ASSERT(constants.count(id) > 0,
399+
auto constantIt = constants.find(id);
400+
OPENVINO_ASSERT(constantIt != constants.end(),
390401
"Weights ID ",
391402
id,
392403
" not found in the model constants. This may indicate a mismatch between the model and the "
393404
"metadata of the compiled model.");
394405

395-
constant = constants.at(id);
396-
406+
const auto constant = constantIt->second;
407+
OPENVINO_ASSERT(constant->get_byte_size() == currentInputSize,
408+
"Binary size mismatch found for weights ID ",
409+
id,
410+
" between the model and compiled metadata.");
397411
std::memcpy(currentInputBufferLocation, constant->get_data_ptr(), currentInputSize);
398412

413+
// Note: Use compiler-provided precision and shape, because duplicates -
414+
// constants that point to the same binary data - can in theory have
415+
// different shape or even type (OV model compression only guarantees
416+
// that the data is the same). In order to avoid any potential issues
417+
// due to shape/type mismatches, init tensors should align with
418+
// compiler's expectations.
399419
initInputsViewTensors.push_back(
400-
ov::make_tensor(constant->get_element_type(), constant->get_shape(), currentInputBufferLocation));
420+
ov::make_tensor(descriptor.precision, tensorShapeFromCompiler, currentInputBufferLocation));
401421
offset += currentInputSize;
402422

403423
// Note: By construction of the weight schedule, every constant from OV

0 commit comments

Comments
 (0)