Skip to content

Commit ae48bc2

Browse files
[NPUW] Make weights bank name unique by default (#32321)
1 parent cbcb8d5 commit ae48bc2

File tree

5 files changed

+51
-6
lines changed

5 files changed

+51
-6
lines changed

src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -712,6 +712,18 @@ std::optional<ov::Any> pop_option(ov::AnyMap& config, const std::string& option_
712712
return std::nullopt;
713713
}
714714

715+
// Makes sure `config` carries an "NPUW_WEIGHTS_BANK" entry.
//  - Key absent:                the entry is created and set to `bank_name`.
//  - Key present and non-empty: the existing value is kept untouched.
//  - Key present but empty:     NPUW_ASSERT fires, an empty name is rejected.
void apply_weights_bank_name(ov::AnyMap& config, const std::string& bank_name) {
    const auto entry = config.find("NPUW_WEIGHTS_BANK");
    if (entry == config.end()) {
        // Nothing was provided in the config - fall back to the supplied name
        config["NPUW_WEIGHTS_BANK"] = bank_name;
        return;
    }

    const bool provided_name_is_empty = entry->second.as<std::string>().empty();
    if (provided_name_is_empty) {
        NPUW_ASSERT(false && "NPUW_WEIGHTS_BANK is empty in the provided config! Please use non-empty name to "
                             "share the model weights.");
    }
}
726+
715727
ov::AnyMap get_baseline_common_config(const std::optional<NPUDesc>& npudesc) {
716728
ov::AnyMap config = {
717729
{"NPU_COMPILATION_MODE_PARAMS", "compute-layers-with-higher-precision=Sqrt,Power,ReduceMean,Add_RMSNorm"},
@@ -720,7 +732,6 @@ ov::AnyMap get_baseline_common_config(const std::optional<NPUDesc>& npudesc) {
720732
{"NPUW_FOLD", "YES"},
721733
{"NPUW_DCOFF_TYPE", "f16"},
722734
{"NPUW_DCOFF_SCALE", "YES"},
723-
{"NPUW_WEIGHTS_BANK", "shared"},
724735
{"NPUW_SLICE_OUT", "YES"},
725736
{"NPUW_FUNCALL_ASYNC", "YES"}};
726737
// FIXME: this config logic is getting more and more complex
@@ -1109,6 +1120,13 @@ ov::npuw::LLMCompiledModel::LLMCompiledModel(const std::shared_ptr<ov::Model>& m
11091120
merge_config_with(prefill_config, prefill_config_addition_value);
11101121
merge_config_with(generate_config, generate_config_addition_value);
11111122

1123+
// Generate a random weights bank name unique to this LLMCompiledModel object
1124+
auto weights_bank_name = ov::npuw::util::generate_random_string();
1125+
LOG_VERB("Generated a unique weights bank name: " << weights_bank_name);
1126+
1127+
apply_weights_bank_name(prefill_config, weights_bank_name);
1128+
apply_weights_bank_name(generate_config, weights_bank_name);
1129+
11121130
// Handle attention hints. FIXME: Maybe it makes sense to make those
11131131
// mutually exclusive with the precise configuration sections as well
11141132
const ov::AnyMap dyn_attn_opts = {
@@ -1163,8 +1181,10 @@ ov::npuw::LLMCompiledModel::LLMCompiledModel(const std::shared_ptr<ov::Model>& m
11631181
auto lm_head_config = get_default_lm_head_config(npudesc);
11641182
merge_config_with(lm_head_config, other_props);
11651183
auto lm_head_config_addition_value = lm_head_config_addition.value_or(ov::AnyMap{}).as<ov::AnyMap>();
1166-
11671184
merge_config_with(lm_head_config, lm_head_config_addition_value);
1185+
1186+
apply_weights_bank_name(lm_head_config, weights_bank_name);
1187+
11681188
m_lm_head_compiled = std::dynamic_pointer_cast<ov::npuw::CompiledModel>(
11691189
ov::npuw::ICompiledModel::create(lm_head_model, plugin, lm_head_config));
11701190
NPUW_ASSERT(m_lm_head_compiled);

src/plugins/intel_npu/src/plugin/npuw/util.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -916,6 +916,20 @@ ov::npuw::util::TensorPtr ov::npuw::util::allocMem(const ov::element::Type type,
916916
return ov::get_tensor_impl(ov::make_tensor(remote_tensor));
917917
}
918918

919+
// Returns a string of `size` characters, each drawn uniformly from [0-9a-zA-Z].
// A `size` of 0 yields an empty string.
//
// Every character is drawn straight from std::random_device instead of from a
// std::mt19937 seeded with a single rd() call: a lone 32-bit seed can only ever
// produce 2^32 distinct strings, no matter how long the requested string is.
std::string ov::npuw::util::generate_random_string(std::size_t size) {
    static constexpr char alphabet[] = "0123456789"
                                       "abcdefghijklmnopqrstuvwxyz"
                                       "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    // sizeof() also counts the trailing '\0', hence "- 2" to get the last valid index
    static constexpr std::size_t last_index = sizeof(alphabet) - 2;

    std::random_device rd;
    std::uniform_int_distribution<std::size_t> pick(0, last_index);

    std::string result(size, '\0');
    std::generate_n(result.begin(), size, [&]() {
        return alphabet[pick(rd)];
    });
    return result;
}
932+
919933
bool ov::npuw::util::matchStringWithLoRAPattern(const std::string& input, const std::string& pattern_suffix) {
920934
std::string pattern = "^lora_state.*" + pattern_suffix + "$";
921935
std::regex regex_pattern(pattern);

src/plugins/intel_npu/src/plugin/npuw/util.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
#pragma once
66

7+
#include <random>
78
#include <string>
89

910
#include "llm_compiled_model_utils.hpp"
@@ -171,6 +172,8 @@ struct Unique {
171172
}
172173
};
173174

175+
// Returns a random string of `size` alphanumeric characters ([0-9a-zA-Z]).
std::string generate_random_string(std::size_t size = 32);
176+
174177
using TensorPtr = ov::SoPtr<ov::ITensor>;
175178
TensorPtr allocMem(const ov::element::Type type,
176179
const ov::Shape& shape,

src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,17 @@ class BankManager {
3636
std::mutex m_mutex;
3737
};
3838

39+
// Stores the core handle, the allocation device and the bank name.
// An empty `bank_name` is not kept as-is: it is replaced with a randomly
// generated name, and a warning reporting the substituted name is logged.
Bank::Bank(const std::shared_ptr<const ov::ICore>& core, const std::string& alloc_device, const std::string& bank_name)
    : m_core(core),
      m_alloc_device(alloc_device),
      m_bank_name(bank_name) {
    if (!m_bank_name.empty()) {
        return;  // A proper name was given, nothing to fix up
    }

    m_bank_name = ov::npuw::util::generate_random_string();
    LOG_WARN("Got an empty name for weights bank! Using a uniquely generated instead: " << m_bank_name);
}
49+
3950
int64_t Bank::registerLT(const LazyTensor& tensor, const std::string& device) {
4051
const std::string& device_for_alloc = m_alloc_device.empty() ? device : m_alloc_device;
4152

src/plugins/intel_npu/src/plugin/npuw/weights_bank.hpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,7 @@ namespace weights {
2525

2626
class Bank {
2727
public:
28-
Bank(const std::shared_ptr<const ov::ICore>& core, const std::string& alloc_device, const std::string& bank_name)
29-
: m_core(core),
30-
m_alloc_device(alloc_device),
31-
m_bank_name(bank_name) {}
28+
// Creates a bank from the given core, allocation device and bank name.
// If bank_name is empty, the out-of-line definition substitutes a randomly
// generated name and logs a warning.
Bank(const std::shared_ptr<const ov::ICore>& core, const std::string& alloc_device, const std::string& bank_name);
3229

3330
// Register LazyTensor in a bank if it's not there. Returns LazyTensor's unique id
3431
int64_t registerLT(const LazyTensor& tensor, const std::string& device);

0 commit comments

Comments
 (0)