@@ -712,6 +712,18 @@ std::optional<ov::Any> pop_option(ov::AnyMap& config, const std::string& option_
712
712
return std::nullopt ;
713
713
}
714
714
715
+ void apply_weights_bank_name (ov::AnyMap& config, const std::string& bank_name) {
716
+ auto it = config.find (" NPUW_WEIGHTS_BANK" );
717
+ if (it != config.end ()) {
718
+ if (it->second .as <std::string>().empty ()) {
719
+ NPUW_ASSERT (false && " NPUW_WEIGHTS_BANK is empty in the provided config! Please use non-empty name to "
720
+ " share the model weights." );
721
+ }
722
+ } else {
723
+ config[" NPUW_WEIGHTS_BANK" ] = bank_name;
724
+ }
725
+ }
726
+
715
727
ov::AnyMap get_baseline_common_config (const std::optional<NPUDesc>& npudesc) {
716
728
ov::AnyMap config = {
717
729
{" NPU_COMPILATION_MODE_PARAMS" , " compute-layers-with-higher-precision=Sqrt,Power,ReduceMean,Add_RMSNorm" },
@@ -720,7 +732,6 @@ ov::AnyMap get_baseline_common_config(const std::optional<NPUDesc>& npudesc) {
720
732
{" NPUW_FOLD" , " YES" },
721
733
{" NPUW_DCOFF_TYPE" , " f16" },
722
734
{" NPUW_DCOFF_SCALE" , " YES" },
723
- {" NPUW_WEIGHTS_BANK" , " shared" },
724
735
{" NPUW_SLICE_OUT" , " YES" },
725
736
{" NPUW_FUNCALL_ASYNC" , " YES" }};
726
737
// FIXME: this config logic is getting more and more complex
@@ -1109,6 +1120,13 @@ ov::npuw::LLMCompiledModel::LLMCompiledModel(const std::shared_ptr<ov::Model>& m
1109
1120
merge_config_with (prefill_config, prefill_config_addition_value);
1110
1121
merge_config_with (generate_config, generate_config_addition_value);
1111
1122
1123
+ // Generate a random weights bank name unique to this LLMCompiledModel object
1124
+ auto weights_bank_name = ov::npuw::util::generate_random_string ();
1125
+ LOG_VERB (" Generated a unique weights bank name: " << weights_bank_name);
1126
+
1127
+ apply_weights_bank_name (prefill_config, weights_bank_name);
1128
+ apply_weights_bank_name (generate_config, weights_bank_name);
1129
+
1112
1130
// Handle attention hints. FIXME: Maybe it makes sense to make those
1113
1131
// mutually exclusive with the precise configuration sections as well
1114
1132
const ov::AnyMap dyn_attn_opts = {
@@ -1163,8 +1181,10 @@ ov::npuw::LLMCompiledModel::LLMCompiledModel(const std::shared_ptr<ov::Model>& m
1163
1181
auto lm_head_config = get_default_lm_head_config (npudesc);
1164
1182
merge_config_with (lm_head_config, other_props);
1165
1183
auto lm_head_config_addition_value = lm_head_config_addition.value_or (ov::AnyMap{}).as <ov::AnyMap>();
1166
-
1167
1184
merge_config_with (lm_head_config, lm_head_config_addition_value);
1185
+
1186
+ apply_weights_bank_name (lm_head_config, weights_bank_name);
1187
+
1168
1188
m_lm_head_compiled = std::dynamic_pointer_cast<ov::npuw::CompiledModel>(
1169
1189
ov::npuw::ICompiledModel::create (lm_head_model, plugin, lm_head_config));
1170
1190
NPUW_ASSERT (m_lm_head_compiled);
0 commit comments