From 2c7de13856af08e51984da4dbadca3ca420084dc Mon Sep 17 00:00:00 2001
From: Nikita Savelyev
Date: Tue, 11 Nov 2025 13:40:59 +0100
Subject: [PATCH 1/3] Add cache_position for Mamba prepare_inputs

---
 optimum/intel/openvino/modeling_decoder.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index 1dd6434641..0198e16a34 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -545,6 +545,9 @@ def prepare_inputs(
                 self.next_beam_idx if self.next_beam_idx is not None else np.arange(batch_size, dtype=int)
             )
 
+        if "cache_position" in self.input_names:
+            inputs["cache_position"] = np.arange(0, getattr(self, "conv_kernel"), dtype=int)
+
         return inputs
 
     def forward(

From c44e641e10d4588b62da1e9f02d54402458f151d Mon Sep 17 00:00:00 2001
From: Nikita Savelyev
Date: Wed, 12 Nov 2025 09:58:11 +0100
Subject: [PATCH 2/3] Override prepare_inputs method for OVModelWithMambaForCausalLM

---
 optimum/intel/openvino/modeling_decoder.py | 27 +++++++++++++++++-----
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index 0198e16a34..46d782725b 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -545,9 +545,6 @@ def prepare_inputs(
                 self.next_beam_idx if self.next_beam_idx is not None else np.arange(batch_size, dtype=int)
             )
 
-        if "cache_position" in self.input_names:
-            inputs["cache_position"] = np.arange(0, getattr(self, "conv_kernel"), dtype=int)
-
         return inputs
 
     def forward(
@@ -1298,15 +1295,20 @@ def _has_cache_inputs(model: openvino.Model) -> bool:
             "past_key_values" in key.get_any_name() or "cache_params" in key.get_any_name() for key in model.inputs
         )
 
-    def forward(
+    def prepare_inputs(
         self,
-        input_ids: Optional[torch.LongTensor] = None,
+        input_ids: torch.LongTensor,
         attention_mask: Optional[torch.LongTensor] = None,
         cache_params=None,
         use_cache: Optional[bool] = None,
         cache_position: Optional[torch.Tensor] = None,
         **kwargs,
-    ):
+    ) -> Dict:
+        if kwargs.get("past_key_values") is not None:
+            raise ValueError("`past_key_values` input is not supported for OVModelWithMambaForCausalLM")
+        if kwargs.get("position_ids") is not None:
+            raise ValueError("`position_ids` input is not supported for OVModelWithMambaForCausalLM")
+
         inputs = {"input_ids": input_ids}
 
         if "cache_position" in self.input_names:
             if cache_position is None:
@@ -1343,6 +1345,19 @@ def forward(
             batch_size = input_ids.shape[0]
             inputs["beam_idx"] = np.arange(batch_size, dtype=int)
 
+        return inputs
+
+    def forward(
+        self,
+        input_ids: Optional[torch.LongTensor] = None,
+        attention_mask: Optional[torch.LongTensor] = None,
+        cache_params=None,
+        use_cache: Optional[bool] = None,
+        cache_position: Optional[torch.Tensor] = None,
+        **kwargs,
+    ):
+        inputs = self.prepare_inputs(input_ids, attention_mask, cache_params, use_cache, cache_position, **kwargs)
+
         self.request.start_async(inputs, share_inputs=True)
         self.request.wait()
         logits = torch.from_numpy(self.request.get_tensor("logits").data).to(self.device)

From ab805dcd26d5f33aaa707177cb10ecff292bcc84 Mon Sep 17 00:00:00 2001
From: Nikita Savelyev
Date: Wed, 12 Nov 2025 12:19:55 +0100
Subject: [PATCH 3/3] Update optimum/intel/openvino/modeling_decoder.py

Co-authored-by: Roman Kazantsev
---
 optimum/intel/openvino/modeling_decoder.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index 46d782725b..33e9a31690 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -1305,9 +1305,9 @@ def prepare_inputs(
         **kwargs,
     ) -> Dict:
         if kwargs.get("past_key_values") is not None:
-            raise ValueError("`past_key_values` input is not supported for OVModelWithMambaForCausalLM")
+            raise ValueError("`past_key_values` input is not supported for `OVModelWithMambaForCausalLM`")
         if kwargs.get("position_ids") is not None:
-            raise ValueError("`position_ids` input is not supported for OVModelWithMambaForCausalLM")
+            raise ValueError("`position_ids` input is not supported for `OVModelWithMambaForCausalLM`")
 
         inputs = {"input_ids": input_ids}
 
         if "cache_position" in self.input_names:
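Note (not part of the patch series): a minimal usage sketch of the code path these patches touch. It assumes an optimum-intel build where Mamba checkpoints exported through `OVModelForCausalLM.from_pretrained(..., export=True)` are dispatched to the Mamba-specific modeling class; the checkpoint name is an illustrative assumption, not taken from the patches.

# Illustrative sketch only -- not part of the patches above. Assumes
# optimum-intel with OpenVINO support is installed and that Mamba
# checkpoints are routed to OVModelWithMambaForCausalLM on export.
from transformers import AutoTokenizer
from optimum.intel import OVModelForCausalLM

model_id = "state-spaces/mamba-130m-hf"  # assumed example checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = OVModelForCausalLM.from_pretrained(model_id, export=True)

# generate() drives forward(), which after these patches builds its inputs
# via the overridden prepare_inputs(), filling `cache_position` whenever the
# compiled model declares it among its inputs.
inputs = tokenizer("OpenVINO is", return_tensors="pt")
output_ids = model.generate(**inputs, max_new_tokens=16)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))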