From 82769c57c2c18b2b5606d4d33a007405ed14c759 Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Tue, 27 Jan 2026 05:18:29 +0000 Subject: [PATCH 1/4] fix clip --- olive/common/hf/io_config/task_config.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/olive/common/hf/io_config/task_config.py b/olive/common/hf/io_config/task_config.py index 7eaf3d0fc..e0c6aeb3f 100644 --- a/olive/common/hf/io_config/task_config.py +++ b/olive/common/hf/io_config/task_config.py @@ -84,7 +84,7 @@ def get_io_config( _add_present_outputs(dynamic_axes, config) # Order inputs according to model forward signature - ordered_inputs = _order_inputs(dynamic_axes, model) + ordered_inputs = _order_inputs(dynamic_axes, model, set(outputs.keys())) # Separate input and output names input_names = [name for name in ordered_inputs if not name.startswith("present.")] @@ -202,6 +202,7 @@ def _add_present_outputs( def _order_inputs( dynamic_axes: dict, model: PreTrainedModel | None, + output_names: set[str] | None = None, ) -> OrderedDict: """Order inputs according to model forward signature. @@ -210,6 +211,7 @@ def _order_inputs( Args: dynamic_axes: Dict of all dynamic axes (inputs and outputs). model: Optional model for forward signature inspection. + output_names: Set of output names to exclude from input ordering. Returns: OrderedDict of input names to dynamic axes, ordered by forward signature. 
@@ -217,11 +219,15 @@ def _order_inputs( """ import re - # Filter to only input names (not outputs like present.*) + if output_names is None: + output_names = set() + + # Filter to only input names (not outputs like present.* or explicit output names) input_axes = OrderedDict() for name, axes in dynamic_axes.items(): - if not name.startswith("present.") and not name.startswith("logits"): - input_axes[name] = axes + if name.startswith("present.") or name in output_names: + continue + input_axes[name] = axes if model is None: return input_axes From 784d25261fc2cc7bf4de8957c134958c69a17c55 Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Tue, 27 Jan 2026 06:06:44 +0000 Subject: [PATCH 2/4] Fix dummy_inputs and dynamic_shapes inconsistency for optional inputs --- olive/common/hf/model_io.py | 8 ++++++++ olive/model/handler/mixin/hf.py | 1 + olive/passes/onnx/conversion.py | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/olive/common/hf/model_io.py b/olive/common/hf/model_io.py index 61233a8d0..a3ebd7305 100644 --- a/olive/common/hf/model_io.py +++ b/olive/common/hf/model_io.py @@ -91,6 +91,7 @@ def get_model_io_config( def get_model_dummy_input( model_name: str, task: str, + model: Optional["PreTrainedModel"] = None, **kwargs, ) -> Optional[dict[str, Any]]: """Get dummy inputs for the model and task. @@ -98,6 +99,7 @@ def get_model_dummy_input( Args: model_name: The model name or path. task: The task type. + model: Optional loaded model for input signature inspection. **kwargs: Additional arguments including use_cache, batch_size, sequence_length. 
Returns: @@ -133,10 +135,16 @@ def get_model_dummy_input( # Get model config (handles MLflow paths) model_config = get_model_config(model_name, **kwargs) + # Handle PEFT models + actual_model = model + if model is not None and is_peft_model(model): + actual_model = model.get_base_model() + try: return generate_dummy_inputs( model_name_or_config=model_config, task=actual_task, + model=actual_model, use_past=use_past, use_past_in_inputs=use_past_in_inputs, ) diff --git a/olive/model/handler/mixin/hf.py b/olive/model/handler/mixin/hf.py index 9fbcbad99..730d6bd6e 100644 --- a/olive/model/handler/mixin/hf.py +++ b/olive/model/handler/mixin/hf.py @@ -121,6 +121,7 @@ def get_hf_dummy_inputs(self) -> Optional[dict[str, Any]]: return get_model_dummy_input( self.model_path, self.task, + model=self.load_model(), **self.get_load_kwargs(), ) diff --git a/olive/passes/onnx/conversion.py b/olive/passes/onnx/conversion.py index 95ec55e54..b1b37333c 100644 --- a/olive/passes/onnx/conversion.py +++ b/olive/passes/onnx/conversion.py @@ -853,7 +853,7 @@ def _validate_dynamic_shapes(dynamic_shapes, dummy_inputs, dummy_kwargs, model): # Align tree spec only for not transformers.Cache. 
if len(dummy_inputs) == 0: for k, v in dummy_kwargs.items(): - if not isinstance(v, transformers.Cache): + if not isinstance(v, transformers.Cache) and k in dynamic_shapes: input_tree_spec = _pytree.tree_flatten(v)[1] flatten_dynamic_shapes = get_the_flattened_and_tree_spec(dynamic_shapes[k], leaf_is_str=False)[0] dynamic_shapes[k] = _pytree.tree_unflatten(flatten_dynamic_shapes, input_tree_spec) From e507e6d42458590b3da2894df819351b50f250c2 Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Tue, 27 Jan 2026 07:05:13 +0000 Subject: [PATCH 3/4] fix doc link --- docs/source/features/model-splitting.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/features/model-splitting.md b/docs/source/features/model-splitting.md index 35b1559c9..159881f54 100644 --- a/docs/source/features/model-splitting.md +++ b/docs/source/features/model-splitting.md @@ -37,7 +37,7 @@ Number splits **`cost-model`** -Let's now split the model using a cost model. Please refer to the [pre-generated cost models](https://github.com/microsoft/Olive/blob/main/assets/cost_models/Phi-3.5-mini.csv) in the Olive repository for an example a cost model csv. +Let's now split the model using a cost model. Please refer to the [pre-generated cost models](https://github.com/microsoft/Olive/blob/main/olive/assets/cost_models/Phi-3.5-mini.csv) in the Olive repository for an example of a cost model csv. 
```bash olive auto-opt -m microsoft/Phi-3.5-mini-instruct --precision fp16 --provider CUDAExecutionProvider --memory 2GB --cost-model phi-3.5-cost.csv -o models/phi-costsplit From 81ed01e3d7560e18c211f87b30f77d72307841e9 Mon Sep 17 00:00:00 2001 From: Xiaoyu Date: Tue, 27 Jan 2026 07:50:22 +0000 Subject: [PATCH 4/4] fix tests --- test/model/test_hf_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/model/test_hf_model.py b/test/model/test_hf_model.py index 6e400ddb1..d49d60715 100644 --- a/test/model/test_hf_model.py +++ b/test/model/test_hf_model.py @@ -4,7 +4,7 @@ # -------------------------------------------------------------------------- import json from pathlib import Path -from unittest.mock import patch +from unittest.mock import ANY, patch import huggingface_hub import pytest @@ -222,5 +222,5 @@ def test_hf_onnx_config_dummy_inputs(self, get_model_dummy_input): # get dummy inputs dummy_inputs = olive_model.get_dummy_inputs() - get_model_dummy_input.assert_called_once_with(self.model_name, self.task) + get_model_dummy_input.assert_called_once_with(self.model_name, self.task, model=ANY) assert dummy_inputs == 1