From 82769c57c2c18b2b5606d4d33a007405ed14c759 Mon Sep 17 00:00:00 2001
From: Xiaoyu <xiaoyuzhang@microsoft.com>
Date: Tue, 27 Jan 2026 05:18:29 +0000
Subject: [PATCH 1/4] fix clip

---
 olive/common/hf/io_config/task_config.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/olive/common/hf/io_config/task_config.py b/olive/common/hf/io_config/task_config.py
index 7eaf3d0fc..e0c6aeb3f 100644
--- a/olive/common/hf/io_config/task_config.py
+++ b/olive/common/hf/io_config/task_config.py
@@ -84,7 +84,7 @@ def get_io_config(
         _add_present_outputs(dynamic_axes, config)
 
     # Order inputs according to model forward signature
-    ordered_inputs = _order_inputs(dynamic_axes, model)
+    ordered_inputs = _order_inputs(dynamic_axes, model, set(outputs.keys()))
 
     # Separate input and output names
     input_names = [name for name in ordered_inputs if not name.startswith("present.")]
@@ -202,6 +202,7 @@ def _add_present_outputs(
 def _order_inputs(
     dynamic_axes: dict,
     model: PreTrainedModel | None,
+    output_names: set[str] | None = None,
 ) -> OrderedDict:
     """Order inputs according to model forward signature.
 
@@ -210,6 +211,7 @@ def _order_inputs(
     Args:
         dynamic_axes: Dict of all dynamic axes (inputs and outputs).
         model: Optional model for forward signature inspection.
+        output_names: Set of output names to exclude from input ordering.
 
     Returns:
         OrderedDict of input names to dynamic axes, ordered by forward signature.
@@ -217,11 +219,15 @@ def _order_inputs(
     """
     import re
 
-    # Filter to only input names (not outputs like present.*)
+    if output_names is None:
+        output_names = set()
+
+    # Filter to only input names (not outputs like present.* or explicit output names)
     input_axes = OrderedDict()
     for name, axes in dynamic_axes.items():
-        if not name.startswith("present.") and not name.startswith("logits"):
-            input_axes[name] = axes
+        if name.startswith("present.") or name in output_names:
+            continue
+        input_axes[name] = axes
 
     if model is None:
         return input_axes

From 784d25261fc2cc7bf4de8957c134958c69a17c55 Mon Sep 17 00:00:00 2001
From: Xiaoyu <xiaoyuzhang@microsoft.com>
Date: Tue, 27 Jan 2026 06:06:44 +0000
Subject: [PATCH 2/4] Fix dummy_inputs and dynamic_shapes inconsistency for
 optional inputs

---
 olive/common/hf/model_io.py     | 8 ++++++++
 olive/model/handler/mixin/hf.py | 1 +
 olive/passes/onnx/conversion.py | 2 +-
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/olive/common/hf/model_io.py b/olive/common/hf/model_io.py
index 61233a8d0..a3ebd7305 100644
--- a/olive/common/hf/model_io.py
+++ b/olive/common/hf/model_io.py
@@ -91,6 +91,7 @@ def get_model_io_config(
 def get_model_dummy_input(
     model_name: str,
     task: str,
+    model: Optional["PreTrainedModel"] = None,
     **kwargs,
 ) -> Optional[dict[str, Any]]:
     """Get dummy inputs for the model and task.
@@ -98,6 +99,7 @@ def get_model_dummy_input(
     Args:
         model_name: The model name or path.
         task: The task type.
+        model: Optional loaded model for input signature inspection.
         **kwargs: Additional arguments including use_cache, batch_size, sequence_length.
 
     Returns:
@@ -133,10 +135,16 @@ def get_model_dummy_input(
     # Get model config (handles MLflow paths)
     model_config = get_model_config(model_name, **kwargs)
 
+    # Handle PEFT models
+    actual_model = model
+    if model is not None and is_peft_model(model):
+        actual_model = model.get_base_model()
+
     try:
         return generate_dummy_inputs(
             model_name_or_config=model_config,
             task=actual_task,
+            model=actual_model,
             use_past=use_past,
             use_past_in_inputs=use_past_in_inputs,
         )
diff --git a/olive/model/handler/mixin/hf.py b/olive/model/handler/mixin/hf.py
index 9fbcbad99..730d6bd6e 100644
--- a/olive/model/handler/mixin/hf.py
+++ b/olive/model/handler/mixin/hf.py
@@ -121,6 +121,7 @@ def get_hf_dummy_inputs(self) -> Optional[dict[str, Any]]:
         return get_model_dummy_input(
             self.model_path,
             self.task,
+            model=self.load_model(),
             **self.get_load_kwargs(),
         )
 
diff --git a/olive/passes/onnx/conversion.py b/olive/passes/onnx/conversion.py
index 95ec55e54..b1b37333c 100644
--- a/olive/passes/onnx/conversion.py
+++ b/olive/passes/onnx/conversion.py
@@ -853,7 +853,7 @@ def _validate_dynamic_shapes(dynamic_shapes, dummy_inputs, dummy_kwargs, model):
     # Align tree spec only for not transformers.Cache.
     if len(dummy_inputs) == 0:
         for k, v in dummy_kwargs.items():
-            if not isinstance(v, transformers.Cache):
+            if not isinstance(v, transformers.Cache) and k in dynamic_shapes:
                 input_tree_spec = _pytree.tree_flatten(v)[1]
                 flatten_dynamic_shapes = get_the_flattened_and_tree_spec(dynamic_shapes[k], leaf_is_str=False)[0]
                 dynamic_shapes[k] = _pytree.tree_unflatten(flatten_dynamic_shapes, input_tree_spec)

From e507e6d42458590b3da2894df819351b50f250c2 Mon Sep 17 00:00:00 2001
From: Xiaoyu <xiaoyuzhang@microsoft.com>
Date: Tue, 27 Jan 2026 07:05:13 +0000
Subject: [PATCH 3/4] fix doc link

---
 docs/source/features/model-splitting.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/features/model-splitting.md b/docs/source/features/model-splitting.md
index 35b1559c9..159881f54 100644
--- a/docs/source/features/model-splitting.md
+++ b/docs/source/features/model-splitting.md
@@ -37,7 +37,7 @@ Number splits
 
 **`cost-model`**
 
-Let's now split the model using a cost model. Please refer to the [pre-generated cost models](https://github.com/microsoft/Olive/blob/main/assets/cost_models/Phi-3.5-mini.csv) in the Olive repository for an example a cost model csv.
+Let's now split the model using a cost model. Please refer to the [pre-generated cost models](https://github.com/microsoft/Olive/blob/main/olive/assets/cost_models/Phi-3.5-mini.csv) in the Olive repository for an example of a cost model CSV.
 
 ```bash
 olive auto-opt -m microsoft/Phi-3.5-mini-instruct --precision fp16 --provider CUDAExecutionProvider --memory 2GB --cost-model phi-3.5-cost.csv -o models/phi-costsplit

From 81ed01e3d7560e18c211f87b30f77d72307841e9 Mon Sep 17 00:00:00 2001
From: Xiaoyu <xiaoyuzhang@microsoft.com>
Date: Tue, 27 Jan 2026 07:50:22 +0000
Subject: [PATCH 4/4] fix tests

---
 test/model/test_hf_model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/model/test_hf_model.py b/test/model/test_hf_model.py
index 6e400ddb1..d49d60715 100644
--- a/test/model/test_hf_model.py
+++ b/test/model/test_hf_model.py
@@ -4,7 +4,7 @@
 # --------------------------------------------------------------------------
 import json
 from pathlib import Path
-from unittest.mock import patch
+from unittest.mock import ANY, patch
 
 import huggingface_hub
 import pytest
@@ -222,5 +222,5 @@ def test_hf_onnx_config_dummy_inputs(self, get_model_dummy_input):
         # get dummy inputs
         dummy_inputs = olive_model.get_dummy_inputs()
 
-        get_model_dummy_input.assert_called_once_with(self.model_name, self.task)
+        get_model_dummy_input.assert_called_once_with(self.model_name, self.task, model=ANY)
         assert dummy_inputs == 1