Commits (26)
d494c82
Initial commit
qti-kromero Aug 13, 2025
ddf3ea8
Add README and start config
qti-kromero Aug 13, 2025
1f54074
QuaRot passing, working on GptqQuantizer
qti-kromero Aug 14, 2025
6cae95f
Work on dataset integration
qti-kromero Aug 15, 2025
2d0872e
Data processing works
qti-kromero Aug 15, 2025
6a6f67d
Fix lint issues and cleanup
qti-kromero Aug 15, 2025
cd24ddf
Adding vision resources
qti-kromero Aug 18, 2025
636e982
Add Gemma3 vision configurations
qti-kromero Aug 19, 2025
b4ea7a3
Fix linting error
qti-kromero Aug 19, 2025
1f69af3
Vision model onnx conversion working
qti-kromero Aug 19, 2025
aed20ec
Enable quant on text model
qti-kromero Aug 20, 2025
ba0633c
Improve README
qti-kromero Aug 26, 2025
5ad910d
Merge remote-tracking branch 'origin/main' into dev/qti-kromero/gemma3
qti-kromero Aug 28, 2025
acbdfdc
Add files from Prudvhi
qti-kromero Aug 28, 2025
f7178ae
Updates
qti-kromero Sep 2, 2025
bd70ff4
Updates
qti-kromero Sep 3, 2025
c962cee
Add olive requirements file
prudhvi-qti Sep 4, 2025
360d9c2
update
qti-kromero Sep 4, 2025
5fcda5c
Update Olive scripts for gemma3
prudhvi-qti Sep 4, 2025
14018ee
Update few python packages
prudhvi-qti Sep 5, 2025
1f89241
Use the same llava dataset for text model as well
prudhvi-qti Sep 8, 2025
7d4ced8
Minor cleanup
qti-kromero Sep 9, 2025
a0bd703
Add system requirements
prudhvi-qti Sep 9, 2025
f712bdc
Merge remote-tracking branch 'origin/main' into dev/qti-kromero/gemma3
qti-kromero Sep 18, 2025
f685073
Remove examples
qti-kromero Sep 18, 2025
5dff155
Fix review comments
qti-kromero Sep 18, 2025
20 changes: 18 additions & 2 deletions olive/common/hf/utils.py
@@ -2,6 +2,7 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import importlib
import logging
from pathlib import Path
from typing import TYPE_CHECKING, Optional, Union
@@ -18,7 +19,13 @@
logger = logging.getLogger(__name__)


def load_model_from_task(task: str, model_name_or_path: str, **kwargs) -> "PreTrainedModel":
def load_model_from_task(
task: str,
model_name_or_path: str,
custom_task_class_name: Optional[str] = None,
custom_task_class_module: Optional[str] = None,
**kwargs,
) -> "PreTrainedModel":
"""Load huggingface model from task and model_name_or_path."""
from transformers.pipelines import check_task

@@ -55,7 +62,12 @@ def load_model_from_task(task: str, model_name_or_path: str, **kwargs) -> "PreTrainedModel":
AUTO_QUANTIZATION_CONFIG_MAPPING["olive"] = OliveHfQuantizationConfig
AUTO_QUANTIZER_MAPPING["olive"] = OliveHfQuantizer

class_tuple = targeted_task["pt"] or (AutoModel,)
if custom_task_class_module is not None and custom_task_class_name is not None:
module = importlib.import_module(custom_task_class_module)
class_tuple = (getattr(module, custom_task_class_name),)
else:
class_tuple = targeted_task["pt"] or (AutoModel,)

model = None
for i, model_class in enumerate(class_tuple):
try:
@@ -119,6 +131,10 @@ def save_model_config(config: Union["PretrainedConfig", "GenerationConfig"], output_dir: str, **kwargs):
config.save_pretrained(output_dir, **kwargs)


def get_model_attributes_config(config: "PretrainedConfig", model_type: str) -> "PretrainedConfig":
"""Return the config that carries the model attributes (the nested text_config for gemma3)."""
return config.text_config if model_type == "gemma3" else config


def save_module_files(
config: "PretrainedConfig", model_name_or_path: str, output_dir: str, **kwargs
) -> tuple["PretrainedConfig", list[str]]:
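For context, a minimal sketch of how the new custom-class hooks in load_model_from_task might be exercised; the model ids, task names, and the Gemma3ForConditionalGeneration/transformers values are illustrative assumptions, not taken from this PR.

from olive.common.hf.utils import load_model_from_task

# Default path: the model class is resolved from the transformers task mapping.
model = load_model_from_task("text-generation", "google/gemma-3-1b-it")

# Override path: import a specific class by module and name instead of relying
# on the task mapping (useful for multimodal checkpoints like Gemma3).
model = load_model_from_task(
    "image-text-to-text",
    "google/gemma-3-4b-it",
    custom_task_class_name="Gemma3ForConditionalGeneration",
    custom_task_class_module="transformers",
)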
28 changes: 21 additions & 7 deletions olive/common/hf/wrapper.py
@@ -9,6 +9,7 @@
from torch import nn
from transformers import PretrainedConfig

from olive.common.hf.utils import get_model_attributes_config
from olive.common.utils import find_first_matched_value, get_attr, replace_submodules, set_attr

if TYPE_CHECKING:
@@ -196,6 +197,7 @@ class ModelWrapper:
"default": ["model.embed_tokens"],
"bloom": ["transformer.word_embeddings", "transformer.word_embeddings_layernorm"],
"falcon": ["transformer.word_embeddings"],
"gemma3": ["model.language_model.embed_tokens"],
"gpt2": ["transformer.wte", "transformer.wpe"],
"gpt_neox": ["gpt_neox.embed_in"],
"gptj": ["transformer.wte"],
@@ -210,11 +212,17 @@
"qwen": "transformer.rotary_emb",
}
LM_HEAD = {"default": "lm_head"}
PRE_HEAD_LAYERNORM = {"default": "model.norm", "gpt2": "transformer.ln_f", "qwen": "transformer.ln_f"}
PRE_HEAD_LAYERNORM = {
"default": "model.norm",
"gemma3": "model.language_model.norm",
"gpt2": "transformer.ln_f",
"qwen": "transformer.ln_f",
}
LAYERS = {
"default": "model.layers",
"bloom": "transformer.h",
"falcon": "transformer.h",
"gemma3": "model.language_model.layers",
"gpt2": "transformer.h",
"gpt_neox": "gpt_neox.layers",
"gptj": "transformer.h",
@@ -226,17 +234,22 @@ def __init__(self, config: Union[PretrainedConfig, dict]):
self.config = config if isinstance(config, PretrainedConfig) else PretrainedConfig.from_dict(config)
self.model_type = find_first_matched_value(self.config, "model_type")

logger.debug(self.config)

# model attributes
self.hidden_size = find_first_matched_value(self.config, self.HIDDEN_SIZE_NAMES)
self.num_attention_heads = find_first_matched_value(self.config, self.NUM_ATTENTION_HEADS_NAMES)
model_attributes_config = get_model_attributes_config(self.config, self.model_type)
self.hidden_size = find_first_matched_value(model_attributes_config, self.HIDDEN_SIZE_NAMES)
self.num_attention_heads = find_first_matched_value(model_attributes_config, self.NUM_ATTENTION_HEADS_NAMES)
self.num_key_value_heads = (
find_first_matched_value(self.config, self.NUM_KEY_VALUE_HEADS_NAMES) or self.num_attention_heads
find_first_matched_value(model_attributes_config, self.NUM_KEY_VALUE_HEADS_NAMES)
or self.num_attention_heads
)
self.head_dim = (
find_first_matched_value(self.config, self.HEAD_DIM_NAMES) or self.hidden_size // self.num_attention_heads
find_first_matched_value(model_attributes_config, self.HEAD_DIM_NAMES)
or self.hidden_size // self.num_attention_heads
)
self.num_hidden_layers = find_first_matched_value(self.config, self.NUM_HIDDEN_LAYER_NAMES)
self.max_length = find_first_matched_value(self.config, self.MAX_LENGTH)
self.num_hidden_layers = find_first_matched_value(model_attributes_config, self.NUM_HIDDEN_LAYER_NAMES)
self.max_length = find_first_matched_value(model_attributes_config, self.MAX_LENGTH)

self._model = None
self._layer_wrappers = None
@@ -267,6 +280,7 @@ def get_pre_head_layernorm(self, return_name: bool = True):
return get_submodules(self.model, self.PRE_HEAD_LAYERNORM, self.model_type, return_name=return_name)

def get_layers(self, return_name: bool = True):
logger.debug(self.model)
return get_submodules(self.model, self.LAYERS, self.model_type, return_name=return_name)

def get_layer_wrappers(self):
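A rough sketch of what the nested-config handling means for ModelWrapper in practice; it assumes a transformers release that ships Gemma3Config and is not part of the diff.

from transformers import Gemma3Config

from olive.common.hf.wrapper import ModelWrapper

# Gemma3Config nests the text settings under config.text_config, so the wrapper
# now reads hidden_size, head counts, and layer count from there instead of the
# top-level (vision + text) config.
wrapper = ModelWrapper(Gemma3Config())
print(wrapper.hidden_size, wrapper.num_attention_heads, wrapper.num_hidden_layers)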
15 changes: 12 additions & 3 deletions olive/model/handler/hf.py
@@ -28,7 +28,7 @@
@model_handler_registry("HFModel")
class HfModelHandler(PyTorchModelHandlerBase, MLFlowTransformersMixin, HfMixin): # pylint: disable=too-many-ancestors
resource_keys: tuple[str, ...] = ("model_path", "adapter_path")
json_config_keys: tuple[str, ...] = ("task", "load_kwargs")
json_config_keys: tuple[str, ...] = ("task", "load_kwargs", "custom_task_class_name", "custom_task_class_module")

def __init__(
self,
@@ -38,6 +38,8 @@ def __init__(
io_config: Union[dict[str, Any], IoConfig, str] = None,
adapter_path: OLIVE_RESOURCE_ANNOTATIONS = None,
model_attributes: Optional[dict[str, Any]] = None,
custom_task_class_name: Optional[str] = None,
custom_task_class_module: Optional[str] = None,
):
super().__init__(
model_file_format=None,
@@ -47,6 +49,8 @@ def __init__(
)
self.add_resources(locals())
self.task = task
self.custom_task_class_name = custom_task_class_name
self.custom_task_class_module = custom_task_class_module
self.load_kwargs = validate_config(load_kwargs, HfLoadKwargs, warn_unused_keys=False) if load_kwargs else None

self.model_attributes = {**self.get_hf_model_config().to_dict(), **(self.model_attributes or {})}
@@ -72,7 +76,13 @@ def load_model(self, rank: int = None, cache_model: bool = True) -> "torch.nn.Module":
if self.model:
model = self.model
else:
model = load_model_from_task(self.task, self.model_path, **self.get_load_kwargs())
model = load_model_from_task(
self.task,
self.model_path,
self.custom_task_class_name,
self.custom_task_class_module,
**self.get_load_kwargs(),
)

# we only have peft adapters for now
if self.adapter_path:
@@ -81,7 +91,6 @@ def load_model(self, rank: int = None, cache_model: bool = True) -> "torch.nn.Module":
model = PeftModel.from_pretrained(model, self.adapter_path)

self.model = model if cache_model else None

return model

@property
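To show how the new handler fields are meant to be wired together, a hedged construction example; the model id, task, and class/module values are assumptions for illustration.

from olive.model.handler.hf import HfModelHandler

handler = HfModelHandler(
    model_path="google/gemma-3-4b-it",
    task="image-text-to-text",
    custom_task_class_name="Gemma3ForConditionalGeneration",
    custom_task_class_module="transformers",
)
# load_model forwards both custom_task_class_* values to load_model_from_task.
model = handler.load_model()

Because both keys were added to json_config_keys, the same two fields can also be set directly in an HFModel JSON config.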
14 changes: 12 additions & 2 deletions olive/passes/pytorch/gptqmodel.py
@@ -189,8 +189,18 @@ def get_dataset(
raise ValueError("Data config is required for PyTorch model.")
data_config = validate_config(data_config, DataConfig)
dataloader = data_config.to_data_container().create_dataloader()
# each batch consists of (input_data, labels)
dataset = [data[0] for data in dataloader]
# each batch consists of (input_data, labels) or just input_data
dataset = []
for data in dataloader:
if isinstance(data, (tuple, list)) and len(data) > 0:
# Standard format: (input_data, labels)
dataset.append(data[0])
else:
# Data is already the model input (a dict of inputs or the raw input itself)
dataset.append(data)
if (
not dataset
or not isinstance(dataset, list)
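A small, self-contained sketch of the batch formats the calibration loop now tolerates; the tensors are made up purely for illustration.

import torch

inputs = {"input_ids": torch.ones(1, 8, dtype=torch.long)}
batches = [
    (inputs, torch.zeros(1, 8)),  # (input_data, labels) tuple
    inputs,                       # bare dict of model inputs
]
# Mirrors the branch logic above: take data[0] from tuples/lists and keep
# everything else as-is.
dataset = [b[0] if isinstance(b, (tuple, list)) and len(b) > 0 else b for b in batches]
assert all(isinstance(d, dict) for d in dataset)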
13 changes: 11 additions & 2 deletions olive/passes/pytorch/rotate.py
@@ -44,6 +44,11 @@ class RotateMode(StrEnumBase):
@classmethod
def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassConfigParam]:
return {
"device": PassConfigParam(
type_=str,
default_value="cpu",
description="Whether to run rotation on cpu or gpu. Accepted values are 'cpu' and 'cuda'.",
),
"seed": PassConfigParam(
type_=int,
default_value=0,
@@ -60,6 +65,7 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassConfigParam]:
def rotate_model(
self,
model: HfModelHandler,
device: str,
rotate_mode: str,
seed: int,
training_args: Optional[BaseHFTrainingArguments] = None,
@@ -157,10 +163,13 @@ def rotate_model(
count_trainable_parameters(model_wrapper.model),
)

if device == "cuda" and not torch.cuda.is_available():
raise ValueError("Please install CUDA to rotate with it.")

return (
model_wrapper,
rotation_params,
[((RotateEmbed, RotateLinear), lambda x: x.create_merged("cuda" if torch.cuda.is_available() else "cpu"))],
[((RotateEmbed, RotateLinear), lambda x: x.create_merged(device))],
)

@classmethod
Expand Down Expand Up @@ -246,7 +255,7 @@ class QuaRot(RotateBase):
def _run_for_config(
self, model: HfModelHandler, config: type[BasePassConfig], output_model_path: str
) -> HfModelHandler:
model_wrapper, _, save_replacements = self.rotate_model(model, config.rotate_mode, config.seed)
model_wrapper, _, save_replacements = self.rotate_model(model, config.device, config.rotate_mode, config.seed)

# save the model
model_wrapper.save_model(output_model_path, replacements=save_replacements)
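Finally, a hedged sketch of how the new device option might be set for the QuaRot pass; the rotate_mode value and the shape of the surrounding workflow config are assumptions, not taken from this PR.

import torch

# Pass-configuration fragment: rotate on GPU when available. If "cuda" is
# requested without CUDA, the check added above raises a clear error.
quarot_pass_config = {
    "type": "QuaRot",
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "rotate_mode": "hadamard",
    "seed": 0,
}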