
Commit 9cf8f65

fix all models
Signed-off-by: Yu Chin Fabian Lim <[email protected]>
1 parent: de15a0f

4 files changed: +53 −13


plugins/fused-ops-and-kernels/src/fms_acceleration_foak/framework_plugin_fast_kernels.py

Lines changed: 4 additions & 4 deletions
@@ -29,7 +29,7 @@
 
 # consider rewriting register_foak_model_patch_rules into something
 # like this also
-def register_foak_model_patch_rules2(
+def register_foak_model_patch_rules(
     base_type: str,
     filter_endswith: Set[str] = None,
     config: PretrainedConfig = None,
@@ -52,8 +52,8 @@ def register_foak_model_patch_rules2(
     # create model specific rules
     rules = [
         *gpt_bigcode.get_mp_rules(base_type),
-        *granite.get_mp_rules(base_type),
-        *llama.get_mp_rules(base_type),
+        *granite.get_mp_rules(base_type, config),
+        *llama.get_mp_rules(base_type, config),
         *mistral.get_mp_rules(base_type, config),
         *mixtral.get_mp_rules(base_type),
     ]
@@ -166,7 +166,7 @@ def augmentation(
 
         # wrapper function to register foak patches
         # - the base layer setting below will be ignored in non quantized-lora settings
-        register_foak_model_patch_rules2(
+        register_foak_model_patch_rules(
            base_type=self.configurations["base_layer"],
            filter_endswith=terms,
            config=model.config,
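
For orientation, here is a minimal, self-contained Python sketch of the pattern this hunk applies: the renamed register_foak_model_patch_rules threads the model's config down to each model module's get_mp_rules, and each get_mp_rules defaults to config=None so call sites that never pass a config keep working. FakeConfig and the rule strings are illustrative stand-ins, not the plugin's real ModelPatcherRule objects or the transformers PretrainedConfig.

from typing import List, Optional


class FakeConfig:  # stand-in for transformers.PretrainedConfig
    def __init__(self, hidden_act: str = "silu"):
        self.hidden_act = hidden_act


def granite_get_mp_rules(base_type: str, config: Optional[FakeConfig] = None) -> List[str]:
    # config defaults to None, so older call sites remain valid
    act = config.hidden_act if config else "unknown"
    return [f"granite/{base_type}/qkv (act={act})"]


def llama_get_mp_rules(base_type: str, config: Optional[FakeConfig] = None) -> List[str]:
    act = config.hidden_act if config else "unknown"
    return [f"llama/{base_type}/qkv (act={act})"]


def register_foak_model_patch_rules(base_type: str, config: Optional[FakeConfig] = None) -> List[str]:
    # single entry point: the same config is fanned out to every model module
    return [
        *granite_get_mp_rules(base_type, config),
        *llama_get_mp_rules(base_type, config),
    ]


print(register_foak_model_patch_rules("auto_gptq", FakeConfig("silu")))
# ['granite/auto_gptq/qkv (act=silu)', 'llama/auto_gptq/qkv (act=silu)']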

plugins/fused-ops-and-kernels/src/fms_acceleration_foak/models/granite.py

Lines changed: 23 additions & 3 deletions
@@ -14,6 +14,7 @@
 
 # Standard
 from functools import partial
+import warnings
 
 # Third Party
 from fms_acceleration.model_patcher import (
@@ -22,15 +23,23 @@
     combine_functions,
     combine_triggers,
 )
+from transformers import PretrainedConfig
 
 # Local
 from ..kernels.unsloth.cross_entropy_loss import FastCrossEntropyLoss
 from ..kernels.unsloth.rms_layernorm import fast_rms_layernorm
 from ..kernels.unsloth.rope_embedding import fast_rope_embedding
-from .utils import KEY_MLP, KEY_O, KEY_QKV, build_lora_fused_ops, trigger_fused_ops
+from .utils import (
+    KEY_MLP,
+    KEY_O,
+    KEY_QKV,
+    build_lora_fused_ops,
+    filter_mp_rules,
+    trigger_fused_ops,
+)
 
 
-def get_mp_rules(base_type: str):
+def get_mp_rules(base_type: str, config: PretrainedConfig = None):
     """
     Function to access all patch rules in this module.
     If it is a forward_builder rule with `base_type` in
@@ -47,7 +56,7 @@ def get_mp_rules(base_type: str):
     except ImportError:
         return []
 
-    return [
+    rules = [
         # TODO: have a generic version of this rule
         # - do regex on RMSNorm class name
         # - check on the tensors required for fast_rms_layernorm
@@ -133,3 +142,14 @@ def get_mp_rules(base_type: str):
             ),
         ),
     ]
+
+    # perform model specific filtering
+    if config and config.hidden_act != "silu":
+        warnings.warn(
+            f"Granite activation is {config.hidden_act}, "
+            "thus disabling LoRA fused-op for MLP, since only SwiGLU "
+            "is supported. This only affects quantized-peft."
+        )
+        rules = filter_mp_rules(rules, {"mlp"}, drop=True)
+
+    return rules
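
The same activation gate is added to llama.py below. As a runnable illustration of what this new tail of get_mp_rules does, here is a sketch with an assumed filter_mp_rules that drops rules whose names contain any of the given terms; the real helper lives in .utils and its matching logic may differ.

import warnings
from typing import List, Set


def filter_mp_rules(rules: List[str], terms: Set[str], drop: bool = False) -> List[str]:
    # assumed behavior: drop=True removes rules matching any term,
    # drop=False keeps only the matching rules
    def matches(rule: str) -> bool:
        return any(term in rule for term in terms)
    return [r for r in rules if matches(r) != drop]


def apply_activation_gate(rules: List[str], hidden_act: str) -> List[str]:
    # mirrors the tail of get_mp_rules: warn and drop the fused MLP rule
    # for models whose MLP is not SwiGLU
    if hidden_act != "silu":
        warnings.warn(
            f"activation is {hidden_act}, thus disabling LoRA fused-op for MLP, "
            "since only SwiGLU is supported. This only affects quantized-peft."
        )
        rules = filter_mp_rules(rules, {"mlp"}, drop=True)
    return rules


print(apply_activation_gate(["granite-rms", "granite-qkv", "granite-mlp"], "gelu"))
# ['granite-rms', 'granite-qkv']  (plus a UserWarning)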

plugins/fused-ops-and-kernels/src/fms_acceleration_foak/models/llama.py

Lines changed: 23 additions & 3 deletions
@@ -14,6 +14,7 @@
 
 # Standard
 from functools import partial
+import warnings
 
 # Third Party
 from fms_acceleration.model_patcher import (
@@ -22,6 +23,7 @@
     combine_functions,
     combine_triggers,
 )
+from transformers import PretrainedConfig
 from transformers.models.llama.modeling_llama import (
     LlamaAttention,
     LlamaMLP,
@@ -32,17 +34,24 @@
 from ..kernels.unsloth.cross_entropy_loss import FastCrossEntropyLoss
 from ..kernels.unsloth.rms_layernorm import fast_rms_layernorm
 from ..kernels.unsloth.rope_embedding import fast_rope_embedding
-from .utils import KEY_MLP, KEY_O, KEY_QKV, build_lora_fused_ops, trigger_fused_ops
+from .utils import (
+    KEY_MLP,
+    KEY_O,
+    KEY_QKV,
+    build_lora_fused_ops,
+    filter_mp_rules,
+    trigger_fused_ops,
+)
 
 
-def get_mp_rules(base_type: str):
+def get_mp_rules(base_type: str, config: PretrainedConfig = None):
     """
     Function to access all patch rules in this module.
     If it is a forward_builder rule with `base_type` in
     its forward builder argument, wrap the forward_builder
     function as a partial function with the base_type argument
     """
-    return [
+    rules = [
         # TODO: have a generic version of this rule
         # - do regex on RMSNorm class name
         # - check on the tensors required for fast_rms_layernorm
@@ -128,3 +137,14 @@ def get_mp_rules(base_type: str):
             ),
         ),
     ]
+
+    # perform model specific filtering
+    if config and config.hidden_act != "silu":
+        warnings.warn(
+            f"Llama activation is {config.hidden_act}, "
+            "thus disabling LoRA fused-op for MLP, since only SwiGLU "
+            "is supported. This only affects quantized-peft."
+        )
+        rules = filter_mp_rules(rules, {"mlp"}, drop=True)
+
+    return rules

plugins/fused-ops-and-kernels/src/fms_acceleration_foak/models/mistral.py

Lines changed: 3 additions & 3 deletions
@@ -44,7 +44,7 @@
 )
 
 
-def get_mp_rules(base_type: str, config: PretrainedConfig):
+def get_mp_rules(base_type: str, config: PretrainedConfig = None):
     """
     Function to access all patch rules in this module.
     If it is a forward_builder rule with `base_type` in
@@ -130,9 +130,9 @@ def get_mp_rules(base_type: str, config: PretrainedConfig):
     ]
 
     # perform model specific filtering
-    if config.hidden_act != "silu":
+    if config and config.hidden_act != "silu":
         warnings.warn(
-            f"Mixtral activation is {config.hdiden_act}, "
+            f"Mistral activation is {config.hidden_act}, "
             "thus disabling LoRA fused-op for MLP, since only SwiGLU "
             "is supported. This only affects quantized-peft."
         )
