Skip to content

Commit e8bc5dd

Browse files
willmj and fabianlim authored
model: Add granite GPTQ model (#95)
* feat: Add granite GPTQ model

Signed-off-by: Will Johnson <[email protected]>

* fmt + lint

Signed-off-by: Yu Chin Fabian Lim <[email protected]>

* update granite benches to be in line with #92

Signed-off-by: Yu Chin Fabian Lim <[email protected]>

---------

Signed-off-by: Will Johnson <[email protected]>
Signed-off-by: Yu Chin Fabian Lim <[email protected]>
Co-authored-by: Yu Chin Fabian Lim <[email protected]>
1 parent 28eb168 commit e8bc5dd

File tree

5 files changed

+34
-2
lines changed

5 files changed

+34
-2
lines changed

plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
from .gemma import GemmaGPTQ
2222
from .gpt_bigcode import GPTBigCodeGPTQ
2323
from .gpt_neox import GPTNeoXGPTQ
24+
from .granite import GraniteGPTQ
2425
from .llama import LlamaGPTQ
2526
from .mistral import MistralGPTQ
2627
from .mixtral import MixtralGPTQ

plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/_const.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
"llama",
2626
"mistral",
2727
"mixtral",
28+
"granite",
2829
"gemma",
2930
"dbrx_converted",
3031
]

plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/models/auto.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
from .gemma import GemmaGPTQ
2929
from .gpt_bigcode import GPTBigCodeGPTQ
3030
from .gpt_neox import GPTNeoXGPTQ
31+
from .granite import GraniteGPTQ
3132
from .llama import LlamaGPTQ
3233
from .mistral import MistralGPTQ
3334
from .mixtral import MixtralGPTQ
@@ -39,6 +40,7 @@
3940
"mistral": MistralGPTQ,
4041
"mixtral": MixtralGPTQ,
4142
"gemma": GemmaGPTQ,
43+
"granite": GraniteGPTQ,
4244
"dbrx": DbrxGPTQ,
4345
"dbrx_converted": DbrxConvertedGPTQ,
4446
}
Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,30 @@
1+
###############################################################################
2+
# Adapted from https://github.com/ModelCloud/GPTQModel
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
###############################################################################
16+
# Local
17+
from .base import BaseGPTQModel
18+
19+
20+
class GraniteGPTQ(BaseGPTQModel):
    """GPTQ quantization description for IBM Granite decoder-only models.

    The class attributes below tell the GPTQ machinery which top-level
    modules to leave unquantized, where the repeated transformer blocks
    live in the module tree, and in which groups the quantizable linear
    projections inside each block should be processed.
    """

    # Modules outside the decoder stack (token embeddings and the final
    # norm) are excluded from quantization.
    base_modules = ["model.embed_tokens", "model.norm"]

    # Dotted path to the list of transformer blocks, and the class name
    # of one block (presumably HF Transformers' Granite layer — confirm
    # against the upstream model implementation).
    layers_node = "model.layers"
    layer_type = "GraniteDecoderLayer"

    # Per-layer quantization order: attention input projections first,
    # then the attention output projection, then the MLP in/out pairs.
    layer_modules = [
        ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
        ["self_attn.o_proj"],
        ["mlp.up_proj", "mlp.gate_proj"],
        ["mlp.down_proj"],
    ]

scripts/benchmarks/scenarios-granite.yaml

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,14 +45,12 @@ scenarios:
4545
model_name_or_path:
4646
- 'ibm/PowerLM-3b'
4747
torch_dtype: bfloat16
48-
bf16: True
4948

5049
- name: standard-peft
5150
framework_config:
5251
-
5352
- foak-fast-kernels
5453
arguments:
55-
bf16: True
5654
learning_rate: 2e-4
5755
torch_dtype: bfloat16
5856
peft_method: lora

0 commit comments

Comments (0)