
Commit ae29bfd

Add granite support for DQ with smoothquant
Signed-off-by: Andrea Fasoli <[email protected]>
1 parent 3f07692 commit ae29bfd

File tree

1 file changed: +50 -32 lines


fms_mo/utils/dq_utils.py

Lines changed: 50 additions & 32 deletions
@@ -11,54 +11,72 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""
-Utils for DQ
+"""Utility functions for Direct Quantization (DQ)."""

-"""

+def config_quantize_smooth_layers(qcfg: dict):
+    """Update qcfg with model-dependent config parameters:
+    - qlayer_name_pattern: identifier of transformer layers containing linear layers
+      to quantize (if any, tracing is bypassed)
+    - scale_layers: identifier of linear layers to apply smoothquant on
+    - qskip_layer_name: full name of linear layers that will not be quantized
+    - act_scale_path: path to save/load smoothquant activation scales

-def config_quantize_smooth_layers(qcfg):
-    """
-    To set the config for each model, for example
-    layers to quantize
-    layers to skip
-    layers to apply smooth-scale
-    block_size
-    smooth_alpha
+    Selected model is determined by comparing all architecture identifiers against
+    `model` and `model_type` fields in qcfg.
+
+    NOTE: layer quantization skip is determined by bool `qskip_large_mag_layers`
+    NOTE: different versions of granite models are based on different architectures
+    (chronologically: bigcode -> llama -> granite)
     """
+
     llama_architecture = [
         "llama",
         "Nemotron",
         "granite-3b-code",
         "granite-8b-code",
     ]
-    granite_BigCode_architecture = [
+    bigcode_architecture = [
         "granite-3b-base",
         "granite-13b-base",
         "granite-20b-code",
         "granite-20b-code",
     ]
-    if (
-        any(model in qcfg["model"] for model in llama_architecture)
-        or any(model in qcfg["model_type"] for model in llama_architecture)
-        and qcfg["qskip_large_mag_layers"]
+    granite_architecture = [
+        "granite-3.0-8b-base",
+        "granite-3.0-8b-instruct",
+        "granite-3.1-8b-base",
+        "granite-3.1-8b-instruct",
+        "granite-3.2-8b-instruct",
+        "granite-3.3-8b-base",
+        "granite-3.3-8b-instruct",
+    ]
+
+    if any(model in qcfg["model"] for model in llama_architecture) or any(
+        model in qcfg["model_type"] for model in llama_architecture
     ):
         qcfg["qlayer_name_pattern"] = ["model.layers."]
         qcfg["scale_layers"] = ["k_proj", "v_proj", "gate_proj", "up_proj"]
-        large_mag_layers = {
-            "2-7b": [1, 30],
-            "2-70b": [2, 8, 79],
-            "3-8B": [1, 31],
-            "3-70B": [3, 78, 79],
-            "405B-Instruct": [5, 124, 125],
-        }
-        for llama_family, layers in large_mag_layers.items():
-            if llama_family in qcfg["model"]:
-                qcfg["qskip_layer_name"] += [
-                    f"model.layers.{i}.mlp.down_proj" for i in layers
-                ]
-                break
-
+        if qcfg["qskip_large_mag_layers"]:
+            large_mag_layers = {
+                "2-7b": [1, 30],
+                "2-70b": [2, 8, 79],
+                "3-8B": [1, 31],
+                "3-70B": [3, 78, 79],
+                "405B-Instruct": [5, 124, 125],
+            }
+            for llama_family, layers in large_mag_layers.items():
+                if llama_family in qcfg["model"]:
+                    qcfg["qskip_layer_name"] += [
+                        f"model.layers.{i}.mlp.down_proj" for i in layers
+                    ]
+                    break
+    elif any(model in qcfg["model"] for model in granite_architecture) or any(
+        model in qcfg["model_type"] for model in granite_architecture
+    ):
+        qcfg["qlayer_name_pattern"] = ["model.layers."]
+        qcfg["scale_layers"] = ["k_proj", "v_proj", "gate_proj", "up_proj"]
+        # NOTE: supported granite-v3 models do not need layer skip for large magnitude
     elif "mixtral" in qcfg["model"]:
         qcfg["qlayer_name_pattern"] = (
             ["model.layers"] if qcfg["nbits_bmm1"] == 32 else []

@@ -81,10 +99,10 @@ def config_quantize_smooth_layers(qcfg):
             ]
         ]
         qcfg["act_scale_path"] = "./act_scales/Mixtral-8x7B-v0.1.pt"
-    elif any(model in qcfg["model"] for model in granite_BigCode_architecture):
+    elif any(model in qcfg["model"] for model in bigcode_architecture):
         qcfg["qlayer_name_pattern"] = ["transformer.h"]
         qcfg["scale_layers"] = ["c_attn", "c_fc"]
-        qcfg["qskip_layer_name"] = []
+        # NOTE: supported bigcode models do not need layer skip for large magnitude
         if "granite-3b-base-v2" in qcfg["model"]:
             qcfg["act_scale_path"] = "./act_scales/granite_3b_base_v2_500_nw.pt"
         if "granite-13b-base-v2" in qcfg["model"]:

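For reference, a minimal sketch of how the updated helper might be driven. The import path, function name, and qcfg keys come from the diff above; the checkpoint identifier is an illustrative assumption, and a real qcfg would carry many more fields:

    from fms_mo.utils.dq_utils import config_quantize_smooth_layers

    # Hypothetical DQ config; only the keys this helper reads or writes are shown.
    qcfg = {
        "model": "ibm-granite/granite-3.1-8b-instruct",  # assumed checkpoint id
        "model_type": "granite",
        "qskip_large_mag_layers": False,  # only consulted on the llama branch
        "qskip_layer_name": [],           # llama branch appends skip targets here
    }
    config_quantize_smooth_layers(qcfg)  # mutates qcfg in place

    # Via the granite branch added by this commit, qcfg should now contain:
    #   qcfg["qlayer_name_pattern"] == ["model.layers."]
    #   qcfg["scale_layers"] == ["k_proj", "v_proj", "gate_proj", "up_proj"]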