@@ -11,54 +11,72 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
14- """
15- Utils for DQ
14+ """Utility functions for Direct Quantization" (DQ)."""
1615
17- """
1816
+def config_quantize_smooth_layers(qcfg: dict):
+    """Update qcfg with model-dependent config parameters:
+    - qlayer_name_pattern: identifier of transformer layers containing linear layers
+      to quantize (if any, tracing is bypassed)
+    - scale_layers: identifier of linear layers to apply smoothquant on
+    - qskip_layer_name: full name of linear layers that will not be quantized
+    - act_scale_path: path to save/load smoothquant activation scales
 
-def config_quantize_smooth_layers(qcfg):
-    """
-    To set the config for each model, for example
-    layers to quantize
-    layers to skip
-    layers to apply smooth-scale
-    block_size
-    smooth_alpha
+    Selected model is determined by comparing all architecture identifiers against
+    `model` and `model_type` fields in qcfg.
+
+    NOTE: layer quantization skip is determined by bool `qskip_large_mag_layers`
+    NOTE: different versions of granite models are based on different architectures
+      (chronologically: bigcode -> llama -> granite)
     """
+
     llama_architecture = [
         "llama",
         "Nemotron",
         "granite-3b-code",
         "granite-8b-code",
     ]
-    granite_BigCode_architecture = [
+    bigcode_architecture = [
         "granite-3b-base",
         "granite-13b-base",
         "granite-20b-code",
         "granite-20b-code",
     ]
-    if (
-        any(model in qcfg["model"] for model in llama_architecture)
-        or any(model in qcfg["model_type"] for model in llama_architecture)
-        and qcfg["qskip_large_mag_layers"]
+    granite_architecture = [
+        "granite-3.0-8b-base",
+        "granite-3.0-8b-instruct",
+        "granite-3.1-8b-base",
+        "granite-3.1-8b-instruct",
+        "granite-3.2-8b-instruct",
+        "granite-3.3-8b-base",
+        "granite-3.3-8b-instruct",
+    ]
+
+    if any(model in qcfg["model"] for model in llama_architecture) or any(
+        model in qcfg["model_type"] for model in llama_architecture
     ):
         qcfg["qlayer_name_pattern"] = ["model.layers."]
         qcfg["scale_layers"] = ["k_proj", "v_proj", "gate_proj", "up_proj"]
-        large_mag_layers = {
-            "2-7b": [1, 30],
-            "2-70b": [2, 8, 79],
-            "3-8B": [1, 31],
-            "3-70B": [3, 78, 79],
-            "405B-Instruct": [5, 124, 125],
-        }
-        for llama_family, layers in large_mag_layers.items():
-            if llama_family in qcfg["model"]:
-                qcfg["qskip_layer_name"] += [
-                    f"model.layers.{i}.mlp.down_proj" for i in layers
-                ]
-                break
-
+        if qcfg["qskip_large_mag_layers"]:
+            large_mag_layers = {
+                "2-7b": [1, 30],
+                "2-70b": [2, 8, 79],
+                "3-8B": [1, 31],
+                "3-70B": [3, 78, 79],
+                "405B-Instruct": [5, 124, 125],
+            }
+            for llama_family, layers in large_mag_layers.items():
+                if llama_family in qcfg["model"]:
+                    qcfg["qskip_layer_name"] += [
+                        f"model.layers.{i}.mlp.down_proj" for i in layers
+                    ]
+                    break
+    elif any(model in qcfg["model"] for model in granite_architecture) or any(
+        model in qcfg["model_type"] for model in granite_architecture
+    ):
+        qcfg["qlayer_name_pattern"] = ["model.layers."]
+        qcfg["scale_layers"] = ["k_proj", "v_proj", "gate_proj", "up_proj"]
+        # NOTE: supported granite-v3 models do not need layer skip for large magnitude
     elif "mixtral" in qcfg["model"]:
         qcfg["qlayer_name_pattern"] = (
             ["model.layers"] if qcfg["nbits_bmm1"] == 32 else []
@@ -81,10 +99,10 @@ def config_quantize_smooth_layers(qcfg):
             ]
         ]
         qcfg["act_scale_path"] = "./act_scales/Mixtral-8x7B-v0.1.pt"
-    elif any(model in qcfg["model"] for model in granite_BigCode_architecture):
+    elif any(model in qcfg["model"] for model in bigcode_architecture):
         qcfg["qlayer_name_pattern"] = ["transformer.h"]
         qcfg["scale_layers"] = ["c_attn", "c_fc"]
-        qcfg["qskip_layer_name"] = []
+        # NOTE: supported bigcode models do not need layer skip for large magnitude
         if "granite-3b-base-v2" in qcfg["model"]:
             qcfg["act_scale_path"] = "./act_scales/granite_3b_base_v2_500_nw.pt"
         if "granite-13b-base-v2" in qcfg["model"]: