@@ -111,14 +111,17 @@ def load_hf_weights(self, weights):
111111 if self .quant_cfg .quantized_weight :
112112 kv_b_proj_ = weight_dequant (
113113 kv_b_proj_ .cuda (),
114- weights [f"model.layers.{ self .layer_num_ } .self_attn.kv_b_proj." + kv_b_quant_method .weight_scale_suffix ].cuda (),
114+ weights [
115+ f"model.layers.{ self .layer_num_ } .self_attn.kv_b_proj." + kv_b_quant_method .weight_scale_suffix
116+ ].cuda (),
115117 ).cpu ()
116118 weights [f"model.layers.{ self .layer_num_ } .self_attn.k_b_proj.weight" ] = self ._load_kb (kv_b_proj_ )
117119 weights [f"model.layers.{ self .layer_num_ } .self_attn.v_b_proj.weight" ] = self ._load_vb (kv_b_proj_ )
118120
119121 if (
120122 self .quant_cfg .quantized_weight
121- and f"model.layers.{ self .layer_num_ } .self_attn.kv_b_proj." + kv_b_quant_method .weight_scale_suffix in weights
123+ and f"model.layers.{ self .layer_num_ } .self_attn.kv_b_proj." + kv_b_quant_method .weight_scale_suffix
124+ in weights
122125 ):
123126 kv_b_proj_scale_ = weights [
124127 f"model.layers.{ self .layer_num_ } .self_attn.kv_b_proj." + kv_b_quant_method .weight_scale_suffix
@@ -175,14 +178,14 @@ def _init_qkvo(self):
175178 self .k_b_proj_ = ROWBMMWeight (
176179 weight_name = f"model.layers.{ self .layer_num_ } .self_attn.k_b_proj.weight" ,
177180 data_type = self .data_type_ ,
178- quant_cfg = self . quant_cfg ,
181+ quant_cfg = None ,
179182 layer_num = self .layer_num_ ,
180183 name = "k_b_proj" ,
181184 )
182185 self .v_b_proj_ = ROWBMMWeight (
183186 weight_name = f"model.layers.{ self .layer_num_ } .self_attn.v_b_proj.weight" ,
184187 data_type = self .data_type_ ,
185- quant_cfg = self . quant_cfg ,
188+ quant_cfg = None ,
186189 layer_num = self .layer_num_ ,
187190 name = "v_b_proj" ,
188191 )
@@ -247,7 +250,7 @@ def _init_qkvo_dp(self):
247250 self .k_b_proj_ = ROWBMMWeight (
248251 weight_name = f"model.layers.{ self .layer_num_ } .self_attn.k_b_proj.weight" ,
249252 data_type = self .data_type_ ,
250- quant_cfg = self . quant_cfg ,
253+ quant_cfg = None ,
251254 layer_num = self .layer_num_ ,
252255 name = "k_b_proj" ,
253256 tp_rank = 0 ,
@@ -257,7 +260,7 @@ def _init_qkvo_dp(self):
257260 self .v_b_proj_ = ROWBMMWeight (
258261 weight_name = f"model.layers.{ self .layer_num_ } .self_attn.v_b_proj.weight" ,
259262 data_type = self .data_type_ ,
260- quant_cfg = self . quant_cfg ,
263+ quant_cfg = None ,
261264 layer_num = self .layer_num_ ,
262265 name = "v_b_proj" ,
263266 tp_rank = 0 ,
0 commit comments