Commit b574ebf

author wangzaijun committed: fix all
1 parent b4ef3b8 commit b574ebf

16 files changed, +61 -71 lines changed


lightllm/common/basemodel/layer_weights/meta_weights/__init__.py

Lines changed: 0 additions & 3 deletions
@@ -2,12 +2,9 @@
 from .mm_weight import (
     MMWeightPack,
     MMWeightTpl,
-    MultiMMWeightTpl,
     ROWMMWeight,
     COLMMWeight,
-    MultiROWMMWeight,
     ROWBMMWeight,
-    AWQMultiMMWeightTpl,
 )
 from .norm_weight import NormWeight, GEMMANormWeight, TpNormWeight
 from .fused_moe_weight_tp import FusedMoeWeightTP
Lines changed: 0 additions & 3 deletions
@@ -1,13 +1,10 @@
 from .mm_weight import (
     MMWeightPack,
     MMWeightTpl,
-    MultiMMWeightTpl,
-    AWQMultiMMWeightTpl,
 )
 from .mm_factory import (
     MMWeight,
     ROWMMWeight,
-    MultiROWMMWeight,
     ROWBMMWeight,
     COLMMWeight,
 )
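
Taken together, the two hunks above drop MultiMMWeightTpl, MultiROWMMWeight, and AWQMultiMMWeightTpl from the meta_weights exports; the per-model files below migrate their callers onto the unified classes. A minimal call-shape sketch of the new convention, mirroring the llama hunks further down (the wrapper name _init_qkv_sketch is illustrative only, and the snippet assumes an initialized lightllm layer-weight object that provides self.data_type_, self.quant_cfg, and self.layer_num_, so it is not runnable on its own):

from lightllm.common.basemodel.layer_weights.meta_weights import ROWMMWeight


def _init_qkv_sketch(self):
    # Single tensor: weight_names / bias_names take one name
    # (previously the keywords were weight_name / bias_name).
    self.q_proj = ROWMMWeight(
        weight_names=self._q_weight_name,
        data_type=self.data_type_,
        bias_names=self._q_bias_name,
        quant_cfg=self.quant_cfg,
        layer_num=self.layer_num_,
        name="q_proj",
    )
    # Fused tensors: pass a list of names; this replaces the removed MultiROWMMWeight.
    self.kv_proj = ROWMMWeight(
        weight_names=[self._k_weight_name, self._v_weight_name],
        data_type=self.data_type_,
        bias_names=[self._k_bias_name, self._v_bias_name],
        quant_cfg=self.quant_cfg,
        layer_num=self.layer_num_,
        name="kv_proj",
    )

Both the single-name and the list form go through the same ROWMMWeight entry point, which is what allows the Multi* variants to disappear from the package.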

lightllm/common/basemodel/layer_weights/transformer_layer_weight.py

Lines changed: 2 additions & 2 deletions
@@ -2,7 +2,7 @@
 
 # from lightllm.common.layers.mm import MM
 from .base_layer_weight import BaseLayerWeight
-from .meta_weights import BaseWeight, MultiMMWeightTpl
+from .meta_weights import BaseWeight, MMWeightTpl
 from lightllm.utils.log_utils import init_logger
 
 logger = init_logger(__name__)
@@ -36,7 +36,7 @@ def load_hf_weights(self, weights):
         """
         for attr_name in dir(self):
             attr = getattr(self, attr_name, None)
-            if isinstance(attr, MultiMMWeightTpl):
+            if isinstance(attr, MMWeightTpl) and len(attr.weight_names) >= 2:
                 with self.lock:
                     attr.load_hf_weights(weights)
             elif isinstance(attr, BaseWeight):
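
With MultiMMWeightTpl removed, the loader above now decides whether an attribute is a fused multi-weight by the length of its weight_names. A self-contained toy sketch of that dispatch, using nothing beyond the standard library (RowWeightSketch and its fields are invented for illustration and are not lightllm classes):

from typing import List, Union


class RowWeightSketch:
    """Toy stand-in for MMWeightTpl: one class serves single and fused weights."""

    def __init__(self, weight_names: Union[str, List[str]]):
        # Normalize to a list so one code path covers both cases.
        self.weight_names = [weight_names] if isinstance(weight_names, str) else list(weight_names)

    @property
    def is_fused(self) -> bool:
        # Mirrors the new `len(attr.weight_names) >= 2` test in load_hf_weights.
        return len(self.weight_names) >= 2


q = RowWeightSketch("q_proj.weight")
kv = RowWeightSketch(["k_proj.weight", "v_proj.weight"])
print(q.is_fused, kv.is_fused)  # False True

Presumably the real MMWeightTpl normalizes a single weight_names string the same way, so the length check cleanly separates fused weights (whose loads the code still guards with self.lock) from plain ones.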

lightllm/models/bloom/layer_weights/transformer_layer_weight.py

Lines changed: 4 additions & 4 deletions
@@ -108,17 +108,17 @@ def load_hf_weights(self, weights):
 
     def _init_ffn(self):
         self.gate_up_proj = ROWMMWeight(
-            weight_name=self._gate_up_weight_name,
+            weight_names=self._gate_up_weight_name,
             data_type=self.data_type_,
-            bias_name=self._gate_up_bias_name,
+            bias_names=self._gate_up_bias_name,
             quant_cfg=self.quant_cfg,
             layer_num=self.layer_num_,
             name="gate_up_proj",
         )
         self.down_proj = COLMMWeight(
-            weight_name=self._down_weight_name,
+            weight_names=self._down_weight_name,
             data_type=self.data_type_,
-            bias_name=self._down_bias_name,
+            bias_names=self._down_bias_name,
             quant_cfg=self.quant_cfg,
             layer_num=self.layer_num_,
             name="down_proj",

lightllm/models/deepseek2/layer_weights/transformer_layer_weight.py

Lines changed: 13 additions & 14 deletions
@@ -6,7 +6,6 @@
 from lightllm.utils.envs_utils import enable_env_vars, get_env_start_args
 from lightllm.common.basemodel.layer_weights.meta_weights import (
     ROWMMWeight,
-    MultiROWMMWeight,
     COLMMWeight,
     NormWeight,
     FusedMoeWeightTP,
@@ -142,14 +141,14 @@ def load_hf_weights(self, weights):
     def _init_qkvo(self):
         if self.q_lora_rank is None:
             self.q_weight_ = ROWMMWeight(
-                weight_name=f"model.layers.{self.layer_num_}.self_attn.q_proj.weight",
+                weight_names=f"model.layers.{self.layer_num_}.self_attn.q_proj.weight",
                 data_type=self.data_type_,
                 quant_cfg=self.quant_cfg,
                 layer_num=self.layer_num_,
                 name="q_weight",
             )
             self.kv_a_proj_with_mqa_ = ROWMMWeight(
-                weight_name=f"model.layers.{self.layer_num_}.self_attn.kv_a_proj_with_mqa.weight",
+                weight_names=f"model.layers.{self.layer_num_}.self_attn.kv_a_proj_with_mqa.weight",
                 data_type=self.data_type_,
                 quant_cfg=self.quant_cfg,
                 layer_num=self.layer_num_,
@@ -158,7 +157,7 @@ def _init_qkvo(self):
                 tp_world_size=1,
             )
         else:
-            self.qkv_a_proj_with_mqa_ = MultiROWMMWeight(
+            self.qkv_a_proj_with_mqa_ = ROWMMWeight(
                 weight_names=[
                     f"model.layers.{self.layer_num_}.self_attn.q_a_proj.weight",
                     f"model.layers.{self.layer_num_}.self_attn.kv_a_proj_with_mqa.weight",
@@ -171,37 +170,37 @@ def _init_qkvo(self):
                 tp_world_size=1,
             )
             self.q_b_proj_ = ROWMMWeight(
-                weight_name=f"model.layers.{self.layer_num_}.self_attn.q_b_proj.weight",
+                weight_names=f"model.layers.{self.layer_num_}.self_attn.q_b_proj.weight",
                 data_type=self.data_type_,
                 quant_cfg=self.quant_cfg,
                 layer_num=self.layer_num_,
                 name="q_b_proj",
             )
         self.k_b_proj_ = ROWBMMWeight(
-            weight_name=f"model.layers.{self.layer_num_}.self_attn.k_b_proj.weight",
+            weight_names=f"model.layers.{self.layer_num_}.self_attn.k_b_proj.weight",
             data_type=self.data_type_,
             quant_cfg=None,
             layer_num=self.layer_num_,
             name="k_b_proj",
         )
         self.v_b_proj_ = ROWBMMWeight(
-            weight_name=f"model.layers.{self.layer_num_}.self_attn.v_b_proj.weight",
+            weight_names=f"model.layers.{self.layer_num_}.self_attn.v_b_proj.weight",
             data_type=self.data_type_,
             quant_cfg=None,
             layer_num=self.layer_num_,
             name="v_b_proj",
         )
         if self.enable_cc_method:
             self.cc_kv_b_proj_ = ROWMMWeight(
-                weight_name=f"model.layers.{self.layer_num_}.self_attn.kv_b_proj.weight",
+                weight_names=f"model.layers.{self.layer_num_}.self_attn.kv_b_proj.weight",
                 data_type=self.data_type_,
                 quant_cfg=self.quant_cfg,
                 layer_num=self.layer_num_,
                 name="cc_kv_b_proj",
             )
 
         self.o_weight_ = COLMMWeight(
-            weight_name=f"model.layers.{self.layer_num_}.self_attn.o_proj.weight",
+            weight_names=f"model.layers.{self.layer_num_}.self_attn.o_proj.weight",
             data_type=self.data_type_,
             quant_cfg=self.quant_cfg,
             layer_num=self.layer_num_,
@@ -211,7 +210,7 @@ def _init_qkvo(self):
     def _load_mlp(self, mlp_prefix):
         moe_mode = os.getenv("MOE_MODE", "TP")
         if self.is_moe and moe_mode == "EP":
-            self.gate_up_proj = MultiROWMMWeight(
+            self.gate_up_proj = ROWMMWeight(
                 weight_names=[f"{mlp_prefix}.gate_proj.weight", f"{mlp_prefix}.up_proj.weight"],
                 data_type=self.data_type_,
                 quant_cfg=self.quant_cfg,
@@ -221,7 +220,7 @@ def _load_mlp(self, mlp_prefix):
                 tp_world_size=1,
             )
             self.down_proj = COLMMWeight(
-                weight_name=f"{mlp_prefix}.down_proj.weight",
+                weight_names=f"{mlp_prefix}.down_proj.weight",
                 data_type=self.data_type_,
                 quant_cfg=self.quant_cfg,
                 layer_num=self.layer_num_,
@@ -230,15 +229,15 @@ def _load_mlp(self, mlp_prefix):
                 tp_world_size=1,
             )
         else:
-            self.gate_up_proj = MultiROWMMWeight(
+            self.gate_up_proj = ROWMMWeight(
                 weight_names=[f"{mlp_prefix}.gate_proj.weight", f"{mlp_prefix}.up_proj.weight"],
                 data_type=self.data_type_,
                 quant_cfg=self.quant_cfg,
                 layer_num=self.layer_num_,
                 name="gate_up_proj",
             )
             self.down_proj = COLMMWeight(
-                weight_name=f"{mlp_prefix}.down_proj.weight",
+                weight_names=f"{mlp_prefix}.down_proj.weight",
                 data_type=self.data_type_,
                 quant_cfg=self.quant_cfg,
                 layer_num=self.layer_num_,
@@ -248,7 +247,7 @@ def _load_mlp(self, mlp_prefix):
     def _init_moe(self):
         moe_intermediate_size = self.network_config_["moe_intermediate_size"]
         self.moe_gate = ROWMMWeight(
-            weight_name=f"model.layers.{self.layer_num_}.mlp.gate.weight",
+            weight_names=f"model.layers.{self.layer_num_}.mlp.gate.weight",
             data_type=self.data_type_,
             layer_num=self.layer_num_,
             name="moe_gate",

lightllm/models/gemma3/layer_weights/transformer_layer_weight.py

Lines changed: 8 additions & 8 deletions
@@ -28,17 +28,17 @@ def _init_weight_names(self):
 
     def _init_ffn(self):
         self.gate_proj = ROWMMWeight(
-            weight_name=self._gate_weight_name,
+            weight_names=self._gate_weight_name,
             data_type=self.data_type_,
-            bias_name=self._gate_bias_name,
+            bias_names=self._gate_bias_name,
             quant_cfg=self.quant_cfg,
             layer_num=self.layer_num_,
             name="gate_proj",
         )
         self.up_proj = ROWMMWeight(
-            weight_name=self._up_weight_name,
+            weight_names=self._up_weight_name,
             data_type=self.data_type_,
-            bias_name=self._up_bias_name,
+            bias_names=self._up_bias_name,
             quant_cfg=self.quant_cfg,
             layer_num=self.layer_num_,
             name="up_proj",
@@ -47,17 +47,17 @@ def _init_ffn(self):
 
     def _init_qkv(self):
         self.k_proj = ROWMMWeight(
-            weight_name=self._k_weight_name,
+            weight_names=self._k_weight_name,
             data_type=self.data_type_,
-            bias_name=self._k_bias_name,
+            bias_names=self._k_bias_name,
             quant_cfg=self.quant_cfg,
             layer_num=self.layer_num_,
             name="k_proj",
         )
         self.v_proj = ROWMMWeight(
-            weight_name=self._v_weight_name,
+            weight_names=self._v_weight_name,
             data_type=self.data_type_,
-            bias_name=self._v_bias_name,
+            bias_names=self._v_bias_name,
             quant_cfg=self.quant_cfg,
             layer_num=self.layer_num_,
             name="v_proj",

lightllm/models/gemma_2b/layer_weights/transformer_layer_weight.py

Lines changed: 4 additions & 4 deletions
@@ -2,7 +2,7 @@
 import math
 import numpy as np
 from lightllm.models.llama.layer_weights.transformer_layer_weight import LlamaTransformerLayerWeight
-from lightllm.common.basemodel.layer_weights.meta_weights import GEMMANormWeight, ROWMMWeight, MultiROWMMWeight
+from lightllm.common.basemodel.layer_weights.meta_weights import GEMMANormWeight, ROWMMWeight
 
 
 class Gemma_2bTransformerLayerWeight(LlamaTransformerLayerWeight):
@@ -12,14 +12,14 @@ def __init__(self, layer_num, data_type, network_config, mode=[], quant_cfg=None
 
     def _init_qkv(self):
         self.q_proj = ROWMMWeight(
-            weight_name=self._q_weight_name,
+            weight_names=self._q_weight_name,
             data_type=self.data_type_,
-            bias_name=self._q_bias_name,
+            bias_names=self._q_bias_name,
             quant_cfg=self.quant_cfg,
             layer_num=self.layer_num_,
             name="q_proj",
         )
-        self.kv_proj = MultiROWMMWeight(
+        self.kv_proj = ROWMMWeight(
             weight_names=[self._k_weight_name, self._v_weight_name],
             data_type=self.data_type_,
             bias_names=[self._k_bias_name, self._v_bias_name],

lightllm/models/gpt_oss/layer_weights/transformer_layer_weight.py

Lines changed: 2 additions & 2 deletions
@@ -30,10 +30,10 @@ def _init_moe(self):
         assert moe_mode in ["TP"], "For now, GPT-OSS type model only support MOE TP mode."
 
         self.moe_gate = ROWMMWeight(
-            weight_name=self._router_weight_name,
+            weight_names=self._router_weight_name,
             data_type=self.data_type_,
             layer_num=self.layer_num_,
-            bias_name=self._router_bias_name,
+            bias_names=self._router_bias_name,
             name="moe_gate",
             tp_rank=0,
             tp_world_size=1,

lightllm/models/llama/layer_weights/transformer_layer_weight.py

Lines changed: 9 additions & 9 deletions
@@ -2,7 +2,7 @@
 import math
 import numpy as np
 from lightllm.common.basemodel import TransformerLayerWeight
-from lightllm.common.basemodel.layer_weights.meta_weights import ROWMMWeight, COLMMWeight, NormWeight, MultiROWMMWeight
+from lightllm.common.basemodel.layer_weights.meta_weights import ROWMMWeight, COLMMWeight, NormWeight
 
 
 class LlamaTransformerLayerWeight(TransformerLayerWeight):
@@ -58,14 +58,14 @@ def _init_weight_names(self):
 
     def _init_qkv(self):
         self.q_proj = ROWMMWeight(
-            weight_name=self._q_weight_name,
+            weight_names=self._q_weight_name,
             data_type=self.data_type_,
-            bias_name=self._q_bias_name,
+            bias_names=self._q_bias_name,
             quant_cfg=self.quant_cfg,
             layer_num=self.layer_num_,
             name="q_proj",
         )
-        self.kv_proj = MultiROWMMWeight(
+        self.kv_proj = ROWMMWeight(
             weight_names=[self._k_weight_name, self._v_weight_name],
             data_type=self.data_type_,
             bias_names=[self._k_bias_name, self._v_bias_name],
@@ -76,16 +76,16 @@ def _init_qkv(self):
 
     def _init_o(self):
         self.o_proj = COLMMWeight(
-            weight_name=self._o_weight_name,
+            weight_names=self._o_weight_name,
             data_type=self.data_type_,
-            bias_name=self._o_bias_name,
+            bias_names=self._o_bias_name,
             quant_cfg=self.quant_cfg,
             layer_num=self.layer_num_,
             name="o_proj",
         )
 
     def _init_ffn(self):
-        self.gate_up_proj = MultiROWMMWeight(
+        self.gate_up_proj = ROWMMWeight(
             weight_names=[self._gate_weight_name, self._up_weight_name],
             data_type=self.data_type_,
             bias_names=[self._gate_bias_name, self._up_bias_name],
@@ -94,9 +94,9 @@ def _init_ffn(self):
             name="gate_up_proj",
         )
         self.down_proj = COLMMWeight(
-            weight_name=self._down_weight_name,
+            weight_names=self._down_weight_name,
            data_type=self.data_type_,
-            bias_name=self._down_bias_name,
+            bias_names=self._down_bias_name,
             quant_cfg=self.quant_cfg,
             layer_num=self.layer_num_,
             name="down_proj",

lightllm/models/mixtral/layer_weights/transformer_layer_weight.py

Lines changed: 2 additions & 2 deletions
@@ -39,9 +39,9 @@ def _init_moe(self):
         split_inter_size = inter_size // self.tp_world_size_
 
         self.moe_gate = ROWMMWeight(
-            weight_name=self.moe_gate_weight_name,
+            weight_names=self.moe_gate_weight_name,
             data_type=self.data_type_,
-            bias_name=self.moe_gate_bias_name,
+            bias_names=self.moe_gate_bias_name,
             quant_cfg=self.quant_cfg,
             layer_num=self.layer_num_,
             name="moe_gate",
