Skip to content

Commit be94bdd

Browse files
zeroRains, yuanlehome, XieYunshen
authored
[Loader V1] modify layername for DeepSeekV3 (#3336)
Co-authored-by: Yuanle Liu <[email protected]> Co-authored-by: YUNSHEN XIE <[email protected]>
1 parent f702a67 commit be94bdd

File tree

1 file changed

+4
-5
lines changed

1 file changed

+4
-5
lines changed

fastdeploy/model_executor/models/deepseek_v3.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,7 @@ def __init__(
539539
prefix="deepseek_v3.embed_tokens",
540540
)
541541

542-
self.decoder_layers = nn.LayerList(
542+
self.layers = nn.LayerList(
543543
[
544544
DeepSeekV3DecoderLayer(
545545
fd_config,
@@ -564,7 +564,7 @@ def load_state_dict(self, state_dict):
564564
self.norm.load_state_dict(state_dict)
565565
for i in range(self.num_layers):
566566
logger.info(f"Start load layer {i}")
567-
self.decoder_layers[i].load_state_dict(state_dict)
567+
self.layers[i].load_state_dict(state_dict)
568568

569569
def forward(
570570
self,
@@ -578,7 +578,7 @@ def forward(
578578

579579
residual = None
580580
for i in range(self.num_layers):
581-
hidden_states, residual = self.decoder_layers[i](
581+
hidden_states, residual = self.layers[i](
582582
forward_meta,
583583
hidden_states,
584584
residual,
@@ -658,12 +658,11 @@ def load_weights(self, weights_iterator) -> None:
658658

659659
for loaded_weight_name, loaded_weight in weights_iterator:
660660
loaded_weight_name = loaded_weight_name.replace("deepseek_v3", "model")
661-
loaded_weight_name = loaded_weight_name.replace("layers", "decoder_layers")
662661

663662
for param_name, weight_name, shard_id in stacked_params_mapping:
664663
if weight_name not in loaded_weight_name:
665664
continue
666-
if "mlp.experts." in loaded_weight_name and loaded_weight_name not in params_dict:
665+
if "mlp.experts." in loaded_weight_name:
667666
continue
668667
model_param_name = loaded_weight_name.replace(weight_name, param_name)
669668

0 commit comments

Comments (0)