Skip to content

Commit b313e86

Browse files
authored
fix deepseek2(#924)
1 parent 0813466 commit b313e86

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

swift/llm/utils/model.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2582,9 +2582,9 @@ def get_model_tokenizer_deepseek2(model_dir: str,
25822582
model_config._attn_implementation = 'flash_attention_2' if use_flash_attn else 'eager'
25832583
model, tokenizer = get_model_tokenizer_from_repo(
25842584
model_dir, torch_dtype, model_kwargs, load_model, model_config=model_config, **kwargs)
2585-
model.generation_config.pad_token_id = model.generation_config.eos_token_id
25862585
if model is not None:
25872586
# fix dtype bug
2587+
model.generation_config.pad_token_id = model.generation_config.eos_token_id
25882588
mlp_cls = model.model.layers[1].mlp.__class__
25892589
for module in model.modules():
25902590
if isinstance(module, mlp_cls):

0 commit comments

Comments
 (0)