File tree Expand file tree Collapse file tree 2 files changed +2
-2
lines changed
torchtitan/models/deepseek_v3 Expand file tree Collapse file tree 2 files changed +2
-2
lines changed Original file line number Diff line number Diff line change @@ -8,7 +8,7 @@ DeepSeek-V3 is a Mixture-of-Experts (MoE) transformer model with Multi-head Late
8
8
9
9
``` bash
10
10
# DeepSeek 671B tokenizer (automatically downloads tokenizer.json and tokenizer_config.json)
11
- python scripts/download_hf_assets.py --repo_id deepseek-ai/DeepSeek-V3 --assets tokenizer
11
+ python scripts/download_hf_assets.py --repo_id deepseek-ai/DeepSeek-V3.1-Base --assets tokenizer
12
12
```
13
13
14
14
``` bash
Original file line number Diff line number Diff line change @@ -20,7 +20,7 @@ enable_wandb = false
20
20
[model]
21
21
name = "deepseek_v3"
22
22
flavor = "671B"
23
- hf_assets_path = "./assets/hf/DeepSeek-V3"
23
+ hf_assets_path = "./assets/hf/DeepSeek-V3.1-Base"
24
24
# converters = ["float8"]
25
25
26
26
[optimizer]
You can’t perform that action at this time.
0 commit comments