1 parent 69e04c0 commit dae64cc
tests/test_tipc/static/auto_parallel/llama2/pretrain_config_llama2_7b/pretrain-llama2_7b.json
@@ -9,6 +9,7 @@
     "tensor_parallel_degree": 1,
     "pipeline_parallel_degree": 1,
     "sharding": "stage2",
+    "data_parallel_config": "enable_allreduce_avg_in_gradinent_scale gradient_sync_after_accumulate",
     "sharding_parallel_config": "enable_stage2_overlap",
     "tensor_parallel_config": "enable_mp_async_allreduce",
     "pipeline_parallel_config": "",
@@ -24,7 +25,7 @@
     "learning_rate": 3e-05,
     "min_learning_rate": 3e-06,
     "warmup_steps": 30,
-    "logging_steps": 1,
+    "logging_steps": 10,
     "max_steps": 50,
     "save_steps": 5000,
     "eval_steps": 1000,