# Test configuration to verify evaluation is working
# Runs a short training job with evaluation enabled

comm:
  trace_buf_size: 0

model:
  name: llama3
  flavor: 8B
  hf_assets_path: /home/hosseinkh/models/Meta-Llama-3.1-8B-Instruct

processes:
  procs: 8  # 8 processes, one per GPU
  with_gpus: true

optimizer:
  name: AdamW
  lr: 1e-5
  eps: 1e-8

lr_scheduler:
  warmup_steps: 2
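  # Assumption: the scheduler ramps the learning rate up to lr over the
  # first 2 steps before any decay kicks in.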

dataset:
  path: "yahma/alpaca-cleaned"
  split: "train[:95%]"

dataset_val:
  path: "yahma/alpaca-cleaned"
  split: "train[95%:]"
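  # The two split strings use standard Hugging Face slice syntax, giving a
  # disjoint 95%/5% train/validation split of the same "train" split (see
  # the sketch after the config).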

training:
  local_batch_size: 4
  seq_len: 512  # Shorter sequences for speed
  max_norm: 1.0
  steps: 100  # 100 training steps total
  compile: false

validation:
  enabled: true  # Enable/disable validation
  eval_interval: 100  # Run evaluation every 100 training steps
  eval_steps: 50  # Number of batches per evaluation (0 = full epoch)
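  # Note: with steps: 100 and eval_interval: 100, evaluation fires once, at
  # the end of the run; lower eval_interval to evaluate more often.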

parallelism:
  data_parallel_replicate_degree: 1
  data_parallel_shard_degree: -1
  tensor_parallel_degree: 2
  pipeline_parallel_degree: 1
  context_parallel_degree: 1
  expert_parallel_degree: 1
  disable_loss_parallel: false
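  # Assuming -1 means "infer from the remaining ranks" (the common
  # convention), data_parallel_shard_degree resolves to 8 / 2 = 4 given
  # procs: 8 and tensor_parallel_degree: 2.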

checkpoint:
  enable: true
  folder: /home/hosseinkh/models/Meta-Llama-3.1-8B-Instruct/test_eval_checkpoints
  initial_load_path: /home/hosseinkh/models/Meta-Llama-3.1-8B-Instruct/
  initial_load_in_hf: true
  last_save_in_hf: true
  interval: 100  # Don't save frequently during test
  async_mode: disabled
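  # With interval: 100 and steps: 100, a single checkpoint is written at the
  # end of the run.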

activation_checkpoint:
  mode: selective
  selective_ac_option: op
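
A minimal sketch to sanity-check the two dataset split strings above, assuming the Hugging Face `datasets` library (the trainer itself may load data differently):

    from datasets import load_dataset

    # "train[:95%]" / "train[95%:]" are standard HF slice syntax:
    # disjoint 95%/5% slices of the same "train" split.
    train = load_dataset("yahma/alpaca-cleaned", split="train[:95%]")
    val = load_dataset("yahma/alpaca-cleaned", split="train[95%:]")

    print(f"train examples: {len(train)}")
    print(f"val examples:   {len(val)}")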