3 files changed: +71 −0 lines changed
```yaml
base_model: Qwen/Qwen3-32B
# Automatically upload checkpoint and final model to HF
# hub_model_id: username/custom_model_name

plugins:
  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
strict: false

chat_template: qwen3
datasets:
  - path: mlabonne/FineTome-100k
    type: chat_template
    split: train[:20%]
    field_messages: conversations
    message_property_mappings:
      role: from
      content: value
val_set_size: 0.0
output_dir: ./outputs/out
dataset_prepared_path: last_run_prepared

sequence_len: 2048
sample_packing: true
eval_sample_packing: true
pad_to_sequence_len: true

load_in_4bit: true
adapter: qlora
lora_r: 16
lora_alpha: 32
lora_target_modules:
  - q_proj
  - k_proj
  - v_proj
  - o_proj
  - down_proj
  - up_proj
lora_mlp_kernel: true
lora_qkv_kernel: true
lora_o_kernel: true

wandb_project:
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

gradient_accumulation_steps: 2
micro_batch_size: 1
num_epochs: 1
optimizer: adamw_torch_4bit
lr_scheduler: cosine
learning_rate: 0.0002

bf16: auto
tf32: true

gradient_checkpointing: offload
gradient_checkpointing_kwargs:
  use_reentrant: false
resume_from_checkpoint:
logging_steps: 1
flash_attention: true

warmup_steps: 10
evals_per_epoch: 4
saves_per_epoch: 1
weight_decay: 0.0
special_tokens:
```
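The `message_property_mappings` block is worth a closer look: FineTome-100k stores each example's turns under a `conversations` field in ShareGPT style (`{"from": ..., "value": ...}`), and the mapping tells the loader which source keys to read as `role` and `content` for the `qwen3` chat template. A minimal sketch of that key renaming (not Axolotl's actual implementation, and the sample record below is invented for illustration):

```python
# Sketch of what message_property_mappings {role: from, content: value}
# implies: rename ShareGPT-style keys to chat-template keys.
def remap_messages(conversations):
    """Map {"from": ..., "value": ...} turns to {"role": ..., "content": ...}."""
    return [
        {"role": turn["from"], "content": turn["value"]}
        for turn in conversations
    ]

# Hypothetical record shaped like a FineTome-100k "conversations" entry.
sample = [
    {"from": "human", "value": "What does load_in_4bit do?"},
    {"from": "gpt", "value": "It loads the base model in 4-bit quantization, "
                             "so only the LoRA adapters train in higher precision."},
]
print(remap_messages(sample))
# [{'role': 'human', 'content': ...}, {'role': 'gpt', 'content': ...}]
```

Two other settings interact: with `micro_batch_size: 1` and `gradient_accumulation_steps: 2`, the effective batch size is 2 per device. With Axolotl installed, a config like this is typically passed straight to the CLI (e.g. `axolotl train config.yaml`); uncommenting `hub_model_id` would additionally push checkpoints to the Hugging Face Hub.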