Commit 63b17e3

chat template and example for qwen3 (axolotl-ai-cloud#2577)
1 parent: 1178a15

3 files changed: 71 additions, 0 deletions

examples/qwen3/32b-qlora.yaml

Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
+base_model: Qwen/Qwen3-32B
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
+plugins:
+  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
+strict: false
+
+chat_template: qwen3
+datasets:
+  - path: mlabonne/FineTome-100k
+    type: chat_template
+    split: train[:20%]
+    field_messages: conversations
+    message_property_mappings:
+      role: from
+      content: value
+val_set_size: 0.0
+output_dir: ./outputs/out
+dataset_prepared_path: last_run_prepared
+
+sequence_len: 2048
+sample_packing: true
+eval_sample_packing: true
+pad_to_sequence_len: true
+
+load_in_4bit: true
+adapter: qlora
+lora_r: 16
+lora_alpha: 32
+lora_target_modules:
+  - q_proj
+  - k_proj
+  - v_proj
+  - o_proj
+  - down_proj
+  - up_proj
+lora_mlp_kernel: true
+lora_qkv_kernel: true
+lora_o_kernel: true
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 2
+micro_batch_size: 1
+num_epochs: 1
+optimizer: adamw_torch_4bit
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+bf16: auto
+tf32: true
+
+gradient_checkpointing: offload
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+resume_from_checkpoint:
+logging_steps: 1
+flash_attention: true
+
+warmup_steps: 10
+evals_per_epoch: 4
+saves_per_epoch: 1
+weight_decay: 0.0
+special_tokens:
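
For context (not part of the commit): a config like this is normally launched through Axolotl's CLI entry points. A minimal usage sketch, assuming a current Axolotl install and that the file sits at the path above:

    # Optional: pre-tokenize the dataset before training
    python -m axolotl.cli.preprocess examples/qwen3/32b-qlora.yaml

    # Launch QLoRA fine-tuning (accelerate handles device placement)
    accelerate launch -m axolotl.cli.train examples/qwen3/32b-qlora.yaml

The message_property_mappings block is there because FineTome-100k stores ShareGPT-style turns under a "conversations" field with "from"/"value" keys, which the chat_template dataset loader remaps to the role/content fields it expects.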
