Skip to content

Commit a57022b

Browse files
authored
Merge pull request #146 from georgian-io/better-config
Better Starter Config
2 parents a9aa01e + 69be88a commit a57022b

File tree

1 file changed

5 additions and 4 deletions

1 file changed

+5
-4
lines changed

config.yml

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ data:
2323

2424
# Model Definition -------------------
2525
model:
26-
hf_model_ckpt: "NousResearch/Llama-2-7b-hf"
26+
hf_model_ckpt: "mistralai/Mistral-7B-Instruct-v0.2"
2727
torch_dtype: "bfloat16"
2828
attn_implementation: "flash_attention_2"
2929
quantize: true
@@ -36,6 +36,7 @@ model:
3636
lora:
3737
task_type: "CAUSAL_LM"
3838
r: 32
39+
lora_alpha: 64
3940
lora_dropout: 0.1
4041
target_modules:
4142
- q_proj
@@ -49,12 +50,12 @@ lora:
4950
# Training -------------------
5051
training:
5152
training_args:
52-
num_train_epochs: 5
53+
num_train_epochs: 1
5354
per_device_train_batch_size: 4
5455
gradient_accumulation_steps: 4
5556
gradient_checkpointing: True
5657
optim: "paged_adamw_32bit"
57-
logging_steps: 100
58+
logging_steps: 1
5859
learning_rate: 2.0e-4
5960
bf16: true # Set to true for mixed precision training on Newer GPUs
6061
tf32: true
@@ -67,7 +68,7 @@ training:
6768
# neftune_noise_alpha: None
6869

6970
inference:
70-
max_new_tokens: 1024
71+
max_new_tokens: 256
7172
use_cache: True
7273
do_sample: True
7374
top_p: 0.9

0 commit comments

Comments (0)