1 file changed: +5 -4 lines
@@ -23,7 +23,7 @@
 
 # Model Definition -------------------
 model:
-  hf_model_ckpt: "NousResearch/Llama-2-7b-hf"
+  hf_model_ckpt: "mistralai/Mistral-7B-Instruct-v0.2"
   torch_dtype: "bfloat16"
   attn_implementation: "flash_attention_2"
   quantize: true
@@ -36,6 +36,7 @@
 lora:
   task_type: "CAUSAL_LM"
   r: 32
+  lora_alpha: 64
   lora_dropout: 0.1
   target_modules:
     - q_proj
@@ -49,12 +50,12 @@ lora:
 # Training -------------------
 training:
   training_args:
-    num_train_epochs: 5
+    num_train_epochs: 1
     per_device_train_batch_size: 4
     gradient_accumulation_steps: 4
     gradient_checkpointing: True
     optim: "paged_adamw_32bit"
-    logging_steps: 100
+    logging_steps: 1
     learning_rate: 2.0e-4
     bf16: true # Set to true for mixed precision training on Newer GPUs
     tf32: true
@@ -67,7 +68,7 @@ training:
     # neftune_noise_alpha: None
 
 inference:
-  max_new_tokens: 1024
+  max_new_tokens: 256
   use_cache: True
   do_sample: True
   top_p: 0.9
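
The key names in this config track the Hugging Face fine-tuning stack closely (peft, transformers, bitsandbytes), so below is a rough sketch of how a loader might wire them up. The diff does not show the project's actual loading code, so this is an assumption from the key names only: the config.yml path, the output_dir placeholder, and the reading of quantize: true as 4-bit bitsandbytes loading are all hypothetical.

import yaml
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model

# Hypothetical path: the diff never names the file being changed.
with open("config.yml") as f:
    cfg = yaml.safe_load(f)

model_cfg = cfg["model"]

# Assumption: "quantize: true" means 4-bit QLoRA-style loading via bitsandbytes.
bnb = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_cfg["hf_model_ckpt"],  # "mistralai/Mistral-7B-Instruct-v0.2"
    torch_dtype=getattr(torch, model_cfg["torch_dtype"]),
    attn_implementation=model_cfg["attn_implementation"],
    quantization_config=bnb if model_cfg["quantize"] else None,
)
tokenizer = AutoTokenizer.from_pretrained(model_cfg["hf_model_ckpt"])

# The lora: block maps one-to-one onto peft.LoraConfig's field names.
model = get_peft_model(model, LoraConfig(**cfg["lora"]))

# training.training_args mirrors transformers.TrainingArguments; output_dir
# is not in the diff, so a placeholder is supplied here.
args = TrainingArguments(output_dir="./out", **cfg["training"]["training_args"])

# The inference: keys are ordinary generate() kwargs, e.g.:
#   model.generate(**tokenizer("...", return_tensors="pt"), **cfg["inference"])

One note on the added line: with r: 32 and no explicit lora_alpha, peft's LoraConfig falls back to its default lora_alpha of 8 (a scaling factor of 8/32 = 0.25); the new lora_alpha: 64 makes the common alpha = 2*r choice explicit (scaling factor 2). The remaining changes (one epoch, logging every step, shorter generations) read as shrinking the run rather than altering the method.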