
Commit ce6fa69

Merge remote-tracking branch 'upstream/main'

2 parents 1aef802 + 0e619dc

File tree

2 files changed: +7 -3 lines changed

tests/fms/resources/config_meta_llama3_70b_instruct_lora.json

Lines changed: 3 additions & 1 deletion
@@ -7,13 +7,15 @@
 "per_device_train_batch_size": 1,
 "per_device_eval_batch_size": 4,
 "gradient_accumulation_steps": 4,
+"gradient_checkpointing": true,
 "save_strategy": "no",
 "learning_rate": 1e-5,
 "weight_decay": 0.0,
 "lr_scheduler_type": "cosine",
 "include_tokens_per_second": true,
 "response_template": "\n### Response:",
 "dataset_text_field": "output",
-"use_flash_attn": false,
+"use_flash_attn": true,
+"fast_kernels": [true, true, true],
 "peft_method": "lora"
 }

tests/fms/resources/config_mixtral_8x7b_instruct_v01_lora.json

Lines changed: 4 additions & 2 deletions
@@ -6,15 +6,17 @@
 "num_train_epochs": 1.0,
 "per_device_train_batch_size": 1,
 "per_device_eval_batch_size": 4,
-"gradient_accumulation_steps": 1,
+"gradient_accumulation_steps": 4,
+"gradient_checkpointing": true,
 "save_strategy": "no",
 "learning_rate": 1e-5,
 "weight_decay": 0.0,
 "lr_scheduler_type": "cosine",
 "include_tokens_per_second": true,
 "response_template": "\n### Response:",
 "dataset_text_field": "output",
-"use_flash_attn": false,
+"use_flash_attn": true,
+"fast_kernels": [true, true, true],
 "peft_method": "lora"
 }
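Both test configs receive the same change: gradient checkpointing is turned on, Flash Attention is switched from false to true, and a `fast_kernels` triple is added (the Mixtral config additionally bumps `gradient_accumulation_steps` from 1 to 4, matching the Llama 3 config). `gradient_checkpointing` trades extra forward-pass compute for lower activation memory, and `use_flash_attn` selects a Flash Attention kernel for the model; the three booleans in `fast_kernels` plausibly toggle fused-kernel options (such as fused loss, RMSNorm, and RoPE) supplied by an acceleration plugin, though the exact mapping is defined by the test harness and not shown in this diff. As a rough illustration only, here is a minimal sketch of what the first two flags typically correspond to in plain Hugging Face transformers, assuming the harness forwards them along these lines; the model id is purely an example:

```python
# Illustrative sketch, not code from this repo: the usual transformers-level
# effect of "use_flash_attn": true and "gradient_checkpointing": true.
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3-70B-Instruct",  # example model id only
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",  # what "use_flash_attn": true selects
)
# Recompute activations during the backward pass instead of storing them,
# mirroring "gradient_checkpointing": true in the JSON configs.
model.gradient_checkpointing_enable()
```

Checkpointing is what makes the larger accumulation setting practical on 70B/8x7B LoRA runs: each micro-batch holds less activation memory, and the optimizer steps once every 4 micro-batches.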
