Skip to content

Commit 7bd6509

Browse files
committed
Merge remote-tracking branch 'upstream/main'
2 parents e4d94fa + ce2fa2c commit 7bd6509

File tree

4 files changed

+43
-10
lines changed

4 files changed

+43
-10
lines changed
Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,19 @@
1+
{
2+
"model_name_or_path": "mistralai/Mistral-7B-v0.3",
3+
"training_data_path": "/mnt/scratch/dataset/alpaca_data.json",
4+
"output_dir": "/mnt/output/model",
5+
"save_model_dir": "/mnt/output/model",
6+
"num_train_epochs": 1.0,
7+
"per_device_train_batch_size": 1,
8+
"per_device_eval_batch_size": 4,
9+
"gradient_accumulation_steps": 1,
10+
"save_strategy": "no",
11+
"learning_rate": 1e-5,
12+
"weight_decay": 0.0,
13+
"lr_scheduler_type": "cosine",
14+
"include_tokens_per_second": true,
15+
"response_template": "\n### Response:",
16+
"dataset_text_field": "output",
17+
"use_flash_attn": false
18+
}
19+

tests/kfto/core/config_mixtral_8x7b_instruct_v01.json renamed to tests/kfto/core/config_mixtral_8x7b_instruct_v01_lora.json

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
"include_tokens_per_second": true,
1515
"response_template": "\n### Response:",
1616
"dataset_text_field": "output",
17-
"use_flash_attn": false
17+
"use_flash_attn": false,
18+
"peft_method": "lora"
1819
}
1920

tests/kfto/core/kfto_kueue_sft_GPU_test.go

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -67,12 +67,16 @@ func TestMultiGpuPytorchjobMetaLlama370bInstructLoRa(t *testing.T) {
6767
runMultiGpuPytorchjob(t, "config_meta_llama3_70b_instruct_lora.json")
6868
}
6969

70+
func TestMultiGpuPytorchjobMistral7bv03(t *testing.T) {
71+
runMultiGpuPytorchjob(t, "config_mistral_7b_v03.json")
72+
}
73+
7074
func TestMultiGpuPytorchjobMixtral8x7bv01(t *testing.T) {
7175
runMultiGpuPytorchjob(t, "config_mixtral_8x7b_v01.json")
7276
}
7377

74-
func TestMultiGpuPytorchjobMixtral8x7bInstructv01(t *testing.T) {
75-
runMultiGpuPytorchjob(t, "config_mixtral_8x7b_instruct_v01.json")
78+
func TestMultiGpuPytorchjobMixtral8x7bInstructv01LoRa(t *testing.T) {
79+
runMultiGpuPytorchjob(t, "config_mixtral_8x7b_instruct_v01_lora.json")
7680
}
7781

7882
func TestMultiGpuPytorchjobMerlinite7b(t *testing.T) {

tests/kfto/upgrade/kfto_kueue_sft_upgrade_training_test.go

Lines changed: 16 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -154,11 +154,12 @@ func createPyTorchJob(test Test, namespace, localQueueName string, config corev1
154154
ImagePullPolicy: corev1.PullIfNotPresent,
155155
VolumeMounts: []corev1.VolumeMount{
156156
{
157-
Name: "model-volume",
158-
MountPath: "/tmp/model",
157+
Name: "tmp-volume",
158+
MountPath: "/tmp",
159159
},
160160
},
161-
Command: []string{"cp", "-r", "/models/bloom-560m", "/tmp/model"},
161+
Command: []string{"/bin/sh", "-c"},
162+
Args: []string{"mkdir /tmp/model; cp -r /models/bloom-560m /tmp/model"},
162163
},
163164
},
164165
Containers: []corev1.Container{
@@ -171,21 +172,29 @@ func createPyTorchJob(test Test, namespace, localQueueName string, config corev1
171172
Name: "SFT_TRAINER_CONFIG_JSON_PATH",
172173
Value: "/etc/config/config.json",
173174
},
175+
{
176+
Name: "HF_HOME",
177+
Value: "/tmp/huggingface",
178+
},
174179
},
175180
VolumeMounts: []corev1.VolumeMount{
176181
{
177182
Name: "config-volume",
178183
MountPath: "/etc/config",
179184
},
180185
{
181-
Name: "model-volume",
182-
MountPath: "/tmp/model",
186+
Name: "tmp-volume",
187+
MountPath: "/tmp",
183188
},
184189
},
185190
Resources: corev1.ResourceRequirements{
186191
Requests: corev1.ResourceList{
187192
corev1.ResourceCPU: resource.MustParse("2"),
188-
corev1.ResourceMemory: resource.MustParse("5Gi"),
193+
corev1.ResourceMemory: resource.MustParse("7Gi"),
194+
},
195+
Limits: corev1.ResourceList{
196+
corev1.ResourceCPU: resource.MustParse("2"),
197+
corev1.ResourceMemory: resource.MustParse("7Gi"),
189198
},
190199
},
191200
},
@@ -212,7 +221,7 @@ func createPyTorchJob(test Test, namespace, localQueueName string, config corev1
212221
},
213222
},
214223
{
215-
Name: "model-volume",
224+
Name: "tmp-volume",
216225
VolumeSource: corev1.VolumeSource{
217226
EmptyDir: &corev1.EmptyDirVolumeSource{},
218227
},

0 commit comments

Comments
 (0)