@@ -110,8 +110,8 @@ func createKFTOPyTorchJob(test Test, namespace string, config corev1.ConfigMap,
110
110
ImagePullPolicy : corev1 .PullIfNotPresent ,
111
111
VolumeMounts : []corev1.VolumeMount {
112
112
{
113
- Name : "data -volume" ,
114
- MountPath : "/tmp/dataset " ,
113
+ Name : "tmp -volume" ,
114
+ MountPath : "/tmp" ,
115
115
},
116
116
},
117
117
Command : []string {
@@ -135,12 +135,7 @@ func createKFTOPyTorchJob(test Test, namespace string, config corev1.ConfigMap,
135
135
ImagePullPolicy : corev1 .PullIfNotPresent ,
136
136
Command : []string {
137
137
"/bin/bash" , "-c" ,
138
- `export HF_HOME=/tmp/.cache && \
139
- export TRITON_CACHE_DIR=/tmp/.triton && \
140
- export TOKENIZERS_PARALLELISM=false && \
141
- export RANK=0 && \
142
- export WORLD_SIZE=1 && \
143
- python /etc/config/hf_llm_training.py \
138
+ `python /etc/config/hf_llm_training.py \
144
139
--model_uri /tmp/model/bloom-560m \
145
140
--model_dir /tmp/model/bloom-560m \
146
141
--dataset_dir /tmp/dataset \
@@ -158,12 +153,8 @@ func createKFTOPyTorchJob(test Test, namespace string, config corev1.ConfigMap,
158
153
Value : "/tmp/.triton" ,
159
154
},
160
155
{
161
- Name : "RANK" ,
162
- Value : "0" ,
163
- },
164
- {
165
- Name : "WORLD_SIZE" ,
166
- Value : "1" ,
156
+ Name : "TOKENIZERS_PARALLELISM" ,
157
+ Value : "false" ,
167
158
},
168
159
},
169
160
VolumeMounts : []corev1.VolumeMount {
@@ -179,10 +170,6 @@ func createKFTOPyTorchJob(test Test, namespace string, config corev1.ConfigMap,
179
170
Name : "output-volume" ,
180
171
MountPath : "/mnt/output" ,
181
172
},
182
- {
183
- Name : "data-volume" ,
184
- MountPath : "tmp/dataset" ,
185
- },
186
173
},
187
174
Resources : corev1.ResourceRequirements {
188
175
Requests : corev1.ResourceList {
@@ -219,24 +206,6 @@ func createKFTOPyTorchJob(test Test, namespace string, config corev1.ConfigMap,
219
206
EmptyDir : & corev1.EmptyDirVolumeSource {},
220
207
},
221
208
},
222
- {
223
- Name : "data-volume" ,
224
- VolumeSource : corev1.VolumeSource {
225
- Ephemeral : & corev1.EphemeralVolumeSource {
226
- VolumeClaimTemplate : & corev1.PersistentVolumeClaimTemplate {
227
- Spec : corev1.PersistentVolumeClaimSpec {
228
- AccessModes : []corev1.PersistentVolumeAccessMode {corev1 .ReadWriteOnce },
229
- Resources : corev1.VolumeResourceRequirements {
230
- Requests : corev1.ResourceList {
231
- corev1 .ResourceStorage : resource .MustParse ("2000Gi" ),
232
- },
233
- },
234
- VolumeMode : Ptr (corev1 .PersistentVolumeFilesystem ),
235
- },
236
- },
237
- },
238
- },
239
- },
240
209
{
241
210
Name : "output-volume" ,
242
211
VolumeSource : corev1.VolumeSource {
0 commit comments