Skip to content

Commit 98e491a

Browse files
committed
docker updates for a100
1 parent 202e5cb commit 98e491a

File tree

3 files changed

+8 -3 lines changed

3 files changed

+8
-3
lines changed

docker/scripts/startup.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ fi
174174

175175
# Check if arguments are valid
176176
VALID_DATASETS=("criteo1tb" "imagenet" "fastmri" "ogbg" "librispeech" \
177-
"wmt" "mnist")
177+
"wmt" "mnist" "fineweb_edu_10B")
178178
VALID_WORKLOADS=("criteo1tb" "imagenet_resnet" "imagenet_resnet_silu" "imagenet_resnet_gelu" \
179179
"imagenet_resnet_large_bn_init" "imagenet_vit" "imagenet_vit_glu" \
180180
"imagenet_vit_post_ln" "imagenet_vit_map" "fastmri" "ogbg" \
@@ -185,7 +185,7 @@ VALID_WORKLOADS=("criteo1tb" "imagenet_resnet" "imagenet_resnet_silu" "imagenet_
185185
"librispeech_conformer_gelu" "fastmri_model_size" "fastmri_tanh" \
186186
"librispeech_deepspeech_tanh" \
187187
"librispeech_deepspeech_no_resnet" "librispeech_deepspeech_norm_and_spec_aug"
188-
"fastmri_layernorm" "ogbg_gelu" "ogbg_silu" "ogbg_model_size")
188+
"fastmri_layernorm" "ogbg_gelu" "ogbg_silu" "ogbg_model_size" "lm")
189189
VALID_RULESETS=("self" "external")
190190

191191
# Set data and experiment paths
@@ -221,7 +221,7 @@ TUNING_RULESET_FLAG="--tuning_ruleset=${TUNING_RULESET}"
221221
if [[ "${FRAMEWORK}" == "jax" ]]; then
222222
COMMAND_PREFIX="python"
223223
else
224-
COMMAND_PREFIX="torchrun --redirects 1:0,2:0,3:0,4:0,5:0,6:0,7:0 --standalone --nnodes=1 --nproc_per_node=8"
224+
COMMAND_PREFIX="torchrun --redirects 1:0,2:0,3:0 --standalone --nnodes=1 --nproc_per_node=4"
225225
fi
226226

227227
# Set data directory and bucket (bucket is only relevant in internal mode)

scoring/utils/run_workloads.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ def main(_):
270270
'docker run -t -d -v /home/kasimbeg/data/:/data/ '
271271
'-v /home/kasimbeg/experiment_runs/:/experiment_runs '
272272
'-v /home/kasimbeg/experiment_runs/logs:/logs '
273+
'-v /home/kasimbeg/algorithmic-efficiency:/algorithmic-efficiency'
273274
f'{mount_repo_flag}'
274275
'--gpus all --ipc=host '
275276
f'{docker_image_url} '

scoring/utils/workload_metadata_external_tuning.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,9 @@
3030
"librispeech_conformer": {
3131
"max_steps": 80000,
3232
"dataset": "librispeech"
33+
},
34+
"lm" : {
35+
"max_steps": 55000,
36+
"dataset":"fineweb_edu_10B"
3337
}
3438
}

0 commit comments

Comments (0)