|
34 | 34 |
|
35 | 35 | # COMMAND ---------- |
36 | 36 |
|
37 | | -# MAGIC !wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/libcusparse-dev-11-3_11.5.0.58-1_amd64.deb -O /tmp/libcusparse-dev-11-3_11.5.0.58-1_amd64.deb && \ |
38 | | -# MAGIC wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/libcublas-dev-11-3_11.5.1.109-1_amd64.deb -O /tmp/libcublas-dev-11-3_11.5.1.109-1_amd64.deb && \ |
39 | | -# MAGIC wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/libcusolver-dev-11-3_11.1.2.109-1_amd64.deb -O /tmp/libcusolver-dev-11-3_11.1.2.109-1_amd64.deb && \ |
40 | | -# MAGIC wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/libcurand-dev-11-3_10.2.4.109-1_amd64.deb -O /tmp/libcurand-dev-11-3_10.2.4.109-1_amd64.deb && \ |
41 | | -# MAGIC dpkg -i /tmp/libcusparse-dev-11-3_11.5.0.58-1_amd64.deb && \ |
42 | | -# MAGIC dpkg -i /tmp/libcublas-dev-11-3_11.5.1.109-1_amd64.deb && \ |
43 | | -# MAGIC dpkg -i /tmp/libcusolver-dev-11-3_11.1.2.109-1_amd64.deb && \ |
44 | | -# MAGIC dpkg -i /tmp/libcurand-dev-11-3_10.2.4.109-1_amd64.deb |
| 37 | +!wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/libcusparse-dev-11-3_11.5.0.58-1_amd64.deb -O /tmp/libcusparse-dev-11-3_11.5.0.58-1_amd64.deb && \ |
| 38 | + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/libcublas-dev-11-3_11.5.1.109-1_amd64.deb -O /tmp/libcublas-dev-11-3_11.5.1.109-1_amd64.deb && \ |
| 39 | + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/libcusolver-dev-11-3_11.1.2.109-1_amd64.deb -O /tmp/libcusolver-dev-11-3_11.1.2.109-1_amd64.deb && \ |
| 40 | + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/libcurand-dev-11-3_10.2.4.109-1_amd64.deb -O /tmp/libcurand-dev-11-3_10.2.4.109-1_amd64.deb && \ |
| 41 | + dpkg -i /tmp/libcusparse-dev-11-3_11.5.0.58-1_amd64.deb && \ |
| 42 | + dpkg -i /tmp/libcublas-dev-11-3_11.5.1.109-1_amd64.deb && \ |
| 43 | + dpkg -i /tmp/libcusolver-dev-11-3_11.1.2.109-1_amd64.deb && \ |
| 44 | + dpkg -i /tmp/libcurand-dev-11-3_10.2.4.109-1_amd64.deb |
45 | 45 |
|
46 | 46 | # COMMAND ---------- |
47 | 47 |
|
|
148 | 148 |
|
149 | 149 | # configure the batch_size |
150 | 150 | batch_size = 3 |
151 | | -if gpu_family == "a100": |
| 151 | +if gpu_family == "a10": |
| 152 | + batch_size = 4 |
| 153 | +elif gpu_family == "a100": |
152 | 154 | batch_size = 6 |
153 | 155 |
|
154 | 156 | # configure num_gpus, if specified |
|
167 | 169 |
|
168 | 170 | # COMMAND ---------- |
169 | 171 |
|
170 | | -# MAGIC !deepspeed {num_gpus_flag} \ |
171 | | -# MAGIC --module training.trainer \ |
172 | | -# MAGIC --input-model {input_model} \ |
173 | | -# MAGIC --deepspeed {deepspeed_config} \ |
174 | | -# MAGIC --epochs 2 \ |
175 | | -# MAGIC --local-output-dir {local_output_dir} \ |
176 | | -# MAGIC --dbfs-output-dir {dbfs_output_dir} \ |
177 | | -# MAGIC --per-device-train-batch-size {batch_size} \ |
178 | | -# MAGIC --per-device-eval-batch-size {batch_size} \ |
179 | | -# MAGIC --logging-steps 10 \ |
180 | | -# MAGIC --save-steps 200 \ |
181 | | -# MAGIC --save-total-limit 20 \ |
182 | | -# MAGIC --eval-steps 50 \ |
183 | | -# MAGIC --warmup-steps 50 \ |
184 | | -# MAGIC --test-size 200 \ |
185 | | -# MAGIC --lr 5e-6 |
| 172 | +!deepspeed {num_gpus_flag} \ |
| 173 | + --module training.trainer \ |
| 174 | + --input-model {input_model} \ |
| 175 | + --deepspeed {deepspeed_config} \ |
| 176 | + --epochs 2 \ |
| 177 | + --local-output-dir {local_output_dir} \ |
| 178 | + --dbfs-output-dir {dbfs_output_dir} \ |
| 179 | + --per-device-train-batch-size {batch_size} \ |
| 180 | + --per-device-eval-batch-size {batch_size} \ |
| 181 | + --logging-steps 10 \ |
| 182 | + --save-steps 200 \ |
| 183 | + --save-total-limit 20 \ |
| 184 | + --eval-steps 50 \ |
| 185 | + --warmup-steps 50 \ |
| 186 | + --test-size 200 \ |
| 187 | + --lr 5e-6 |
186 | 188 |
|
187 | 189 | # COMMAND ---------- |
188 | 190 |
|
|
0 commit comments