|
 #!/bin/bash
-
+export XLA_FLAGS="--xla_gpu_cuda_data_dir=/opt/conda"
 keras_version=$(micromamba list | grep keras | tr -s ' ' | cut -d ' ' -f 3)

 git checkout tags/v$keras_version
 # Ref: https://keras.io/guides/, https://github.com/keras-team/keras-io/tree/master
 for file in *.py; do
-    if [ "$file" != "transfer_learning.py" ] && [ "$file" != "custom_train_step_in_torch.py" ]; then
-        # skipping transfer_learning.py because it has 20 epochs and it takes a very long time to execute
-        # https://github.com/keras-team/keras-io/blob/master/guides/transfer_learning.py#L562
-        # skipping custom_train_step_in_torch.py because there is a bug which causes error
+    if [ "$file" != "transfer_learning.py" ] &&
+       # skipping transfer_learning.py because it runs 20 epochs and takes a very long time to execute
+       # https://github.com/keras-team/keras-io/blob/master/guides/transfer_learning.py#L562
+       [ "$file" != "distributed_training_with_torch.py" ] &&
+       # skipping distributed_training_with_torch.py because a bug causes this error:
+       # Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
+       [ "$file" != "custom_train_step_in_torch.py" ] &&
+       # skipping custom_train_step_in_torch.py because a bug causes this error:
+       # AttributeError: 'list' object has no attribute 'shape'
+       [ "$file" != "writing_a_custom_training_loop_in_torch.py" ]; then
+        # skipping writing_a_custom_training_loop_in_torch.py because a bug causes this error:
+        # RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!
         python "$file" || exit $?
     fi
 done
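
For reference on the `keras_version` line: assuming `micromamba list`, like `conda list`, prints rows with leading whitespace (which is what the script's use of field 3 implies), `tr -s ' '` squeezes the space runs, `cut`'s field 1 ends up empty, and the version lands in field 3 rather than field 2. A minimal sketch on one hypothetical output row (the row below is illustrative, not captured output):

  # hypothetical `micromamba list` row: name, version, build, channel
  echo "  keras    3.3.3    pypi_0    pypi" | tr -s ' ' | cut -d ' ' -f 3   # prints 3.3.3

Note that `grep keras` would also match related packages such as keras-cv or keras-nlp if they were installed; the pipeline assumes plain keras is the only match.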