-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathsweep.sh
More file actions
67 lines (46 loc) · 1.62 KB
/
sweep.sh
File metadata and controls
67 lines (46 loc) · 1.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
## Sweep 1. Is attention useful?
# Grid over learning rate (2^loglr), attention on/off, and VAE channel width.
# Every combination launches one torchrun job (8 processes per node) of
# vae_trainer.py; the jobs run sequentially in the foreground.
loglrs=(-8 -7 -6 -5 -4 -3 -2)
MODEL_WIDTHS=(32 64 128)
for loglr in "${loglrs[@]}"; do
  # The learning rate depends only on loglr, so compute it once per exponent
  # rather than once per (attn, width) combination.
  if ! lr=$(python -c "print(2**${loglr})"); then
    # Without a valid lr the trainer would be started with a broken argument
    # list, so report the problem and move on to the next exponent.
    echo "Could not compute learning rate for loglr=${loglr}; skipping" >&2
    continue
  fi
  for attn in "True" "False"; do
    for width in "${MODEL_WIDTHS[@]}"; do
      run_name="exp_vae_ch_${width}_lr_${lr}_attn_${attn}"
      echo "Running ${run_name}"
      # NOTE(review): the literal strings True/False are handed to --do_attn;
      # confirm that vae_trainer.py really parses "False" as false.
      torchrun --nproc_per_node=8 vae_trainer.py \
        --learning_rate_vae "${lr}" \
        --vae_ch "${width}" \
        --run_name "${run_name}" \
        --num_epochs 20 \
        --max_steps 2000 \
        --evaluate_every_n_steps 250 \
        --batch_size 32 \
        --do_clamp \
        --do_attn "${attn}" \
        --project_name "vae_sweep_attn_lr_width"
    done
  done
done
## Sweep 2. Can we initialize better?
# Grid over learning rate (2^loglr) and attention on/off at a single VAE
# channel width. Every combination launches one torchrun job (8 processes per
# node) of vae_trainer.py; the jobs run sequentially in the foreground.
#
# NOTE(review): no initialization-related flag is passed below, and the run
# names and project name are identical to those of the width-64 runs in
# Sweep 1. Presumably an init option of vae_trainer.py (or a code change) is
# meant to differ here, and the run/project name should reflect it -- TODO
# confirm before launching, otherwise results will be indistinguishable.
loglrs=(-8 -7 -6 -5 -4 -3 -2)
MODEL_WIDTHS=(64)
for loglr in "${loglrs[@]}"; do
  # The learning rate depends only on loglr, so compute it once per exponent
  # rather than once per (attn, width) combination.
  if ! lr=$(python -c "print(2**${loglr})"); then
    # Without a valid lr the trainer would be started with a broken argument
    # list, so report the problem and move on to the next exponent.
    echo "Could not compute learning rate for loglr=${loglr}; skipping" >&2
    continue
  fi
  for attn in "True" "False"; do
    for width in "${MODEL_WIDTHS[@]}"; do
      run_name="exp_vae_ch_${width}_lr_${lr}_attn_${attn}"
      echo "Running ${run_name}"
      # NOTE(review): the literal strings True/False are handed to --do_attn;
      # confirm that vae_trainer.py really parses "False" as false.
      torchrun --nproc_per_node=8 vae_trainer.py \
        --learning_rate_vae "${lr}" \
        --vae_ch "${width}" \
        --run_name "${run_name}" \
        --num_epochs 20 \
        --max_steps 2000 \
        --evaluate_every_n_steps 250 \
        --batch_size 32 \
        --do_clamp \
        --do_attn "${attn}" \
        --project_name "vae_sweep_attn_lr_width"
    done
  done
done