Skip to content

Commit c466646

Browse files
authored
Merge pull request #49 from Wayfarer-Labs/waypoint_1_prep
Waypoint 1 prep
2 parents fa9f5d7 + f54576c commit c466646

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

49 files changed

+4121
-928
lines changed
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
model:
2+
model_id: dcae
3+
sample_size: [360, 640]
4+
channels: 3
5+
latent_size: 32
6+
latent_channels: 16
7+
8+
ch_0: 256
9+
ch_max: 2048
10+
11+
encoder_blocks_per_stage: [4, 4, 4, 8]
12+
decoder_blocks_per_stage: [4, 4, 4, 8]
13+
14+
use_middle_block: false
15+
do_channel_mask: false
16+
17+
train:
18+
trainer_id: rec
19+
data_id: video_dir_loader
20+
data_kwargs:
21+
source:
22+
- /mnt/data/datasets/extracted_tars/kbm/fps/*/*.mp4
23+
- /mnt/data/datasets/extracted_tars/kbm/3ps/*/*.mp4
24+
- /mnt/data/datasets/extracted_tars/kbm/other/*/*.mp4
25+
target_size: [360, 640]
26+
27+
target_batch_size: 32
28+
batch_size: 4
29+
30+
epochs: 200
31+
32+
#opt: AdamW
33+
#opt_kwargs:
34+
# lr: 3.0e-5
35+
# weight_decay: 1.0e-4
36+
# betas: [0.9, 0.95]
37+
# eps: 1.0e-15
38+
39+
opt: Muon
40+
opt_kwargs:
41+
lr: 1.0e-3
42+
momentum: 0.95
43+
adamw_lr: 1.0e-5
44+
adamw_wd: 1.0e-2
45+
adamw_eps: 1.0e-6
46+
adamw_betas: [0.9, 0.95]
47+
adamw_keys:
48+
- encoder.conv_in
49+
- encoder.conv_out
50+
- encoder.conv_out_logvar
51+
- decoder.conv_in
52+
- decoder.conv_out
53+
- .up.
54+
- .down.
55+
- .residuals.
56+
57+
lpips_type: convnext
58+
loss_weights:
59+
kl: 3.0e-7
60+
lpips: 12.0
61+
l2: 1.0
62+
dwt: 0.25
63+
64+
65+
scheduler: LinearWarmup
66+
scheduler_kwargs:
67+
warmup_steps: 3000
68+
min_lr: 1.0e-5
69+
70+
checkpoint_dir: checkpoints/waypoint_1_vae_owlc_f16_c16
71+
resume_ckpt: checkpoints/waypoint_1_vae_owlc_f16_c16/step_200000.pt
72+
73+
sample_interval: 1000
74+
save_interval: 5000
75+
76+
wandb:
77+
name: shahbuland
78+
project: new_vaes_v2
79+
run_name: waypoint_1_vae_owlc_f16_c16_highres
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
model:
2+
model_id: dcae
3+
sample_size: [360, 640]
4+
channels: 3
5+
latent_size: 32
6+
latent_channels: 16
7+
8+
ch_0: 64
9+
ch_max: 256
10+
11+
encoder_blocks_per_stage: [1, 1, 1, 1]
12+
decoder_blocks_per_stage: [1, 1, 1, 1]
13+
14+
use_middle_block: false
15+
do_channel_mask: false
16+
skip_logvar: false
17+
18+
train:
19+
trainer_id: distill_enc
20+
data_id: video_dir_loader
21+
data_kwargs:
22+
source:
23+
- /mnt/data/datasets/extracted_tars/kbm/fps/*/*.mp4
24+
- /mnt/data/datasets/extracted_tars/kbm/3ps/*/*.mp4
25+
- /mnt/data/datasets/extracted_tars/kbm/other/*/*.mp4
26+
target_size: [360, 640]
27+
28+
target_batch_size: 256
29+
batch_size: 32
30+
31+
epochs: 200
32+
33+
# opt: AdamW
34+
# opt_kwargs:
35+
# lr: 3.0e-5
36+
# weight_decay: 1.0e-4
37+
# betas: [0.9, 0.95]
38+
# eps: 1.0e-15
39+
40+
opt: Muon
41+
opt_kwargs:
42+
lr: 5.0e-4
43+
momentum: 0.95
44+
adamw_lr: 1.0e-4
45+
adamw_wd: 1.0e-2
46+
adamw_eps: 1.0e-6
47+
adamw_betas: [0.9, 0.95]
48+
adamw_keys:
49+
- conv_in
50+
- conv_out
51+
- conv_out_logvar
52+
- .down.
53+
- .residuals.
54+
55+
lpips_type: convnext
56+
loss_weights:
57+
l2: 1.0
58+
logvar: 1.0
59+
60+
scheduler: LinearWarmup
61+
scheduler_kwargs:
62+
warmup_steps: 1500
63+
min_lr: 1.0e-5
64+
65+
checkpoint_dir: checkpoints/waypoint_1_vae_owlc_f16_c16_enc_distill
66+
resume_ckpt: null #checkpoints/waypoint_1_vae_owlc_depth_c32_enc_distill/step_25000.pt
67+
68+
teacher_ckpt: checkpoints/waypoint_1_vae_owlc_f16_c16/step_300000.pt
69+
teacher_cfg: configs/waypoint_1/owl_vae_f16_c16.yml
70+
71+
sample_interval: 1000
72+
save_interval: 5000
73+
74+
latent_scale: 1.0
75+
latent_shift: 0.0
76+
77+
wandb:
78+
name: shahbuland
79+
project: new_vaes_v2
80+
run_name: waypoint_1_vae_owlc_f16_c16_enc_distill_highlr

configs/waypoint_1/owlc_rgb.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ model:
22
model_id: dcae
33
sample_size: [360, 640]
44
channels: 3
5-
latent_size: 16
5+
latent_size: 8
66
latent_channels: 64
77

88
ch_0: 256
@@ -22,6 +22,7 @@ train:
2222
- /mnt/data/datasets/extracted_tars/kbm/fps/*/*.mp4
2323
- /mnt/data/datasets/extracted_tars/kbm/3ps/*/*.mp4
2424
- /mnt/data/datasets/extracted_tars/kbm/other/*/*.mp4
25+
target_size: [360, 640]
2526

2627
target_batch_size: 32
2728
batch_size: 4
@@ -48,7 +49,7 @@ train:
4849
min_lr: 3.0e-6
4950

5051
checkpoint_dir: checkpoints/waypoint_1_vae_owlc_rgb
51-
resume_ckpt: null
52+
resume_ckpt: checkpoints/waypoint_1_vae_owlc_rgb/step_460000.pt
5253

5354
sample_interval: 1000
5455
save_interval: 5000
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
model:
2+
model_id: causal_diffdec
3+
sample_size: [360, 640]
4+
channels: 3
5+
latent_size: 16
6+
latent_channels: 64
7+
patch_size: [20,20]
8+
kernel: null
9+
10+
n_layers: 16
11+
n_heads: 16
12+
d_model: 1024
13+
14+
causal: true
15+
mimetic_init: false
16+
block_size: null
17+
backbone: dit
18+
rope_impl: image+latent
19+
dropout: 0.1
20+
shuffle_factor: 1
21+
cfg_prob: 0.1
22+
n_frames: 2
23+
24+
train:
25+
trainer_id: caus_diffdec_depth
26+
data_id: video_dir_loader
27+
data_kwargs:
28+
source:
29+
- /mnt/data/waypoint_1/data/MKIF/*/*.mp4
30+
target_size: [360, 640]
31+
window_length: 2
32+
eval_data_id: video_dir_loader
33+
eval_data_kwargs:
34+
source:
35+
- /mnt/data/waypoint_1/data/MKIF/*/*.mp4
36+
target_size: [360, 640]
37+
window_length: 16
38+
39+
target_batch_size: 16
40+
batch_size: 2
41+
42+
epochs: 200
43+
44+
# opt: AdamW
45+
# opt_kwargs:
46+
# lr: 1.0e-4
47+
# weight_decay: 1.0e-4
48+
# betas: [0.9, 0.95]
49+
# eps: 1.0e-15
50+
51+
opt: Muon
52+
opt_kwargs:
53+
lr: 1.0e-2
54+
momentum: 0.95
55+
adamw_lr: 1.0e-4
56+
adamw_wd: 1.0e-4
57+
adamw_eps: 1.0e-6
58+
adamw_betas: [0.9, 0.95]
59+
adamw_keys:
60+
- core.proj_in
61+
- core.proj_in_z
62+
- core.proj_out
63+
- core.ts_embed
64+
- core.final
65+
- core.null_emb
66+
- .adaln1
67+
- .adaln2
68+
- .gate1
69+
- .gate2
70+
71+
72+
checkpoint_dir: checkpoints/wp1_720p_diffdec
73+
resume_ckpt: null # checkpoints/wp1_720p_dec_distill/step_10000.pt
74+
75+
sample_interval: 100
76+
save_interval: 5000
77+
78+
teacher_cfg: configs/waypoint_1/wp1_depth.yml
79+
teacher_ckpt: checkpoints/waypoint_1_vae_owlc_depth/step_390000.pt
80+
81+
latent_scale: 0.69
82+
ldm_scale: 1.0
83+
sampling_steps: 20
84+
cfg_scale: 1.5
85+
use_proxy: false
86+
87+
wandb:
88+
name: shahbuland
89+
project: hq_vaes
90+
run_name: wp1_720p_diffdec

configs/waypoint_1/wp1_diffdec.yml

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
model:
2+
model_id: diff_dec
3+
sample_size: [45, 80]
4+
channels: 16
5+
latent_size: 16
6+
latent_channels: 64
7+
patch_size: [5,2]
8+
9+
n_layers: 28
10+
n_heads: 24
11+
d_model: 1536
12+
13+
causal: false
14+
mimetic_init: false
15+
block_size: null
16+
backbone: dit
17+
rope_impl: image+latent
18+
dropout: 0.1
19+
shuffle_factor: 1
20+
cfg_prob: 0.1
21+
22+
train:
23+
trainer_id: proxy_diffdec
24+
data_id: video_dir_loader
25+
data_kwargs:
26+
source:
27+
- /mnt/data/waypoint_1/data/MKIF/*/*.mp4
28+
target_size: [360, 640]
29+
30+
target_batch_size: 128
31+
batch_size: 16
32+
33+
epochs: 200
34+
35+
# opt: AdamW
36+
# opt_kwargs:
37+
# lr: 1.0e-4
38+
# weight_decay: 1.0e-4
39+
# betas: [0.9, 0.95]
40+
# eps: 1.0e-15
41+
42+
opt: Muon
43+
opt_kwargs:
44+
lr: 1.0e-2
45+
momentum: 0.95
46+
adamw_lr: 1.0e-4
47+
adamw_wd: 1.0e-4
48+
adamw_eps: 1.0e-6
49+
adamw_betas: [0.9, 0.95]
50+
adamw_keys:
51+
- core.proj_in
52+
- core.proj_in_z
53+
- core.proj_out
54+
- core.ts_embed
55+
- core.final
56+
- core.null_emb
57+
- .adaln1
58+
- .adaln2
59+
- .gate1
60+
- .gate2
61+
62+
63+
checkpoint_dir: checkpoints/wp1_720p_diffdec
64+
resume_ckpt: null # checkpoints/wp1_720p_dec_distill/step_10000.pt
65+
66+
sample_interval: 100
67+
save_interval: 5000
68+
69+
teacher_cfg: configs/waypoint_1/wp1_depth.yml
70+
teacher_ckpt: checkpoints/waypoint_1_vae_owlc_depth/step_390000.pt
71+
72+
latent_scale: 0.69
73+
ldm_scale: 1.0
74+
sampling_steps: 20
75+
cfg_scale: 1.5
76+
77+
wandb:
78+
name: wayfarerlabs
79+
project: hd_vaes
80+
run_name: wp1_720p_diffdec

0 commit comments

Comments
 (0)