Commits (31, all by thegenerativegeneration):
- 4630846 add video dataset downloader as submodule (Apr 4, 2023)
- d3b6d0e Merge branch 'main' of github.com:thegenerativegeneration/Thin-Plate-… (Jun 14, 2023)
- 429a5d1 change model definitions and training (Jul 8, 2023)
- f6deb7b change model definitions and training (Jul 12, 2023)
- ca4f60a fix scheduler resuming (Jul 12, 2023)
- 8e215fb add a few things (Jul 12, 2023)
- 74c3f5d fix adding additional layers (Jul 13, 2023)
- 84d5aee revert to avgpool (Jul 13, 2023)
- 40bd97c gan loss (Jul 13, 2023)
- 94031ee add some losses (Jul 13, 2023)
- 6dad8b4 add lots of stuff (Jul 19, 2023)
- 26635bf change pyyaml version (Jul 20, 2023)
- 1cc1644 update (Sep 8, 2023)
- 7a237df update reqs (Sep 8, 2023)
- 9d5a459 remove albumentations (Sep 8, 2023)
- 8fbecab add torchinfo (Sep 8, 2023)
- 50d4eca pin all dependencies (Sep 8, 2023)
- 87badb9 pin all dependencies (Sep 8, 2023)
- ad87d38 add missing requirement (Sep 8, 2023)
- 1c208f0 add missing requirement (Sep 8, 2023)
- 2352e28 add missing requirement (Sep 8, 2023)
- e989d16 add missing requirement (Sep 8, 2023)
- 09054ba remove bitsandbytes (Sep 8, 2023)
- 0981387 add another config file (Sep 12, 2023)
- 39fd117 update config file (Sep 12, 2023)
- f63f938 fix import (Sep 13, 2023)
- 122f95e fix yaml loading (Sep 13, 2023)
- 23329fc reduce batch size (Sep 13, 2023)
- 92da8ba fix logger (Sep 13, 2023)
- 989c942 1024 finetune config (Sep 21, 2023)
- 3179ac9 add 1536 config file (Sep 21, 2023)
3 changes: 3 additions & 0 deletions .gitmodules
@@ -0,0 +1,3 @@
[submodule "video-preprocessing"]
	path = video-preprocessing
	url = https://github.com/AliaksandrSiarohin/video-preprocessing.git
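(The submodule points at AliaksandrSiarohin's video-preprocessing repository. Presumably it is fetched after cloning with `git submodule update --init video-preprocessing`, or by cloning with `--recurse-submodules`, so that dataset paths such as `./video-preprocessing/vox2-768` in the configs below resolve.)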
92 changes: 92 additions & 0 deletions config/vox-1024-deeper.yaml
@@ -0,0 +1,92 @@
dataset_params:
  root_dir: ./vox512_filtered_webp
  frame_shape: 1024,1024,3
  id_sampling: True
  augmentation_params:
    flip_param:
      horizontal_flip: True
      time_flip: True
    jitter_param:
      brightness: 0.1
      contrast: 0.1
      saturation: 0.1
      hue: 0.1


model_params:
  common_params:
    num_tps: 10
    num_channels: 3
    bg: True
    multi_mask: True
  generator_params:
    block_expansion: 64
    max_features: 512
    num_down_blocks: 4
  dense_motion_params:
    block_expansion: 64
    max_features: 1024
    num_blocks: 5
    scale_factor: 0.25 # might make sense to set to 0.5 because of the additional occlusion (4=>5)
    occlusion_num: 5
  avd_network_params:
    id_bottle_size: 128
    pose_bottle_size: 128



train_params:
  num_epochs: 80
  num_repeats: 2
  lr_generator: 2.0e-5
  lr_discriminator: 2.0e-5
  batch_size: 1
  scales: [1, 0.5, 0.25, 0.125, 0.0625, 0.03125]
  dataloader_workers: 8
  checkpoint_freq: 5
  dropout_epoch: 0
  dropout_maxp: 0.3
  dropout_startp: 0.1
  dropout_inc_epoch: 10
  bg_start: 101
  freeze_kp_detector: True
  freeze_bg_predictor: True
  freeze_dense_motion: False
  transform_params:
    sigma_affine: 0.05
    sigma_tps: 0.005
    points_tps: 5
  loss_weights:
    perceptual: [5, 5, 5, 5, 5]
    equivariance_value: 10
    warp_loss: 10
    bg: 0
    l2: 0
    id: 0.1
    huber: 0
    generator_gan: 10
    generator_feat_match: 100
    discriminator_gan: 10
  optimizer: 'adamw'
  optimizer_params:
    betas: [ 0.9, 0.999 ]
    weight_decay: 1.0e-3
  scheduler: 'onecycle'
  scheduler_params:
    pct_start: 0.01

train_avd_params:
  num_epochs: 100
  num_repeats: 1
  batch_size: 8
  dataloader_workers: 6
  checkpoint_freq: 1
  epoch_milestones: [10, 20]
  lr: 1.0e-3
  lambda_shift: 1
  random_scale: 0.25

visualizer_params:
  kp_size: 5
  draw_border: True
  colormap: 'gist_rainbow'
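A minimal sketch of how a config like this might be loaded, assuming the fork reads its YAML with PyYAML (the commit history pins the pyyaml version and fixes YAML loading); the frame_shape parsing below is a hypothetical helper, not code from the repository:

```python
import yaml

# Load the config; safe_load is assumed here, the fork may use a different loader.
with open("config/vox-1024-deeper.yaml") as f:
    config = yaml.safe_load(f)

# frame_shape is written as "1024,1024,3", which PyYAML parses as a plain
# string, so a consumer has to split it into integers itself (hypothetical):
frame_shape = tuple(int(v) for v in str(config["dataset_params"]["frame_shape"]).split(","))
print(frame_shape)  # (1024, 1024, 3)
```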
93 changes: 93 additions & 0 deletions config/vox-1024-finetune.yaml
@@ -0,0 +1,93 @@
# Use this file to finetune from a pretrained 256x256 model
name: vox-1024-finetune
dataset_params:
  root_dir: ./video-preprocessing/vox2-768
  frame_shape: 1024,1024,3
  id_sampling: True
  augmentation_params:
    flip_param:
      horizontal_flip: True
      time_flip: True
    jitter_param:
      brightness: 0.1
      contrast: 0.1
      saturation: 0.1
      hue: 0.1


model_params:
  common_params:
    num_tps: 10
    num_channels: 3
    bg: True
    multi_mask: True
  generator_params:
    block_expansion: 64
    max_features: 512
    num_down_blocks: 3
  dense_motion_params:
    block_expansion: 64
    max_features: 1024
    num_blocks: 5
    scale_factor: 0.25
  avd_network_params:
    id_bottle_size: 128
    pose_bottle_size: 128


train_params:
  visualize_model: False
  num_epochs: 50
  num_repeats: 1
  # A higher LR seems to cause problems when finetuning
  lr_generator: 2.0e-6
  lr_discriminator: 2.0e-5
  batch_size: 1
  scales: [1, 0.5, 0.25, 0.125, 0.0625]
  dataloader_workers: 8
  checkpoint_freq: 1
  dropout_epoch: 0
  dropout_maxp: 0.3
  dropout_startp: 0.1
  dropout_inc_epoch: 0
  bg_start: 81
  freeze_kp_detector: True
  freeze_bg_predictor: True
  transform_params:
    sigma_affine: 0.05
    sigma_tps: 0.005
    points_tps: 5
  loss_weights:
    perceptual: [10, 10, 10, 10, 10]
    equivariance_value: 10
    warp_loss: 10
    bg: 10
    id: 0.1
    l2: 0
    huber: 0
    generator_gan: 0
    generator_feat_match: 0
    discriminator_gan: 0
  optimizer: 'adamw'
  optimizer_params:
    betas: [ 0.9, 0.999 ]
    weight_decay: 0.1
  scheduler: 'onecycle'
  scheduler_params:
    pct_start: 0.01

train_avd_params:
  num_epochs: 200
  num_repeats: 1
  batch_size: 1
  dataloader_workers: 6
  checkpoint_freq: 1
  epoch_milestones: [140, 180]
  lr: 1.0e-3
  lambda_shift: 1
  random_scale: 0.25

visualizer_params:
  kp_size: 5
  draw_border: True
  colormap: 'gist_rainbow'
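The freeze_kp_detector and freeze_bg_predictor flags above indicate that the pretrained keypoint detector and background predictor stay fixed while finetuning at the higher resolution. A minimal sketch of what such freezing usually looks like in PyTorch; the helper below is an assumption, not this fork's actual code:

```python
import torch.nn as nn

def freeze(module: nn.Module) -> None:
    """Disable gradients and normalization updates for a pretrained sub-network."""
    for p in module.parameters():
        p.requires_grad_(False)
    module.eval()  # keeps BatchNorm running statistics fixed while frozen
```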
93 changes: 93 additions & 0 deletions config/vox-1536-finetune.yaml
@@ -0,0 +1,93 @@
# Use this file to finetune from a pretrained 256x256 model
name: vox-1536-finetune
dataset_params:
  root_dir: ./video-preprocessing/vox2-768
  frame_shape: 1536,1536,3
  id_sampling: True
  augmentation_params:
    flip_param:
      horizontal_flip: True
      time_flip: True
    jitter_param:
      brightness: 0.1
      contrast: 0.1
      saturation: 0.1
      hue: 0.1


model_params:
  common_params:
    num_tps: 10
    num_channels: 3
    bg: True
    multi_mask: True
  generator_params:
    block_expansion: 64
    max_features: 512
    num_down_blocks: 3
  dense_motion_params:
    block_expansion: 64
    max_features: 1024
    num_blocks: 5
    scale_factor: 0.25
  avd_network_params:
    id_bottle_size: 128
    pose_bottle_size: 128


train_params:
  visualize_model: False
  num_epochs: 50
  num_repeats: 1
  # A higher LR seems to cause problems when finetuning
  lr_generator: 2.0e-6
  lr_discriminator: 2.0e-5
  batch_size: 1
  scales: [1, 0.5, 0.25, 0.125, 0.0625]
  dataloader_workers: 8
  checkpoint_freq: 1
  dropout_epoch: 0
  dropout_maxp: 0.3
  dropout_startp: 0.1
  dropout_inc_epoch: 0
  bg_start: 81
  freeze_kp_detector: True
  freeze_bg_predictor: True
  transform_params:
    sigma_affine: 0.05
    sigma_tps: 0.005
    points_tps: 5
  loss_weights:
    perceptual: [10, 10, 10, 10, 10]
    equivariance_value: 10
    warp_loss: 10
    bg: 10
    id: 0.1
    l2: 0
    huber: 0
    generator_gan: 0
    generator_feat_match: 0
    discriminator_gan: 0
  optimizer: 'adamw'
  optimizer_params:
    betas: [ 0.9, 0.999 ]
    weight_decay: 0.1
  scheduler: 'onecycle'
  scheduler_params:
    pct_start: 0.01

train_avd_params:
  num_epochs: 200
  num_repeats: 1
  batch_size: 1
  dataloader_workers: 6
  checkpoint_freq: 1
  epoch_milestones: [140, 180]
  lr: 1.0e-3
  lambda_shift: 1
  random_scale: 0.25

visualizer_params:
  kp_size: 5
  draw_border: True
  colormap: 'gist_rainbow'
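For reference, a rough sketch of how the 'adamw' and 'onecycle' settings above would map onto standard PyTorch objects; the stand-in model and step count are placeholders, and the fork's actual wiring may differ:

```python
import torch
from torch.optim import AdamW
from torch.optim.lr_scheduler import OneCycleLR

model = torch.nn.Linear(4, 4)  # stand-in for the generator
optimizer = AdamW(model.parameters(), lr=2.0e-6,
                  betas=(0.9, 0.999), weight_decay=0.1)

steps_per_epoch = 1000  # hypothetical; depends on dataset size and batch size
scheduler = OneCycleLR(optimizer, max_lr=2.0e-6,
                       total_steps=50 * steps_per_epoch,  # num_epochs * steps
                       pct_start=0.01)
```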
98 changes: 98 additions & 0 deletions config/vox-256-deeper-other.yaml
@@ -0,0 +1,98 @@
name: vox-256-deeper-other

dataset_params:
  root_dir: ../data/vox512_webp
  frame_shape: 256,256,3
  id_sampling: True
  augmentation_params:
    flip_param:
      horizontal_flip: True
      time_flip: True
    jitter_param:
      brightness: 0.1
      contrast: 0.1
      saturation: 0.1
      hue: 0.1


model_params:
  common_params:
    num_tps: 10
    num_channels: 3
    bg: True
    multi_mask: True
  generator_params:
    block_expansion: 64
    max_features: 512
    num_down_blocks: 4
    concat_encode: True
    skip_block_type: depthwise
    dropout: 0.1
  dense_motion_params:
    block_expansion: 64
    max_features: 1024
    num_blocks: 5
    scale_factor: 0.25 # might make sense to set to 0.5 because of the additional occlusion (4=>5)
    occlusion_num: 5

  avd_network_params:
    id_bottle_size: 128
    pose_bottle_size: 128


train_params:
  num_epochs: 100
  num_repeats: 5
  lr_generator: 2.0e-4
  lr_discriminator: 2.0e-4
  batch_size: 8
  scales: [1, 0.5, 0.25, 0.125]
  dataloader_workers: 8
  checkpoint_freq: 10
  dropout_epoch: 30
  dropout_maxp: 0.3
  dropout_startp: 0.1
  dropout_inc_epoch: 10
  bg_start: 101
  freeze_kp_detector: True
  freeze_bg_predictor: True
  freeze_dense_motion: False
  transform_params:
    sigma_affine: 0.05
    sigma_tps: 0.005
    points_tps: 5
  loss_weights:
    perceptual: [10, 10, 10, 10, 10]
    equivariance_value: 10
    warp_loss: 10
    bg: 0
    l2: 0
    id: 0.1
    huber: 0
    generator_gan: 1
    generator_feat_match: 0
    discriminator_gan: 1
  optimizer: 'adamw'
  optimizer_params:
    betas: [ 0.9, 0.999 ]
    weight_decay: 1.0e-3
  scheduler: 'onecycle'
  scheduler_params:
    pct_start: 0.3


train_avd_params:
  num_epochs: 100
  num_repeats: 1
  batch_size: 8
  dataloader_workers: 6
  checkpoint_freq: 1
  epoch_milestones: [10, 20]
  lr: 1.0e-3
  lambda_shift: 1
  random_scale: 0.25

visualizer_params:
  kp_size: 5
  draw_border: True
  colormap: 'gist_rainbow'
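The scales list above configures multi-scale training losses in FOMM/TPSMM-style pipelines: the images are downsampled into a pyramid before the perceptual loss is applied at each scale. A rough sketch with plain bilinear resizing (the upstream code uses an anti-aliased downsampler) follows:

```python
import torch
import torch.nn.functional as F

def image_pyramid(x: torch.Tensor, scales=(1, 0.5, 0.25, 0.125)):
    # One downsampled copy per scale; scale 1 is the original image.
    return {s: x if s == 1 else
               F.interpolate(x, scale_factor=s, mode="bilinear",
                             align_corners=False)
            for s in scales}

pyramid = image_pyramid(torch.randn(1, 3, 256, 256))  # keys: 1, 0.5, 0.25, 0.125
```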