Skip to content

Commit 03b9bb0

Browse files
refactor: remove autoencoder, remove ddsp, default to learned time embedding
1 parent: 425e559 · commit: 03b9bb0

File tree

5 files changed

+28
-485
lines changed

5 files changed

+28
-485
lines changed

README.md

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -22,10 +22,10 @@ https://colab.research.google.com/gist/flavioschneider/39c6454bfc2d03dc7d0c5c9d8
2222
```py
2323
from audio_diffusion_pytorch import AudioDiffusionModel
2424

25-
model = AudioDiffusionModel()
25+
model = AudioDiffusionModel(in_channels=1)
2626

2727
# Train model with audio sources
28-
x = torch.randn(2, 1, 2 ** 18) # [batch, channels, samples], 2**18 ≈ 12s of audio at a frequency of 22050
28+
x = torch.randn(2, 1, 2 ** 18) # [batch, in_channels, samples], 2**18 ≈ 12s of audio at a frequency of 22050Hz
2929
loss = model(x)
3030
loss.backward() # Do this many times
3131

@@ -46,22 +46,21 @@ from audio_diffusion_pytorch import UNet1d
4646
# UNet used to denoise our 1D (audio) data
4747
unet = UNet1d(
4848
in_channels=1,
49-
patch_size=16,
5049
channels=128,
50+
patch_size=16,
51+
kernel_sizes_init=[1, 3, 7],
5152
multipliers=[1, 2, 4, 4, 4, 4, 4],
5253
factors=[4, 4, 4, 2, 2, 2],
5354
attentions=[False, False, False, True, True, True],
5455
num_blocks=[2, 2, 2, 2, 2, 2],
5556
attention_heads=8,
5657
attention_features=64,
5758
attention_multiplier=2,
59+
use_attention_bottleneck=True,
5860
resnet_groups=8,
5961
kernel_multiplier_downsample=2,
60-
kernel_sizes_init=[1, 3, 7],
6162
use_nearest_upsample=False,
6263
use_skip_scale=True,
63-
use_attention_bottleneck=True,
64-
use_learned_time_embedding=True,
6564
)
6665

6766
x = torch.randn(3, 1, 2 ** 16)

audio_diffusion_pytorch/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,5 +11,5 @@
1111
Schedule,
1212
SpanBySpanComposer,
1313
)
14-
from .model import AudioAutoEncoderModel, AudioDiffusionModel, Model1d
15-
from .modules import AutoEncoder1d, Encoder1d, UNet1d
14+
from .model import AudioDiffusionModel, Model1d
15+
from .modules import Encoder1d, UNet1d

audio_diffusion_pytorch/ddsp.py

Lines changed: 0 additions & 241 deletions
This file was deleted.

0 commit comments

Comments
 (0)