@@ -350,7 +350,6 @@ def forward(self, x: Tensor, **kwargs) -> Tensor:
350350def get_default_model_kwargs ():
351351 return dict (
352352 channels = 128 ,
353- patch_blocks = 1 ,
354353 patch_factor = 16 ,
355354 multipliers = [1 , 2 , 4 , 4 , 4 , 4 , 4 ],
356355 factors = [4 , 4 , 4 , 2 , 2 , 2 ],
@@ -360,11 +359,6 @@ def get_default_model_kwargs():
360359 attention_features = 64 ,
361360 attention_multiplier = 2 ,
362361 attention_use_rel_pos = False ,
363- resnet_groups = 8 ,
364- kernel_multiplier_downsample = 2 ,
365- use_nearest_upsample = False ,
366- use_skip_scale = True ,
367- use_context_time = True ,
368362 diffusion_type = "v" ,
369363 diffusion_sigma_distribution = UniformDistribution (),
370364 )
@@ -416,13 +410,6 @@ def decode(self, *args, **kwargs):
416410class AudioDiffusionMAE (DiffusionMAE1d ):
417411 def __init__ (self , * args , ** kwargs ):
418412 default_kwargs = dict (
419- patch_blocks = 1 ,
420- patch_factor = 1 ,
421- resnet_groups = 8 ,
422- kernel_multiplier_downsample = 2 ,
423- use_nearest_upsample = False ,
424- use_skip_scale = True ,
425- use_context_time = True ,
426413 diffusion_type = "v" ,
427414 diffusion_sigma_distribution = UniformDistribution (),
428415 stft_num_fft = 1023 ,
@@ -470,8 +457,6 @@ def __init__(self, in_channels: int, **kwargs):
470457 stft_num_fft = 1023 ,
471458 stft_hop_length = 256 ,
472459 channels = 512 ,
473- patch_blocks = 1 ,
474- patch_factor = 1 ,
475460 multipliers = [3 , 2 , 1 , 1 , 1 , 1 , 1 , 1 ],
476461 factors = [1 , 2 , 2 , 2 , 2 , 2 , 2 ],
477462 num_blocks = [1 , 1 , 1 , 1 , 1 , 1 , 1 ],
@@ -480,11 +465,6 @@ def __init__(self, in_channels: int, **kwargs):
480465 attention_features = 64 ,
481466 attention_multiplier = 2 ,
482467 attention_use_rel_pos = False ,
483- resnet_groups = 8 ,
484- kernel_multiplier_downsample = 2 ,
485- use_nearest_upsample = False ,
486- use_skip_scale = True ,
487- use_context_time = True ,
488468 diffusion_type = "v" ,
489469 diffusion_sigma_distribution = UniformDistribution (),
490470 )
0 commit comments