@@ -95,10 +95,13 @@ def _cfg(url='', **kwargs):
     'coatnet_rmlp_0_rw_224': _cfg(url=''),
     'coatnet_rmlp_1_rw_224': _cfg(
         url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_rmlp_1_rw_224_sw-9051e6c3.pth'),
-    'coatnet_rmlp_2_rw_224': _cfg(url=''),
+    'coatnet_rmlp_2_rw_224': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_rmlp_2_rw_224_sw-5ccfac55.pth'),
     'coatnet_rmlp_3_rw_224': _cfg(url=''),
     'coatnet_nano_cc_224': _cfg(url=''),
-    'coatnext_nano_rw_224': _cfg(url=''),
+    'coatnext_nano_rw_224': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnext_nano_rw_224_ad-22cb71c2.pth',
+        crop_pct=0.9),

     # Trying to be like the CoAtNet paper configs
     'coatnet_0_224': _cfg(url=''),
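Both entries gaining URLs above now point at hosted checkpoints, and `coatnext_nano_rw_224` also overrides the default `crop_pct`. As an illustrative sketch (not part of this commit), that per-model default feeds timm's standard data-config resolution:

```python
# Illustrative only, assuming timm with these weights published.
import timm
from timm.data import resolve_data_config, create_transform

model = timm.create_model('coatnext_nano_rw_224', pretrained=True)
data_cfg = resolve_data_config({}, model=model)
print(data_cfg['crop_pct'])               # 0.9, from the pretrained cfg above
transform = create_transform(**data_cfg)  # eval transform honoring that crop_pct
```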
@@ -128,16 +131,22 @@ def _cfg(url='', **kwargs):
         url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxvit_rmlp_tiny_rw_256_sw-bbef0ff5.pth',
         input_size=(3, 256, 256), pool_size=(8, 8)),
     'maxvit_rmlp_small_rw_224': _cfg(
-        url=''),
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxvit_rmlp_small_rw_224_sw-6ef0ae4f.pth',
+        crop_pct=0.9,
+    ),
     'maxvit_rmlp_small_rw_256': _cfg(
         url='',
         input_size=(3, 256, 256), pool_size=(8, 8)),

     'maxvit_tiny_pm_256': _cfg(url='', input_size=(3, 256, 256), pool_size=(8, 8)),

-    'maxxvit_nano_rw_256': _cfg(url='', input_size=(3, 256, 256), pool_size=(8, 8)),
-    'maxxvit_tiny_rw_256': _cfg(url='', input_size=(3, 256, 256), pool_size=(8, 8)),
-    'maxxvit_small_rw_256': _cfg(url='', input_size=(3, 256, 256), pool_size=(8, 8)),
+    'maxxvit_rmlp_nano_rw_256': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxxvit_rmlp_nano_rw_256_sw-0325d459.pth',
+        input_size=(3, 256, 256), pool_size=(8, 8)),
+    'maxxvit_rmlp_tiny_rw_256': _cfg(url='', input_size=(3, 256, 256), pool_size=(8, 8)),
+    'maxxvit_rmlp_small_rw_256': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxxvit_rmlp_small_rw_256_sw-37e217ff.pth',
+        input_size=(3, 256, 256), pool_size=(8, 8)),

     # Trying to be like the MaxViT paper configs
     'maxvit_tiny_224': _cfg(url=''),
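The renamed 256px `maxxvit_rmlp_*` entries keep their `input_size`/`pool_size` defaults, so models built from these configs expect 256x256 input. A quick sanity check (illustrative, not from the diff):

```python
# Illustrative only; pretrained=False avoids needing the hosted weights.
import torch
import timm

model = timm.create_model('maxxvit_rmlp_nano_rw_256', pretrained=False)
x = torch.randn(1, 3, 256, 256)   # matches input_size=(3, 256, 256)
print(model(x).shape)             # torch.Size([1, 1000]) with the default head
```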
@@ -334,21 +343,22 @@ def _next_cfg(
         rel_pos_dim=512,
 ):
     # For experimental models with convnext instead of mbconv
+    init_values = to_2tuple(init_values)
     return dict(
         conv_cfg=MaxxVitConvCfg(
             block_type='convnext',
             stride_mode=stride_mode,
             pool_type=pool_type,
             expand_output=False,
-            init_values=init_values,
+            init_values=init_values[0],
             norm_layer=conv_norm_layer,
             norm_layer_cl=conv_norm_layer_cl,
         ),
         transformer_cfg=MaxxVitTransformerCfg(
             expand_first=False,
             pool_type=pool_type,
             window_size=window_size,
-            init_values=init_values,
+            init_values=init_values[1],
             norm_layer=transformer_norm_layer,
             norm_layer_cl=transformer_norm_layer_cl,
             rel_pos_type=rel_pos_type,
@@ -497,7 +507,10 @@ def _next_cfg(
         depths=(3, 4, 6, 3),
         stem_width=(32, 64),
         weight_init='normal',
-        **_next_cfg(),
+        **_next_cfg(
+            rel_pos_type='bias',
+            init_values=(1e-5, None)
+        ),
     ),

     # Trying to be like the CoAtNet paper configs
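The `to_2tuple(init_values)` change lets `_next_cfg` accept either a single LayerScale init value or a `(conv, transformer)` pair, which is what the `init_values=(1e-5, None)` above relies on. A small sketch of the semantics (import path as used elsewhere in timm; the sketch itself is not part of the diff):

```python
from timm.models.layers import to_2tuple

# A scalar is broadcast to both branches:
assert to_2tuple(1e-6) == (1e-6, 1e-6)

# A pair splits them: convnext blocks get LayerScale init 1e-5,
# transformer blocks get None (LayerScale disabled).
conv_iv, transformer_iv = to_2tuple((1e-5, None))
```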
@@ -612,22 +625,22 @@ def _next_cfg(
         **_rw_max_cfg(),
     ),

-    maxxvit_nano_rw_256=MaxxVitCfg(
+    maxxvit_rmlp_nano_rw_256=MaxxVitCfg(
         embed_dim=(64, 128, 256, 512),
         depths=(1, 2, 3, 1),
         block_type=('M',) * 4,
         stem_width=(32, 64),
         weight_init='normal',
         **_next_cfg(),
     ),
-    maxxvit_tiny_rw_256=MaxxVitCfg(
+    maxxvit_rmlp_tiny_rw_256=MaxxVitCfg(
         embed_dim=(64, 128, 256, 512),
         depths=(2, 2, 5, 2),
         block_type=('M',) * 4,
         stem_width=(32, 64),
         **_next_cfg(),
     ),
-    maxxvit_small_rw_256=MaxxVitCfg(
+    maxxvit_rmlp_small_rw_256=MaxxVitCfg(
         embed_dim=(96, 192, 384, 768),
         depths=(2, 2, 5, 2),
         block_type=('M',) * 4,
@@ -1861,18 +1874,18 @@ def maxvit_tiny_pm_256(pretrained=False, **kwargs):


 @register_model
-def maxxvit_nano_rw_256(pretrained=False, **kwargs):
-    return _create_maxxvit('maxxvit_nano_rw_256', pretrained=pretrained, **kwargs)
+def maxxvit_rmlp_nano_rw_256(pretrained=False, **kwargs):
+    return _create_maxxvit('maxxvit_rmlp_nano_rw_256', pretrained=pretrained, **kwargs)


 @register_model
-def maxxvit_tiny_rw_256(pretrained=False, **kwargs):
-    return _create_maxxvit('maxxvit_tiny_rw_256', pretrained=pretrained, **kwargs)
+def maxxvit_rmlp_tiny_rw_256(pretrained=False, **kwargs):
+    return _create_maxxvit('maxxvit_rmlp_tiny_rw_256', pretrained=pretrained, **kwargs)


 @register_model
-def maxxvit_small_rw_256(pretrained=False, **kwargs):
-    return _create_maxxvit('maxxvit_small_rw_256', pretrained=pretrained, **kwargs)
+def maxxvit_rmlp_small_rw_256(pretrained=False, **kwargs):
+    return _create_maxxvit('maxxvit_rmlp_small_rw_256', pretrained=pretrained, **kwargs)


 @register_model
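Since this hunk renames the registered entry points along with the configs, the old `maxxvit_{nano,tiny,small}_rw_256` names are gone after this commit. A hedged usage check (not part of the diff):

```python
import timm

# Only the *_rmlp_* names are registered after the rename:
print(timm.list_models('maxxvit*'))
# ['maxxvit_rmlp_nano_rw_256', 'maxxvit_rmlp_small_rw_256', 'maxxvit_rmlp_tiny_rw_256']

model = timm.create_model('maxxvit_rmlp_nano_rw_256', pretrained=True)
```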