Commit 9914f74

Add more maxxvit weights including ConvNeXt conv block based experiments.
1 parent 145c4d8 commit 9914f74

File tree: 2 files changed, 46 additions & 18 deletions

README.md

Lines changed: 15 additions & 0 deletions
@@ -21,6 +21,21 @@ And a big thanks to all GitHub sponsors who helped with some of my costs before
 
 ## What's New
 
+### Oct 10, 2022
+* More weights in `maxxvit` series, incl first ConvNeXt block based `coatnext` and `maxxvit` experiments:
+  * `coatnext_nano_rw_224` - 82.0 @ 224 (G) -- (uses ConvNeXt conv block, no BatchNorm)
+  * `maxxvit_rmlp_nano_rw_256` - 83.0 @ 256, 83.7 @ 320 (G) (uses ConvNeXt conv block, no BN)
+  * `maxvit_rmlp_small_rw_224` - 84.5 @ 224, 85.1 @ 320 (G)
+  * `maxxvit_rmlp_small_rw_256` - 84.6 @ 256, 84.9 @ 288 (G) -- could be trained better, hparams need tuning (uses ConvNeXt block, no BN)
+  * `coatnet_rmlp_2_rw_224` - 84.6 @ 224, 85 @ 320 (T)
+
+### Sept 23, 2022
+* LAION-2B CLIP image towers supported as pretrained backbones for fine-tune or features (no classifier)
+  * vit_base_patch32_224_clip_laion2b
+  * vit_large_patch14_224_clip_laion2b
+  * vit_huge_patch14_224_clip_laion2b
+  * vit_giant_patch14_224_clip_laion2b
+
 ### Sept 7, 2022
 * Hugging Face [`timm` docs](https://huggingface.co/docs/hub/timm) home now exists, look for more here in the future
 * Add BEiT-v2 weights for base and large 224x224 models from https://github.com/microsoft/unilm/tree/master/beit2
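The weights called out above are regular `timm` entrypoints once a build containing this commit is installed. A minimal sketch of exercising one of the new ConvNeXt-block variants, plus a LAION-2B CLIP tower as a headless feature backbone (model names and top-1 numbers are taken from the list above; the rest is standard `timm`/`torch` usage):

```python
import timm
import torch

# ConvNeXt-block CoAtNet variant from the Oct 10 list (82.0 @ 224);
# pretrained=True pulls the checkpoint wired into its default cfg.
model = timm.create_model('coatnext_nano_rw_224', pretrained=True).eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))
print(logits.shape)  # torch.Size([1, 1000])

# LAION-2B CLIP image tower from the Sept 23 list as a feature backbone
# (num_classes=0 drops the classifier, matching the "no classifier" note).
backbone = timm.create_model('vit_base_patch32_224_clip_laion2b', pretrained=True, num_classes=0)
with torch.no_grad():
    feats = backbone(torch.randn(1, 3, 224, 224))
print(feats.shape)  # ViT-B width, e.g. torch.Size([1, 768])
```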

timm/models/maxxvit.py

Lines changed: 31 additions & 18 deletions
@@ -95,10 +95,13 @@ def _cfg(url='', **kwargs):
     'coatnet_rmlp_0_rw_224': _cfg(url=''),
     'coatnet_rmlp_1_rw_224': _cfg(
         url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_rmlp_1_rw_224_sw-9051e6c3.pth'),
-    'coatnet_rmlp_2_rw_224': _cfg(url=''),
+    'coatnet_rmlp_2_rw_224': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_rmlp_2_rw_224_sw-5ccfac55.pth'),
     'coatnet_rmlp_3_rw_224': _cfg(url=''),
     'coatnet_nano_cc_224': _cfg(url=''),
-    'coatnext_nano_rw_224': _cfg(url=''),
+    'coatnext_nano_rw_224': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnext_nano_rw_224_ad-22cb71c2.pth',
+        crop_pct=0.9),
 
     # Trying to be like the CoAtNet paper configs
     'coatnet_0_224': _cfg(url=''),
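The `url` fields above are what `pretrained=True` resolves at model-creation time. If only the raw checkpoint is wanted, a rough sketch of fetching one directly with `torch.hub` (the URL is copied from the cfg in this diff; the assumption that the file is a plain state_dict is hedged in the code):

```python
import torch

# Checkpoint URL copied from the 'coatnext_nano_rw_224' cfg in this diff.
url = ('https://github.com/rwightman/pytorch-image-models/releases/download/'
       'v0.1-weights-maxx/coatnext_nano_rw_224_ad-22cb71c2.pth')
ckpt = torch.hub.load_state_dict_from_url(url, map_location='cpu')

# timm release checkpoints are typically plain state_dicts, but unwrap just in case.
state_dict = ckpt['model'] if isinstance(ckpt, dict) and 'model' in ckpt else ckpt
print(list(state_dict.keys())[:5])
print(f'{sum(v.numel() for v in state_dict.values()) / 1e6:.1f}M params')
```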
@@ -128,16 +131,22 @@ def _cfg(url='', **kwargs):
         url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxvit_rmlp_tiny_rw_256_sw-bbef0ff5.pth',
         input_size=(3, 256, 256), pool_size=(8, 8)),
     'maxvit_rmlp_small_rw_224': _cfg(
-        url=''),
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxvit_rmlp_small_rw_224_sw-6ef0ae4f.pth',
+        crop_pct=0.9,
+    ),
     'maxvit_rmlp_small_rw_256': _cfg(
         url='',
         input_size=(3, 256, 256), pool_size=(8, 8)),
 
     'maxvit_tiny_pm_256': _cfg(url='', input_size=(3, 256, 256), pool_size=(8, 8)),
 
-    'maxxvit_nano_rw_256': _cfg(url='', input_size=(3, 256, 256), pool_size=(8, 8)),
-    'maxxvit_tiny_rw_256': _cfg(url='', input_size=(3, 256, 256), pool_size=(8, 8)),
-    'maxxvit_small_rw_256': _cfg(url='', input_size=(3, 256, 256), pool_size=(8, 8)),
+    'maxxvit_rmlp_nano_rw_256': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxxvit_rmlp_nano_rw_256_sw-0325d459.pth',
+        input_size=(3, 256, 256), pool_size=(8, 8)),
+    'maxxvit_rmlp_tiny_rw_256': _cfg(url='', input_size=(3, 256, 256), pool_size=(8, 8)),
+    'maxxvit_rmlp_small_rw_256': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxxvit_rmlp_small_rw_256_sw-37e217ff.pth',
+        input_size=(3, 256, 256), pool_size=(8, 8)),
 
     # Trying to be like the MaxViT paper configs
     'maxvit_tiny_224': _cfg(url=''),
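The `crop_pct`, `input_size`, and `pool_size` values being added in these cfgs feed eval-time preprocessing. A small sketch of resolving them into a transform with timm's data helpers (standard `timm.data` API; assumes a version that includes these weights):

```python
import timm
from timm.data import resolve_data_config, create_transform

model = timm.create_model('maxvit_rmlp_small_rw_224', pretrained=True)

# Pull input_size / crop_pct / mean / std from the model's pretrained cfg...
config = resolve_data_config({}, model=model)
print(config)  # expect input_size=(3, 224, 224) and crop_pct=0.9 per the cfg above

# ...and build the matching eval transform.
transform = create_transform(**config, is_training=False)
print(transform)
```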
@@ -334,21 +343,22 @@ def _next_cfg(
         rel_pos_dim=512,
 ):
     # For experimental models with convnext instead of mbconv
+    init_values = to_2tuple(init_values)
     return dict(
         conv_cfg=MaxxVitConvCfg(
             block_type='convnext',
             stride_mode=stride_mode,
             pool_type=pool_type,
             expand_output=False,
-            init_values=init_values,
+            init_values=init_values[0],
             norm_layer=conv_norm_layer,
             norm_layer_cl=conv_norm_layer_cl,
         ),
         transformer_cfg=MaxxVitTransformerCfg(
             expand_first=False,
             pool_type=pool_type,
             window_size=window_size,
-            init_values=init_values,
+            init_values=init_values[1],
             norm_layer=transformer_norm_layer,
             norm_layer_cl=transformer_norm_layer_cl,
             rel_pos_type=rel_pos_type,
@@ -497,7 +507,10 @@ def _next_cfg(
         depths=(3, 4, 6, 3),
         stem_width=(32, 64),
         weight_init='normal',
-        **_next_cfg(),
+        **_next_cfg(
+            rel_pos_type='bias',
+            init_values=(1e-5, None)
+        ),
     ),
 
     # Trying to be like the CoAtNet paper configs
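The `_next_cfg` change above means `init_values` (layer-scale init) can now be either a single value applied to both block types or a (conv, transformer) pair, which is how the `coatnext_nano_rw_224` config uses it with `(1e-5, None)`. A tiny sketch of the `to_2tuple` normalization this relies on (import path as used by timm of this era):

```python
from timm.models.layers import to_2tuple

# A scalar is broadcast to both the ConvNeXt conv blocks and the transformer blocks...
print(to_2tuple(1e-6))          # (1e-06, 1e-06)

# ...while a pair lets them differ, e.g. layer scale on the conv blocks only.
print(to_2tuple((1e-5, None)))  # (1e-05, None)
```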
@@ -612,22 +625,22 @@ def _next_cfg(
         **_rw_max_cfg(),
     ),
 
-    maxxvit_nano_rw_256=MaxxVitCfg(
+    maxxvit_rmlp_nano_rw_256=MaxxVitCfg(
         embed_dim=(64, 128, 256, 512),
         depths=(1, 2, 3, 1),
         block_type=('M',) * 4,
         stem_width=(32, 64),
         weight_init='normal',
         **_next_cfg(),
     ),
-    maxxvit_tiny_rw_256=MaxxVitCfg(
+    maxxvit_rmlp_tiny_rw_256=MaxxVitCfg(
         embed_dim=(64, 128, 256, 512),
         depths=(2, 2, 5, 2),
         block_type=('M',) * 4,
         stem_width=(32, 64),
         **_next_cfg(),
     ),
-    maxxvit_small_rw_256=MaxxVitCfg(
+    maxxvit_rmlp_small_rw_256=MaxxVitCfg(
         embed_dim=(96, 192, 384, 768),
         depths=(2, 2, 5, 2),
         block_type=('M',) * 4,
@@ -1861,18 +1874,18 @@ def maxvit_tiny_pm_256(pretrained=False, **kwargs):
 
 
 @register_model
-def maxxvit_nano_rw_256(pretrained=False, **kwargs):
-    return _create_maxxvit('maxxvit_nano_rw_256', pretrained=pretrained, **kwargs)
+def maxxvit_rmlp_nano_rw_256(pretrained=False, **kwargs):
+    return _create_maxxvit('maxxvit_rmlp_nano_rw_256', pretrained=pretrained, **kwargs)
 
 
 @register_model
-def maxxvit_tiny_rw_256(pretrained=False, **kwargs):
-    return _create_maxxvit('maxxvit_tiny_rw_256', pretrained=pretrained, **kwargs)
+def maxxvit_rmlp_tiny_rw_256(pretrained=False, **kwargs):
+    return _create_maxxvit('maxxvit_rmlp_tiny_rw_256', pretrained=pretrained, **kwargs)
 
 
 @register_model
-def maxxvit_small_rw_256(pretrained=False, **kwargs):
-    return _create_maxxvit('maxxvit_small_rw_256', pretrained=pretrained, **kwargs)
+def maxxvit_rmlp_small_rw_256(pretrained=False, **kwargs):
+    return _create_maxxvit('maxxvit_rmlp_small_rw_256', pretrained=pretrained, **kwargs)
 
 
 @register_model
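Because the three `maxxvit_*_rw_256` entrypoints are renamed rather than aliased, the old names stop resolving once this commit lands. A quick sketch of checking what is registered (standard `timm` registry API):

```python
import timm

# The renamed entrypoints are discoverable under the new rmlp names.
print(timm.list_models('maxxvit_rmlp_*'))
# ['maxxvit_rmlp_nano_rw_256', 'maxxvit_rmlp_small_rw_256', 'maxxvit_rmlp_tiny_rw_256']

# Creating a model by an old name now fails, so update any scripts/configs.
try:
    timm.create_model('maxxvit_nano_rw_256')
except RuntimeError as err:
    print('no longer registered:', err)
```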
