
Commit 9811e22

Fix regression in models with 1001-class pretrained weights. Improve batchnorm arg and BatchNormAct layer handling in several models.
1 parent: aaa715b

15 files changed: 157 additions, 147 deletions


tests/test_models.py

Lines changed: 9 additions & 3 deletions
@@ -83,7 +83,6 @@ def test_model_default_cfgs(model_name, batch_size):
     cfg = model.default_cfg
 
     classifier = cfg['classifier']
-    first_conv = cfg['first_conv']
     pool_size = cfg['pool_size']
     input_size = model.default_cfg['input_size']
 
@@ -111,9 +110,16 @@ def test_model_default_cfgs(model_name, batch_size):
         # FIXME mobilenetv3 forward_features vs removed pooling differ
         assert outputs.shape[-1] == pool_size[-1] and outputs.shape[-2] == pool_size[-2]
 
-    # check classifier and first convolution names match those in default_cfg
+    # check classifier name matches default_cfg
     assert classifier + ".weight" in state_dict.keys(), f'{classifier} not in model params'
-    assert first_conv + ".weight" in state_dict.keys(), f'{first_conv} not in model params'
+
+    # check first conv(s) names match default_cfg
+    first_conv = cfg['first_conv']
+    if isinstance(first_conv, str):
+        first_conv = (first_conv,)
+    assert isinstance(first_conv, (tuple, list))
+    for fc in first_conv:
+        assert fc + ".weight" in state_dict.keys(), f'{fc} not in model params'
 
 
 if 'GITHUB_ACTIONS' not in os.environ:
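
Note (not part of the diff): the normalization in the test above exists because default_cfg['first_conv'] may now be either a single layer name or a tuple of names. A minimal sketch with hypothetical cfg values:

# Hypothetical default_cfg fragments; 'first_conv' may be a str or a tuple of layer names.
cfg_single = {'classifier': 'fc', 'first_conv': 'conv1'}
cfg_multi = {'classifier': 'fc', 'first_conv': ('stem.conv1', 'stem.conv2')}

def first_convs(cfg):
    fc = cfg['first_conv']
    return (fc,) if isinstance(fc, str) else tuple(fc)

assert first_convs(cfg_single) == ('conv1',)
assert first_convs(cfg_multi) == ('stem.conv1', 'stem.conv2')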

timm/models/dpn.py

Lines changed: 5 additions & 3 deletions
@@ -7,6 +7,7 @@
 Hacked together by / Copyright 2020 Ross Wightman
 """
 from collections import OrderedDict
+from functools import partial
 from typing import Tuple
 
 import torch
@@ -173,12 +174,14 @@ def __init__(self, small=False, num_init_features=64, k_r=96, groups=32,
         self.drop_rate = drop_rate
         self.b = b
         assert output_stride == 32  # FIXME look into dilation support
+        norm_layer = partial(BatchNormAct2d, eps=.001)
+        fc_norm_layer = partial(BatchNormAct2d, eps=.001, act_layer=fc_act, inplace=False)
         bw_factor = 1 if small else 4
         blocks = OrderedDict()
 
         # conv1
         blocks['conv1_1'] = ConvBnAct(
-            in_chans, num_init_features, kernel_size=3 if small else 7, stride=2, norm_kwargs=dict(eps=.001))
+            in_chans, num_init_features, kernel_size=3 if small else 7, stride=2, norm_layer=norm_layer)
         blocks['conv1_pool'] = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
         self.feature_info = [dict(num_chs=num_init_features, reduction=2, module='features.conv1_1')]
 
@@ -226,8 +229,7 @@ def __init__(self, small=False, num_init_features=64, k_r=96, groups=32,
         in_chs += inc
         self.feature_info += [dict(num_chs=in_chs, reduction=32, module=f'features.conv5_{k_sec[3]}')]
 
-        def _fc_norm(f, eps): return BatchNormAct2d(f, eps=eps, act_layer=fc_act, inplace=False)
-        blocks['conv5_bn_ac'] = CatBnAct(in_chs, norm_layer=_fc_norm)
+        blocks['conv5_bn_ac'] = CatBnAct(in_chs, norm_layer=fc_norm_layer)
 
         self.num_features = in_chs
         self.features = nn.Sequential(blocks)
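
Note (not part of the diff): the DPN change replaces norm_kwargs dicts and the local _fc_norm closure with norm layers whose extra arguments are bound once via functools.partial. A minimal sketch of the pattern, using nn.BatchNorm2d as a stand-in for timm's BatchNormAct2d:

from functools import partial

import torch
import torch.nn as nn

# Bind non-default args once; downstream code only ever calls norm_layer(num_features).
norm_layer = partial(nn.BatchNorm2d, eps=.001)

bn = norm_layer(64)           # equivalent to nn.BatchNorm2d(64, eps=.001)
x = torch.randn(2, 64, 8, 8)
print(bn(x).shape, bn.eps)    # torch.Size([2, 64, 8, 8]) 0.001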

timm/models/gluon_xception.py

Lines changed: 25 additions & 41 deletions
@@ -42,10 +42,8 @@
 
 
 class SeparableConv2d(nn.Module):
-    def __init__(self, inplanes, planes, kernel_size=3, stride=1,
-                 dilation=1, bias=False, norm_layer=None, norm_kwargs=None):
+    def __init__(self, inplanes, planes, kernel_size=3, stride=1, dilation=1, bias=False, norm_layer=None):
         super(SeparableConv2d, self).__init__()
-        norm_kwargs = norm_kwargs if norm_kwargs is not None else {}
         self.kernel_size = kernel_size
         self.dilation = dilation
 
@@ -54,7 +52,7 @@ def __init__(self, inplanes, planes, kernel_size=3, stride=1,
         self.conv_dw = nn.Conv2d(
             inplanes, inplanes, kernel_size, stride=stride,
             padding=padding, dilation=dilation, groups=inplanes, bias=bias)
-        self.bn = norm_layer(num_features=inplanes, **norm_kwargs)
+        self.bn = norm_layer(num_features=inplanes)
         # pointwise convolution
         self.conv_pw = nn.Conv2d(inplanes, planes, kernel_size=1, bias=bias)
 
@@ -66,10 +64,8 @@ def forward(self, x):
 
 
 class Block(nn.Module):
-    def __init__(self, inplanes, planes, stride=1, dilation=1, start_with_relu=True,
-                 norm_layer=None, norm_kwargs=None, ):
+    def __init__(self, inplanes, planes, stride=1, dilation=1, start_with_relu=True, norm_layer=None):
         super(Block, self).__init__()
-        norm_kwargs = norm_kwargs if norm_kwargs is not None else {}
         if isinstance(planes, (list, tuple)):
             assert len(planes) == 3
         else:
@@ -80,17 +76,16 @@ def __init__(self, inplanes, planes, stride=1, dilation=1, start_with_relu=True,
             self.skip = nn.Sequential()
             self.skip.add_module('conv1', nn.Conv2d(
                 inplanes, outplanes, 1, stride=stride, bias=False)),
-            self.skip.add_module('bn1', norm_layer(num_features=outplanes, **norm_kwargs))
+            self.skip.add_module('bn1', norm_layer(num_features=outplanes))
         else:
             self.skip = None
 
         rep = OrderedDict()
         for i in range(3):
             rep['act%d' % (i + 1)] = nn.ReLU(inplace=True)
             rep['conv%d' % (i + 1)] = SeparableConv2d(
-                inplanes, planes[i], 3, stride=stride if i == 2 else 1, dilation=dilation,
-                norm_layer=norm_layer, norm_kwargs=norm_kwargs)
-            rep['bn%d' % (i + 1)] = norm_layer(planes[i], **norm_kwargs)
+                inplanes, planes[i], 3, stride=stride if i == 2 else 1, dilation=dilation, norm_layer=norm_layer)
+            rep['bn%d' % (i + 1)] = norm_layer(planes[i])
             inplanes = planes[i]
 
         if not start_with_relu:
@@ -115,74 +110,63 @@ class Xception65(nn.Module):
     """
 
     def __init__(self, num_classes=1000, in_chans=3, output_stride=32, norm_layer=nn.BatchNorm2d,
-                 norm_kwargs=None, drop_rate=0., global_pool='avg'):
+                 drop_rate=0., global_pool='avg'):
         super(Xception65, self).__init__()
         self.num_classes = num_classes
         self.drop_rate = drop_rate
-        norm_kwargs = norm_kwargs if norm_kwargs is not None else {}
         if output_stride == 32:
             entry_block3_stride = 2
             exit_block20_stride = 2
-            middle_block_dilation = 1
-            exit_block_dilations = (1, 1)
+            middle_dilation = 1
+            exit_dilation = (1, 1)
         elif output_stride == 16:
             entry_block3_stride = 2
             exit_block20_stride = 1
-            middle_block_dilation = 1
-            exit_block_dilations = (1, 2)
+            middle_dilation = 1
+            exit_dilation = (1, 2)
         elif output_stride == 8:
             entry_block3_stride = 1
             exit_block20_stride = 1
-            middle_block_dilation = 2
-            exit_block_dilations = (2, 4)
+            middle_dilation = 2
+            exit_dilation = (2, 4)
         else:
             raise NotImplementedError
 
         # Entry flow
         self.conv1 = nn.Conv2d(in_chans, 32, kernel_size=3, stride=2, padding=1, bias=False)
-        self.bn1 = norm_layer(num_features=32, **norm_kwargs)
+        self.bn1 = norm_layer(num_features=32)
         self.act1 = nn.ReLU(inplace=True)
 
         self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1, bias=False)
         self.bn2 = norm_layer(num_features=64)
         self.act2 = nn.ReLU(inplace=True)
 
-        self.block1 = Block(
-            64, 128, stride=2, start_with_relu=False, norm_layer=norm_layer, norm_kwargs=norm_kwargs)
+        self.block1 = Block(64, 128, stride=2, start_with_relu=False, norm_layer=norm_layer)
         self.block1_act = nn.ReLU(inplace=True)
-        self.block2 = Block(
-            128, 256, stride=2, start_with_relu=False, norm_layer=norm_layer, norm_kwargs=norm_kwargs)
-        self.block3 = Block(
-            256, 728, stride=entry_block3_stride, norm_layer=norm_layer, norm_kwargs=norm_kwargs)
+        self.block2 = Block(128, 256, stride=2, start_with_relu=False, norm_layer=norm_layer)
+        self.block3 = Block(256, 728, stride=entry_block3_stride, norm_layer=norm_layer)
 
         # Middle flow
         self.mid = nn.Sequential(OrderedDict([('block%d' % i, Block(
-            728, 728, stride=1, dilation=middle_block_dilation,
-            norm_layer=norm_layer, norm_kwargs=norm_kwargs)) for i in range(4, 20)]))
+            728, 728, stride=1, dilation=middle_dilation, norm_layer=norm_layer)) for i in range(4, 20)]))
 
         # Exit flow
         self.block20 = Block(
-            728, (728, 1024, 1024), stride=exit_block20_stride, dilation=exit_block_dilations[0],
-            norm_layer=norm_layer, norm_kwargs=norm_kwargs)
+            728, (728, 1024, 1024), stride=exit_block20_stride, dilation=exit_dilation[0], norm_layer=norm_layer)
         self.block20_act = nn.ReLU(inplace=True)
 
-        self.conv3 = SeparableConv2d(
-            1024, 1536, 3, stride=1, dilation=exit_block_dilations[1],
-            norm_layer=norm_layer, norm_kwargs=norm_kwargs)
-        self.bn3 = norm_layer(num_features=1536, **norm_kwargs)
+        self.conv3 = SeparableConv2d(1024, 1536, 3, stride=1, dilation=exit_dilation[1], norm_layer=norm_layer)
+        self.bn3 = norm_layer(num_features=1536)
         self.act3 = nn.ReLU(inplace=True)
 
-        self.conv4 = SeparableConv2d(
-            1536, 1536, 3, stride=1, dilation=exit_block_dilations[1],
-            norm_layer=norm_layer, norm_kwargs=norm_kwargs)
-        self.bn4 = norm_layer(num_features=1536, **norm_kwargs)
+        self.conv4 = SeparableConv2d(1536, 1536, 3, stride=1, dilation=exit_dilation[1], norm_layer=norm_layer)
+        self.bn4 = norm_layer(num_features=1536)
         self.act4 = nn.ReLU(inplace=True)
 
         self.num_features = 2048
         self.conv5 = SeparableConv2d(
-            1536, self.num_features, 3, stride=1, dilation=exit_block_dilations[1],
-            norm_layer=norm_layer, norm_kwargs=norm_kwargs)
-        self.bn5 = norm_layer(num_features=self.num_features, **norm_kwargs)
+            1536, self.num_features, 3, stride=1, dilation=exit_dilation[1], norm_layer=norm_layer)
+        self.bn5 = norm_layer(num_features=self.num_features)
         self.act5 = nn.ReLU(inplace=True)
         self.feature_info = [
             dict(num_chs=64, reduction=2, module='act2'),
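
Note (not part of the diff): with norm_kwargs removed, any customization of the Xception65 norm layers is expressed as a ready-to-call norm_layer. A hedged usage sketch, assuming a timm install that includes this commit and the import path timm.models.gluon_xception:

from functools import partial

import torch.nn as nn
from timm.models.gluon_xception import Xception65

# norm_layer must accept num_features as its only required argument.
model = Xception65(norm_layer=partial(nn.BatchNorm2d, eps=1e-3))
print(model.bn1.eps)  # 0.001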

timm/models/helpers.py

Lines changed: 49 additions & 45 deletions
@@ -148,6 +148,31 @@ def load_custom_pretrained(model, cfg=None, load_fn=None, progress=False, check_
         _logger.warning("Valid function to load pretrained weights is not available, using random initialization.")
 
 
+def adapt_input_conv(in_chans, conv_weight):
+    conv_type = conv_weight.dtype
+    conv_weight = conv_weight.float()  # Some weights are in torch.half, ensure it's float for sum on CPU
+    O, I, J, K = conv_weight.shape
+    if in_chans == 1:
+        if I > 3:
+            assert conv_weight.shape[1] % 3 == 0
+            # For models with space2depth stems
+            conv_weight = conv_weight.reshape(O, I // 3, 3, J, K)
+            conv_weight = conv_weight.sum(dim=2, keepdim=False)
+        else:
+            conv_weight = conv_weight.sum(dim=1, keepdim=True)
+    elif in_chans != 3:
+        if I != 3:
+            raise NotImplementedError('Weight format not supported by conversion.')
+        else:
+            # NOTE this strategy should be better than random init, but there could be other combinations of
+            # the original RGB input layer weights that'd work better for specific cases.
+            repeat = int(math.ceil(in_chans / 3))
+            conv_weight = conv_weight.repeat(1, repeat, 1, 1)[:, :in_chans, :, :]
+            conv_weight *= (3 / float(in_chans))
+    conv_weight = conv_weight.to(conv_type)
+    return conv_weight
+
+
 def load_pretrained(model, cfg=None, num_classes=1000, in_chans=3, filter_fn=None, strict=True, progress=False):
     if cfg is None:
         cfg = getattr(model, 'default_cfg')
@@ -159,56 +184,35 @@ def load_pretrained(model, cfg=None, num_classes=1000, in_chans=3, filter_fn=Non
     if filter_fn is not None:
         state_dict = filter_fn(state_dict)
 
-    if in_chans == 1:
-        conv1_name = cfg['first_conv']
-        _logger.info('Converting first conv (%s) pretrained weights from 3 to 1 channel' % conv1_name)
-        conv1_weight = state_dict[conv1_name + '.weight']
-        # Some weights are in torch.half, ensure it's float for sum on CPU
-        conv1_type = conv1_weight.dtype
-        conv1_weight = conv1_weight.float()
-        O, I, J, K = conv1_weight.shape
-        if I > 3:
-            assert conv1_weight.shape[1] % 3 == 0
-            # For models with space2depth stems
-            conv1_weight = conv1_weight.reshape(O, I // 3, 3, J, K)
-            conv1_weight = conv1_weight.sum(dim=2, keepdim=False)
-        else:
-            conv1_weight = conv1_weight.sum(dim=1, keepdim=True)
-        conv1_weight = conv1_weight.to(conv1_type)
-        state_dict[conv1_name + '.weight'] = conv1_weight
-    elif in_chans != 3:
-        conv1_name = cfg['first_conv']
-        conv1_weight = state_dict[conv1_name + '.weight']
-        conv1_type = conv1_weight.dtype
-        conv1_weight = conv1_weight.float()
-        O, I, J, K = conv1_weight.shape
-        if I != 3:
-            _logger.warning('Deleting first conv (%s) from pretrained weights.' % conv1_name)
-            del state_dict[conv1_name + '.weight']
-            strict = False
-        else:
-            # NOTE this strategy should be better than random init, but there could be other combinations of
-            # the original RGB input layer weights that'd work better for specific cases.
-            _logger.info('Repeating first conv (%s) weights in channel dim.' % conv1_name)
-            repeat = int(math.ceil(in_chans / 3))
-            conv1_weight = conv1_weight.repeat(1, repeat, 1, 1)[:, :in_chans, :, :]
-            conv1_weight *= (3 / float(in_chans))
-            conv1_weight = conv1_weight.to(conv1_type)
-            state_dict[conv1_name + '.weight'] = conv1_weight
+    input_convs = cfg.get('first_conv', None)
+    if input_convs is not None:
+        if isinstance(input_convs, str):
+            input_convs = (input_convs,)
+        for input_conv_name in input_convs:
+            weight_name = input_conv_name + '.weight'
+            try:
+                state_dict[weight_name] = adapt_input_conv(in_chans, state_dict[weight_name])
+                _logger.info(
+                    f'Converted input conv {input_conv_name} pretrained weights from 3 to {in_chans} channel(s)')
+            except NotImplementedError as e:
+                del state_dict[weight_name]
+                strict = False
+                _logger.warning(
+                    f'Unable to convert pretrained {input_conv_name} weights, using random init for this layer.')
 
     classifier_name = cfg['classifier']
-    if num_classes == 1000 and cfg['num_classes'] == 1001:
-        # FIXME this special case is problematic as number of pretrained weight sources increases
-        # special case for imagenet trained models with extra background class in pretrained weights
-        classifier_weight = state_dict[classifier_name + '.weight']
-        state_dict[classifier_name + '.weight'] = classifier_weight[1:]
-        classifier_bias = state_dict[classifier_name + '.bias']
-        state_dict[classifier_name + '.bias'] = classifier_bias[1:]
-    elif num_classes != cfg['num_classes']:
-        # completely discard fully connected for all other differences between pretrained and created model
+    label_offset = cfg.get('label_offset', 0)
+    if num_classes != cfg['num_classes']:
+        # completely discard fully connected if model num_classes doesn't match pretrained weights
        del state_dict[classifier_name + '.weight']
        del state_dict[classifier_name + '.bias']
        strict = False
+    elif label_offset > 0:
+        # special case for pretrained weights with an extra background class in pretrained weights
+        classifier_weight = state_dict[classifier_name + '.weight']
+        state_dict[classifier_name + '.weight'] = classifier_weight[label_offset:]
+        classifier_bias = state_dict[classifier_name + '.bias']
+        state_dict[classifier_name + '.bias'] = classifier_bias[label_offset:]
 
     model.load_state_dict(state_dict, strict=strict)
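
Note (not part of the diff): a small sketch of the new helper in isolation, assuming the import path timm.models.helpers. adapt_input_conv sums the RGB kernels for 1-channel inputs and tiles/rescales them for other channel counts, returning weights in the original dtype:

import torch
from timm.models.helpers import adapt_input_conv  # added in this commit

w_rgb = torch.randn(64, 3, 7, 7)      # stem conv weight from a 3-channel pretrained model

w_gray = adapt_input_conv(1, w_rgb)   # RGB kernels summed -> (64, 1, 7, 7)
w_4ch = adapt_input_conv(4, w_rgb)    # kernels tiled, sliced to 4, scaled by 3/4 -> (64, 4, 7, 7)
print(w_gray.shape, w_4ch.shape)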

timm/models/inception_resnet_v2.py

Lines changed: 5 additions & 3 deletions
@@ -17,18 +17,20 @@
     # ported from http://download.tensorflow.org/models/inception_resnet_v2_2016_08_30.tar.gz
     'inception_resnet_v2': {
         'url': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/inception_resnet_v2-940b1cd6.pth',
-        'num_classes': 1001, 'input_size': (3, 299, 299), 'pool_size': (8, 8),
+        'num_classes': 1000, 'input_size': (3, 299, 299), 'pool_size': (8, 8),
         'crop_pct': 0.8975, 'interpolation': 'bicubic',
         'mean': IMAGENET_INCEPTION_MEAN, 'std': IMAGENET_INCEPTION_STD,
         'first_conv': 'conv2d_1a.conv', 'classifier': 'classif',
+        'label_offset': 1,  # 1001 classes in pretrained weights
     },
     # ported from http://download.tensorflow.org/models/ens_adv_inception_resnet_v2_2017_08_18.tar.gz
     'ens_adv_inception_resnet_v2': {
         'url': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/ens_adv_inception_resnet_v2-2592a550.pth',
-        'num_classes': 1001, 'input_size': (3, 299, 299), 'pool_size': (8, 8),
+        'num_classes': 1000, 'input_size': (3, 299, 299), 'pool_size': (8, 8),
         'crop_pct': 0.8975, 'interpolation': 'bicubic',
         'mean': IMAGENET_INCEPTION_MEAN, 'std': IMAGENET_INCEPTION_STD,
         'first_conv': 'conv2d_1a.conv', 'classifier': 'classif',
+        'label_offset': 1,  # 1001 classes in pretrained weights
     }
 }
 
@@ -222,7 +224,7 @@ def forward(self, x):
 
 
 class InceptionResnetV2(nn.Module):
-    def __init__(self, num_classes=1001, in_chans=3, drop_rate=0., output_stride=32, global_pool='avg'):
+    def __init__(self, num_classes=1000, in_chans=3, drop_rate=0., output_stride=32, global_pool='avg'):
         super(InceptionResnetV2, self).__init__()
         self.drop_rate = drop_rate
         self.num_classes = num_classes
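
Note (not part of the diff): what label_offset=1 means for load_pretrained, with hypothetical tensor values. The TF-ported checkpoints carry 1001 classifier rows where index 0 is a background class; the loader now drops the first label_offset rows so the weights fit the 1000-class model:

import torch

# Hypothetical classifier tensors from a 1001-class TF-ported checkpoint.
w = torch.randn(1001, 1536)
b = torch.randn(1001)

label_offset = 1                 # row 0 is the extra background class
w, b = w[label_offset:], b[label_offset:]
print(w.shape, b.shape)          # torch.Size([1000, 1536]) torch.Size([1000])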

timm/models/inception_v4.py

Lines changed: 3 additions & 2 deletions
@@ -16,10 +16,11 @@
 default_cfgs = {
     'inception_v4': {
         'url': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-cadene/inceptionv4-8e4777a0.pth',
-        'num_classes': 1001, 'input_size': (3, 299, 299), 'pool_size': (8, 8),
+        'num_classes': 1000, 'input_size': (3, 299, 299), 'pool_size': (8, 8),
         'crop_pct': 0.875, 'interpolation': 'bicubic',
         'mean': IMAGENET_INCEPTION_MEAN, 'std': IMAGENET_INCEPTION_STD,
         'first_conv': 'features.0.conv', 'classifier': 'last_linear',
+        'label_offset': 1,  # 1001 classes in pretrained weights
     }
 }
 
@@ -241,7 +242,7 @@ def forward(self, x):
 
 
 class InceptionV4(nn.Module):
-    def __init__(self, num_classes=1001, in_chans=3, output_stride=32, drop_rate=0., global_pool='avg'):
+    def __init__(self, num_classes=1000, in_chans=3, output_stride=32, drop_rate=0., global_pool='avg'):
         super(InceptionV4, self).__init__()
         assert output_stride == 32
         self.drop_rate = drop_rate
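
Note (not part of the diff): a hedged end-to-end check of the regression fix (downloads pretrained weights; assumes a timm build containing this commit). The model is created with 1000 classes by default and the 1001-class checkpoint is trimmed via label_offset while loading:

import timm

model = timm.create_model('inception_v4', pretrained=True)
print(model.default_cfg['num_classes'])   # 1000
print(model.last_linear.out_features)     # 1000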

timm/models/layers/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@
 from .create_act import create_act_layer, get_act_layer, get_act_fn
 from .create_attn import get_attn, create_attn
 from .create_conv2d import create_conv2d
-from .create_norm_act import create_norm_act, get_norm_act_layer
+from .create_norm_act import get_norm_act_layer, create_norm_act, convert_norm_act
 from .drop import DropBlock2d, DropPath, drop_block_2d, drop_path
 from .eca import EcaModule, CecaModule
 from .evo_norm import EvoNormBatch2d, EvoNormSample2d
