@@ -1,21 +1,20 @@
 import math
 from copy import deepcopy
 from functools import partial
-from typing import Callable, Dict, List, Optional, Tuple, Union
+from typing import Dict, List, Optional, Tuple, Union
 
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from torch.jit import Final
 
 from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
 from timm.layers import PatchEmbed, Mlp, DropPath, ClNormMlpClassifierHead, LayerScale, \
     get_norm_layer, get_act_layer, init_weight_jax, init_weight_vit, to_2tuple, use_fused_attn
 
 from ._builder import build_model_with_cfg
 from ._features import feature_take_indices
-from ._manipulate import named_apply, checkpoint_seq, adapt_input_conv
-from ._registry import generate_default_cfgs, register_model, register_model_deprecations
+from ._manipulate import named_apply, checkpoint
+from ._registry import generate_default_cfgs, register_model
 
 
 def window_partition(x, window_size: Tuple[int, int]):
@@ -471,7 +470,10 @@ def forward_intermediates( |
         else:
             blocks = self.blocks[:max_index + 1]
         for i, blk in enumerate(blocks):
-            x = blk(x)
+            if self.grad_checkpointing and not torch.jit.is_scripting():
+                x = checkpoint(blk, x)
+            else:
+                x = blk(x)
             if i in take_indices:
                 x_out = x.permute(0, 3, 1, 2) if output_fmt == 'NCHW' else x
                 intermediates.append(x_out)
@@ -503,8 +505,11 @@ def prune_intermediate_layers( |
     def forward_features(self, x: torch.Tensor) -> torch.Tensor:
         x = self.patch_embed(x)  # BHWC
         x = self._pos_embed(x)
-        for i, blk in enumerate(self.blocks):
-            x = blk(x)
+        for blk in self.blocks:
+            if self.grad_checkpointing and not torch.jit.is_scripting():
+                x = checkpoint(blk, x)
+            else:
+                x = blk(x)
         return x
 
     def forward_head(self, x, pre_logits: bool = False) -> torch.Tensor:
|
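Both hunks apply the same per-block dispatch: when `self.grad_checkpointing` is set and the model is not being TorchScript-ed, each block runs through the `checkpoint` helper imported from `._manipulate` instead of being called directly. The sketch below is a minimal, self-contained illustration of that pattern using `torch.utils.checkpoint` directly; `TinyBlock`, the standalone `grad_checkpointing` flag, and the tensor shapes are invented for the example and are not part of this diff.

```python
import torch
import torch.nn as nn
from torch.utils.checkpoint import checkpoint


class TinyBlock(nn.Module):
    """Stand-in for a transformer block; any nn.Module taking a single tensor works."""

    def __init__(self, dim: int = 32):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.mlp = nn.Sequential(nn.Linear(dim, dim * 4), nn.GELU(), nn.Linear(dim * 4, dim))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x + self.mlp(self.norm(x))


blocks = nn.ModuleList(TinyBlock() for _ in range(4))
grad_checkpointing = True  # mirrors the self.grad_checkpointing flag tested in the diff

x = torch.randn(2, 7, 7, 32, requires_grad=True)  # BHWC, as produced by patch_embed above
for blk in blocks:
    if grad_checkpointing and not torch.jit.is_scripting():
        # Block activations are discarded in the forward pass and recomputed during
        # backward, trading extra compute for a smaller peak memory footprint.
        x = checkpoint(blk, x, use_reentrant=False)
    else:
        x = blk(x)
x.sum().backward()
```

In practice the flag would normally be flipped through timm's usual `set_grad_checkpointing()` method on the built model rather than set by hand; that toggle is assumed here and is not shown in this commit.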