wip: depth_anything_v2 init lint fixes

blessedcoolant · blessedcoolant · commit 4166c756ce94 · 2024-07-25T14:41:22.000+05:30
diff --git a/invokeai/backend/image_util/depth_anything/v2/dinov2.py b/invokeai/backend/image_util/depth_anything/v2/dinov2.py
@@ -7,7 +7,7 @@
 #   https://github.com/facebookresearch/dino/blob/main/vision_transformer.py
 #   https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py
 
-import logging
+
 import math
 from functools import partial
 from typing import Callable, Sequence, Tuple, Union
@@ -17,11 +17,8 @@
 import torch.utils.checkpoint
 from torch.nn.init import trunc_normal_
 
-from .dinov2_layers import MemEffAttention, Mlp
-from .dinov2_layers import NestedTensorBlock as Block
-from .dinov2_layers import PatchEmbed, SwiGLUFFNFused
-
-logger = logging.getLogger("dinov2")
+from invokeai.backend.image_util.depth_anything.v2.dinov2_layers import MemEffAttention, Mlp, PatchEmbed, SwiGLUFFNFused
+from invokeai.backend.image_util.depth_anything.v2.dinov2_layers import NestedTensorBlock as Block
 
 
 def named_apply(fn: Callable, module: nn.Module, name="", depth_first=True, include_root=False) -> nn.Module:
@@ -120,13 +117,10 @@ def __init__(
             dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]  # stochastic depth decay rule
 
         if ffn_layer == "mlp":
-            logger.info("using MLP layer as FFN")
             ffn_layer = Mlp
         elif ffn_layer == "swiglufused" or ffn_layer == "swiglu":
-            logger.info("using SwiGLU layer as FFN")
             ffn_layer = SwiGLUFFNFused
         elif ffn_layer == "identity":
-            logger.info("using Identity layer as FFN")
 
             def f(*args, **kwargs):
                 return nn.Identity()
@@ -232,13 +226,13 @@ def prepare_tokens_with_masks(self, x, masks=None):
         return x
 
     def forward_features_list(self, x_list, masks_list):
-        x = [self.prepare_tokens_with_masks(x, masks) for x, masks in zip(x_list, masks_list)]
+        x = [self.prepare_tokens_with_masks(x, masks) for x, masks in zip(x_list, masks_list, strict=False)]
         for blk in self.blocks:
             x = blk(x)
 
         all_x = x
         output = []
-        for x, masks in zip(all_x, masks_list):
+        for x, masks in zip(all_x, masks_list, strict=False):
             x_norm = self.norm(x)
             output.append(
                 {
@@ -301,7 +295,7 @@ def get_intermediate_layers(
         n: Union[int, Sequence] = 1,  # Layers or n last layers to take
         reshape: bool = False,
         return_class_token: bool = False,
-        norm=True,
+        norm: bool = True,
     ) -> Tuple[Union[torch.Tensor, Tuple[torch.Tensor]]]:
         if self.chunked_blocks:
             outputs = self._get_intermediate_layers_chunked(x, n)
@@ -318,7 +312,7 @@ def get_intermediate_layers(
                 for out in outputs
             ]
         if return_class_token:
-            return tuple(zip(outputs, class_tokens))
+            return tuple(zip(outputs, class_tokens, strict=False))
         return tuple(outputs)
 
     def forward(self, *args, is_training=False, **kwargs):
diff --git a/invokeai/backend/image_util/depth_anything/v2/dinov2_layers/__init__.py b/invokeai/backend/image_util/depth_anything/v2/dinov2_layers/__init__.py
@@ -4,8 +4,9 @@
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
 
-from .attention import MemEffAttention
-from .block import NestedTensorBlock
-from .mlp import Mlp
-from .patch_embed import PatchEmbed
-from .swiglu_ffn import SwiGLUFFN, SwiGLUFFNFused
+
+from invokeai.backend.image_util.depth_anything.v2.dinov2_layers.attention import MemEffAttention  # noqa
+from invokeai.backend.image_util.depth_anything.v2.dinov2_layers.block import NestedTensorBlock  # noqa
+from invokeai.backend.image_util.depth_anything.v2.dinov2_layers.mlp import Mlp  # noqa
+from invokeai.backend.image_util.depth_anything.v2.dinov2_layers.patch_embed import PatchEmbed  # noqa
+from invokeai.backend.image_util.depth_anything.v2.dinov2_layers.swiglu_ffn import SwiGLUFFN, SwiGLUFFNFused  # noqa
diff --git a/invokeai/backend/image_util/depth_anything/v2/dinov2_layers/attention.py b/invokeai/backend/image_util/depth_anything/v2/dinov2_layers/attention.py
@@ -8,19 +8,16 @@
 #   https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
 #   https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py
 
-import logging
+# Referenced from: https://github.com/DepthAnything/Depth-Anything-V2
 
-from torch import Tensor, nn
-
-logger = logging.getLogger("dinov2")
 
+from torch import Tensor, nn
 
 try:
-    from xformers.ops import fmha, memory_efficient_attention, unbind
+    from xformers.ops import memory_efficient_attention, unbind
 
     XFORMERS_AVAILABLE = True
 except ImportError:
-    logger.warning("xFormers not available")
     XFORMERS_AVAILABLE = False
 
 
diff --git a/invokeai/backend/image_util/depth_anything/v2/dinov2_layers/block.py b/invokeai/backend/image_util/depth_anything/v2/dinov2_layers/block.py
@@ -8,26 +8,22 @@
 #   https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
 #   https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/patch_embed.py
 
-import logging
+
 from typing import Any, Callable, Dict, List, Tuple
 
 import torch
 from torch import Tensor, nn
 
-from .attention import Attention, MemEffAttention
-from .drop_path import DropPath
-from .layer_scale import LayerScale
-from .mlp import Mlp
-
-logger = logging.getLogger("dinov2")
-
+from invokeai.backend.image_util.depth_anything.v2.dinov2_layers.attention import Attention, MemEffAttention
+from invokeai.backend.image_util.depth_anything.v2.dinov2_layers.drop_path import DropPath
+from invokeai.backend.image_util.depth_anything.v2.dinov2_layers.layer_scale import LayerScale
+from invokeai.backend.image_util.depth_anything.v2.dinov2_layers.mlp import Mlp
 
 try:
     from xformers.ops import fmha, index_select_cat, scaled_index_add
 
     XFORMERS_AVAILABLE = True
 except ImportError:
-    logger.warning("xFormers not available")
     XFORMERS_AVAILABLE = False
 
 
@@ -157,10 +153,10 @@ def get_attn_bias_and_cat(x_list, branges=None):
     this will perform the index select, cat the tensors, and provide the attn_bias from cache
     """
     batch_sizes = [b.shape[0] for b in branges] if branges is not None else [x.shape[0] for x in x_list]
-    all_shapes = tuple((b, x.shape[1]) for b, x in zip(batch_sizes, x_list))
+    all_shapes = tuple((b, x.shape[1]) for b, x in zip(batch_sizes, x_list, strict=False))
     if all_shapes not in attn_bias_cache.keys():
         seqlens = []
-        for b, x in zip(batch_sizes, x_list):
+        for b, x in zip(batch_sizes, x_list, strict=False):
             for _ in range(b):
                 seqlens.append(x.shape[1])
         attn_bias = fmha.BlockDiagonalMask.from_seqlens(seqlens)
@@ -194,7 +190,9 @@ def drop_add_residual_stochastic_depth_list(
     residual_list = attn_bias.split(residual_func(x_cat, attn_bias=attn_bias))  # type: ignore
 
     outputs = []
-    for x, brange, residual, residual_scale_factor in zip(x_list, branges, residual_list, residual_scale_factors):
+    for x, brange, residual, residual_scale_factor in zip(
+        x_list, branges, residual_list, residual_scale_factors, strict=False
+    ):
         outputs.append(add_residual(x, brange, residual, residual_scale_factor, scaling_vector).view_as(x))
     return outputs
 
diff --git a/invokeai/backend/image_util/depth_anything/v2/dpt.py b/invokeai/backend/image_util/depth_anything/v2/dpt.py
@@ -1,12 +1,17 @@
+# Referenced from https://github.com/DepthAnything/Depth-Anything-V2/blob/main/depth_anything_v2/dpt.py
+
+from typing import List, Literal, Optional
+
 import cv2
+import numpy as np
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from torchvision.transforms import Compose
 
-from .dinov2 import DINOv2
-from .utils.blocks import FeatureFusionBlock, _make_scratch
-from .utils.transform import NormalizeImage, PrepareForNet, Resize
+from invokeai.backend.image_util.depth_anything.v2.dinov2 import DINOv2
+from invokeai.backend.image_util.depth_anything.v2.utils.blocks import FeatureFusionBlock, _make_scratch
+from invokeai.backend.image_util.depth_anything.v2.utils.transform import NormalizeImage, PrepareForNet, Resize
 
 
 def _make_fusion_block(features, use_bn, size=None):
@@ -37,10 +42,18 @@ def forward(self, x):
 
 class DPTHead(nn.Module):
     def __init__(
-        self, in_channels, features=256, use_bn=False, out_channels=[256, 512, 1024, 1024], use_clstoken=False
+        self,
+        in_channels: int,
+        features: int = 256,
+        use_bn: bool = False,
+        out_channels: Optional[List[int]] = None,
+        use_clstoken: bool = False,
     ):
         super(DPTHead, self).__init__()
 
+        if out_channels is None:
+            out_channels = [256, 512, 1024, 1024]
+
         self.use_clstoken = use_clstoken
 
         self.projects = nn.ModuleList(
@@ -140,10 +153,18 @@ def forward(self, out_features, patch_h, patch_w):
 
 class DepthAnythingV2(nn.Module):
     def __init__(
-        self, encoder="vitl", features=256, out_channels=[256, 512, 1024, 1024], use_bn=False, use_clstoken=False
+        self,
+        encoder: Literal["vits", "vitb", "vitl", "vitg"] = "vitl",
+        features: int = 256,
+        out_channels: Optional[List[int]] = None,
+        use_bn: bool = False,
+        use_clstoken: bool = False,
     ):
         super(DepthAnythingV2, self).__init__()
 
+        if out_channels is None:
+            out_channels = [256, 512, 1024, 1024]
+
         self.intermediate_layer_idx = {
             "vits": [2, 5, 8, 11],
             "vitb": [2, 5, 8, 11],
@@ -158,7 +179,7 @@ def __init__(
             self.pretrained.embed_dim, features, use_bn, out_channels=out_channels, use_clstoken=use_clstoken
         )
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor):
         patch_h, patch_w = x.shape[-2] // 14, x.shape[-1] // 14
 
         features = self.pretrained.get_intermediate_layers(
@@ -171,7 +192,7 @@ def forward(self, x):
         return depth.squeeze(1)
 
     @torch.no_grad()
-    def infer_image(self, raw_image, input_size=518):
+    def infer_image(self, raw_image: np.ndarray, input_size: int = 518):
         image, (h, w) = self.image2tensor(raw_image, input_size)
 
         depth = self.forward(image)
diff --git a/invokeai/backend/image_util/depth_anything/v2/utils/blocks.py b/invokeai/backend/image_util/depth_anything/v2/utils/blocks.py
@@ -1,3 +1,5 @@
+# Referenced from: https://github.com/DepthAnything/Depth-Anything-V2/blob/main/depth_anything_v2/util/blocks.py
+
 import torch.nn as nn
 
 
@@ -53,7 +55,7 @@ def __init__(self, features, activation, bn):
 
         self.conv2 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups)
 
-        if self.bn == True:
+        if self.bn:
             self.bn1 = nn.BatchNorm2d(features)
             self.bn2 = nn.BatchNorm2d(features)
 
@@ -73,12 +75,12 @@ def forward(self, x):
 
         out = self.activation(x)
         out = self.conv1(out)
-        if self.bn == True:
+        if self.bn:
             out = self.bn1(out)
 
         out = self.activation(out)
         out = self.conv2(out)
-        if self.bn == True:
+        if self.bn:
             out = self.bn2(out)
 
         if self.groups > 1:
@@ -105,7 +107,7 @@ def __init__(self, features, activation, deconv=False, bn=False, expand=False, a
 
         self.expand = expand
         out_features = features
-        if self.expand == True:
+        if self.expand:
             out_features = features // 2
 
         self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1)
diff --git a/invokeai/backend/image_util/depth_anything/v2/utils/transform.py b/invokeai/backend/image_util/depth_anything/v2/utils/transform.py
@@ -1,3 +1,5 @@
+# Referenced from: https://github.com/DepthAnything/Depth-Anything-V2/blob/main/depth_anything_v2/util/transform.py
+
 import cv2
 import numpy as np
 

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -1,3 +1,5 @@` |
|  | `1` | `+# Referenced from: https://github.com/DepthAnything/Depth-Anything-V2/blob/main/depth_anything_v2/util/transform.py` |
|  | `2` | `+` |
| `1` | `3` | `import cv2` |
| `2` | `4` | `import numpy as np` |
| `3` | `5` |  |