Skip to content

Commit 4166c75

Browse files
wip: depth_anything_v2 init lint fixes
1 parent 4f0dfbd commit 4166c75

File tree

7 files changed: 62 additions & 47 deletions

invokeai/backend/image_util/depth_anything/v2/dinov2.py

Lines changed: 7 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
# https://github.com/facebookresearch/dino/blob/main/vision_transformer.py
88
# https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py
99

10-
import logging
10+
1111
import math
1212
from functools import partial
1313
from typing import Callable, Sequence, Tuple, Union
@@ -17,11 +17,8 @@
1717
import torch.utils.checkpoint
1818
from torch.nn.init import trunc_normal_
1919

20-
from .dinov2_layers import MemEffAttention, Mlp
21-
from .dinov2_layers import NestedTensorBlock as Block
22-
from .dinov2_layers import PatchEmbed, SwiGLUFFNFused
23-
24-
logger = logging.getLogger("dinov2")
20+
from invokeai.backend.image_util.depth_anything.v2.dinov2_layers import MemEffAttention, Mlp, PatchEmbed, SwiGLUFFNFused
21+
from invokeai.backend.image_util.depth_anything.v2.dinov2_layers import NestedTensorBlock as Block
2522

2623

2724
def named_apply(fn: Callable, module: nn.Module, name="", depth_first=True, include_root=False) -> nn.Module:
@@ -120,13 +117,10 @@ def __init__(
120117
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule
121118

122119
if ffn_layer == "mlp":
123-
logger.info("using MLP layer as FFN")
124120
ffn_layer = Mlp
125121
elif ffn_layer == "swiglufused" or ffn_layer == "swiglu":
126-
logger.info("using SwiGLU layer as FFN")
127122
ffn_layer = SwiGLUFFNFused
128123
elif ffn_layer == "identity":
129-
logger.info("using Identity layer as FFN")
130124

131125
def f(*args, **kwargs):
132126
return nn.Identity()
@@ -232,13 +226,13 @@ def prepare_tokens_with_masks(self, x, masks=None):
232226
return x
233227

234228
def forward_features_list(self, x_list, masks_list):
235-
x = [self.prepare_tokens_with_masks(x, masks) for x, masks in zip(x_list, masks_list)]
229+
x = [self.prepare_tokens_with_masks(x, masks) for x, masks in zip(x_list, masks_list, strict=False)]
236230
for blk in self.blocks:
237231
x = blk(x)
238232

239233
all_x = x
240234
output = []
241-
for x, masks in zip(all_x, masks_list):
235+
for x, masks in zip(all_x, masks_list, strict=False):
242236
x_norm = self.norm(x)
243237
output.append(
244238
{
@@ -301,7 +295,7 @@ def get_intermediate_layers(
301295
n: Union[int, Sequence] = 1, # Layers or n last layers to take
302296
reshape: bool = False,
303297
return_class_token: bool = False,
304-
norm=True,
298+
norm: bool = True,
305299
) -> Tuple[Union[torch.Tensor, Tuple[torch.Tensor]]]:
306300
if self.chunked_blocks:
307301
outputs = self._get_intermediate_layers_chunked(x, n)
@@ -318,7 +312,7 @@ def get_intermediate_layers(
318312
for out in outputs
319313
]
320314
if return_class_token:
321-
return tuple(zip(outputs, class_tokens))
315+
return tuple(zip(outputs, class_tokens, strict=False))
322316
return tuple(outputs)
323317

324318
def forward(self, *args, is_training=False, **kwargs):

invokeai/backend/image_util/depth_anything/v2/dinov2_layers/__init__.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
# This source code is licensed under the license found in the
55
# LICENSE file in the root directory of this source tree.
66

7-
from .attention import MemEffAttention
8-
from .block import NestedTensorBlock
9-
from .mlp import Mlp
10-
from .patch_embed import PatchEmbed
11-
from .swiglu_ffn import SwiGLUFFN, SwiGLUFFNFused
7+
8+
from invokeai.backend.image_util.depth_anything.v2.dinov2_layers.attention import MemEffAttention # noqa
9+
from invokeai.backend.image_util.depth_anything.v2.dinov2_layers.block import NestedTensorBlock # noqa
10+
from invokeai.backend.image_util.depth_anything.v2.dinov2_layers.mlp import Mlp # noqa
11+
from invokeai.backend.image_util.depth_anything.v2.dinov2_layers.patch_embed import PatchEmbed # noqa
12+
from invokeai.backend.image_util.depth_anything.v2.dinov2_layers.swiglu_ffn import SwiGLUFFN, SwiGLUFFNFused # noqa

invokeai/backend/image_util/depth_anything/v2/dinov2_layers/attention.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,16 @@
88
# https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
99
# https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py
1010

11-
import logging
11+
# Referenced from: https://github.com/DepthAnything/Depth-Anything-V2
1212

13-
from torch import Tensor, nn
14-
15-
logger = logging.getLogger("dinov2")
1613

14+
from torch import Tensor, nn
1715

1816
try:
19-
from xformers.ops import fmha, memory_efficient_attention, unbind
17+
from xformers.ops import memory_efficient_attention, unbind
2018

2119
XFORMERS_AVAILABLE = True
2220
except ImportError:
23-
logger.warning("xFormers not available")
2421
XFORMERS_AVAILABLE = False
2522

2623

invokeai/backend/image_util/depth_anything/v2/dinov2_layers/block.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,26 +8,22 @@
88
# https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
99
# https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/patch_embed.py
1010

11-
import logging
11+
1212
from typing import Any, Callable, Dict, List, Tuple
1313

1414
import torch
1515
from torch import Tensor, nn
1616

17-
from .attention import Attention, MemEffAttention
18-
from .drop_path import DropPath
19-
from .layer_scale import LayerScale
20-
from .mlp import Mlp
21-
22-
logger = logging.getLogger("dinov2")
23-
17+
from invokeai.backend.image_util.depth_anything.v2.dinov2_layers.attention import Attention, MemEffAttention
18+
from invokeai.backend.image_util.depth_anything.v2.dinov2_layers.drop_path import DropPath
19+
from invokeai.backend.image_util.depth_anything.v2.dinov2_layers.layer_scale import LayerScale
20+
from invokeai.backend.image_util.depth_anything.v2.dinov2_layers.mlp import Mlp
2421

2522
try:
2623
from xformers.ops import fmha, index_select_cat, scaled_index_add
2724

2825
XFORMERS_AVAILABLE = True
2926
except ImportError:
30-
logger.warning("xFormers not available")
3127
XFORMERS_AVAILABLE = False
3228

3329

@@ -157,10 +153,10 @@ def get_attn_bias_and_cat(x_list, branges=None):
157153
this will perform the index select, cat the tensors, and provide the attn_bias from cache
158154
"""
159155
batch_sizes = [b.shape[0] for b in branges] if branges is not None else [x.shape[0] for x in x_list]
160-
all_shapes = tuple((b, x.shape[1]) for b, x in zip(batch_sizes, x_list))
156+
all_shapes = tuple((b, x.shape[1]) for b, x in zip(batch_sizes, x_list, strict=False))
161157
if all_shapes not in attn_bias_cache.keys():
162158
seqlens = []
163-
for b, x in zip(batch_sizes, x_list):
159+
for b, x in zip(batch_sizes, x_list, strict=False):
164160
for _ in range(b):
165161
seqlens.append(x.shape[1])
166162
attn_bias = fmha.BlockDiagonalMask.from_seqlens(seqlens)
@@ -194,7 +190,9 @@ def drop_add_residual_stochastic_depth_list(
194190
residual_list = attn_bias.split(residual_func(x_cat, attn_bias=attn_bias)) # type: ignore
195191

196192
outputs = []
197-
for x, brange, residual, residual_scale_factor in zip(x_list, branges, residual_list, residual_scale_factors):
193+
for x, brange, residual, residual_scale_factor in zip(
194+
x_list, branges, residual_list, residual_scale_factors, strict=False
195+
):
198196
outputs.append(add_residual(x, brange, residual, residual_scale_factor, scaling_vector).view_as(x))
199197
return outputs
200198

invokeai/backend/image_util/depth_anything/v2/dpt.py

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
1+
# Referenced from https://github.com/DepthAnything/Depth-Anything-V2/blob/main/depth_anything_v2/dpt.py
2+
3+
from typing import List, Literal, Optional
4+
15
import cv2
6+
import numpy as np
27
import torch
38
import torch.nn as nn
49
import torch.nn.functional as F
510
from torchvision.transforms import Compose
611

7-
from .dinov2 import DINOv2
8-
from .utils.blocks import FeatureFusionBlock, _make_scratch
9-
from .utils.transform import NormalizeImage, PrepareForNet, Resize
12+
from invokeai.backend.image_util.depth_anything.v2.dinov2 import DINOv2
13+
from invokeai.backend.image_util.depth_anything.v2.utils.blocks import FeatureFusionBlock, _make_scratch
14+
from invokeai.backend.image_util.depth_anything.v2.utils.transform import NormalizeImage, PrepareForNet, Resize
1015

1116

1217
def _make_fusion_block(features, use_bn, size=None):
@@ -37,10 +42,18 @@ def forward(self, x):
3742

3843
class DPTHead(nn.Module):
3944
def __init__(
40-
self, in_channels, features=256, use_bn=False, out_channels=[256, 512, 1024, 1024], use_clstoken=False
45+
self,
46+
in_channels: int,
47+
features: int = 256,
48+
use_bn: bool = False,
49+
out_channels: Optional[List[int]] = None,
50+
use_clstoken: bool = False,
4151
):
4252
super(DPTHead, self).__init__()
4353

54+
if out_channels is None:
55+
out_channels = [256, 512, 1024, 1024]
56+
4457
self.use_clstoken = use_clstoken
4558

4659
self.projects = nn.ModuleList(
@@ -140,10 +153,18 @@ def forward(self, out_features, patch_h, patch_w):
140153

141154
class DepthAnythingV2(nn.Module):
142155
def __init__(
143-
self, encoder="vitl", features=256, out_channels=[256, 512, 1024, 1024], use_bn=False, use_clstoken=False
156+
self,
157+
encoder: Literal["vits", "vitb", "vitl", "vitg"] = "vitl",
158+
features: int = 256,
159+
out_channels: Optional[List[int]] = None,
160+
use_bn: bool = False,
161+
use_clstoken: bool = False,
144162
):
145163
super(DepthAnythingV2, self).__init__()
146164

165+
if out_channels is None:
166+
out_channels = [256, 512, 1024, 1024]
167+
147168
self.intermediate_layer_idx = {
148169
"vits": [2, 5, 8, 11],
149170
"vitb": [2, 5, 8, 11],
@@ -158,7 +179,7 @@ def __init__(
158179
self.pretrained.embed_dim, features, use_bn, out_channels=out_channels, use_clstoken=use_clstoken
159180
)
160181

161-
def forward(self, x):
182+
def forward(self, x: torch.Tensor):
162183
patch_h, patch_w = x.shape[-2] // 14, x.shape[-1] // 14
163184

164185
features = self.pretrained.get_intermediate_layers(
@@ -171,7 +192,7 @@ def forward(self, x):
171192
return depth.squeeze(1)
172193

173194
@torch.no_grad()
174-
def infer_image(self, raw_image, input_size=518):
195+
def infer_image(self, raw_image: np.ndarray, input_size: int = 518):
175196
image, (h, w) = self.image2tensor(raw_image, input_size)
176197

177198
depth = self.forward(image)

invokeai/backend/image_util/depth_anything/v2/utils/blocks.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# Referenced from: https://github.com/DepthAnything/Depth-Anything-V2/blob/main/depth_anything_v2/util/blocks.py
2+
13
import torch.nn as nn
24

35

@@ -53,7 +55,7 @@ def __init__(self, features, activation, bn):
5355

5456
self.conv2 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups)
5557

56-
if self.bn == True:
58+
if self.bn:
5759
self.bn1 = nn.BatchNorm2d(features)
5860
self.bn2 = nn.BatchNorm2d(features)
5961

@@ -73,12 +75,12 @@ def forward(self, x):
7375

7476
out = self.activation(x)
7577
out = self.conv1(out)
76-
if self.bn == True:
78+
if self.bn:
7779
out = self.bn1(out)
7880

7981
out = self.activation(out)
8082
out = self.conv2(out)
81-
if self.bn == True:
83+
if self.bn:
8284
out = self.bn2(out)
8385

8486
if self.groups > 1:
@@ -105,7 +107,7 @@ def __init__(self, features, activation, deconv=False, bn=False, expand=False, a
105107

106108
self.expand = expand
107109
out_features = features
108-
if self.expand == True:
110+
if self.expand:
109111
out_features = features // 2
110112

111113
self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1)

invokeai/backend/image_util/depth_anything/v2/utils/transform.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# Referenced from: https://github.com/DepthAnything/Depth-Anything-V2/blob/main/depth_anything_v2/util/transform.py
2+
13
import cv2
24
import numpy as np
35

0 commit comments

Comments
 (0)