Commit 7cb49e6

blessedcoolant and hipsterusername authored and committed
feat: Add Resolution to DepthAnything
1 parent 39fedb0 commit 7cb49e6

6 files changed: +79 -29 lines changed

invokeai/app/invocations/controlnet_image_processors.py

Lines changed: 2 additions & 1 deletion
@@ -621,6 +621,7 @@ class DepthAnythingImageProcessorInvocation(ImageProcessorInvocation):
     model_size: DEPTH_ANYTHING_MODEL_SIZES = InputField(
         default="small", description="The size of the depth model to use"
     )
+    resolution: int = InputField(default=512, ge=64, multiple_of=64, description=FieldDescriptions.image_res)
     offload: bool = InputField(default=False)
 
     def run_processor(self, image):
@@ -630,5 +631,5 @@ def run_processor(self, image):
         if image.mode == "RGBA":
             image = image.convert("RGB")
 
-        processed_image = depth_anything_detector(image=image, offload=self.offload)
+        processed_image = depth_anything_detector(image=image, resolution=self.resolution, offload=self.offload)
         return processed_image
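
For orientation, a minimal sketch of how the new resolution field's ge/multiple_of constraints behave, written against plain pydantic rather than InvokeAI's InputField wrapper (which builds on pydantic's Field). DepthAnythingParams below is a hypothetical stand-in, not the real invocation class.

from pydantic import BaseModel, Field, ValidationError

# Hypothetical stand-in for the invocation's new field; assumes ge/multiple_of
# are forwarded to pydantic validation as in the InputField call above.
class DepthAnythingParams(BaseModel):
    resolution: int = Field(default=512, ge=64, multiple_of=64)

print(DepthAnythingParams().resolution)                 # 512 (the default)
print(DepthAnythingParams(resolution=1024).resolution)  # accepted: >= 64 and a multiple of 64

try:
    DepthAnythingParams(resolution=500)                 # rejected: not a multiple of 64
except ValidationError as err:
    print(err)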

invokeai/backend/image_util/depth_anything/__init__.py

Lines changed: 14 additions & 10 deletions
@@ -64,12 +64,15 @@ def load_model(self, model_size=Literal["large", "base", "small"]):
             del self.model
             self.model_size = model_size
 
-            if self.model_size == "small":
-                self.model = DPT_DINOv2(encoder="vits", features=64, out_channels=[48, 96, 192, 384])
-            if self.model_size == "base":
-                self.model = DPT_DINOv2(encoder="vitb", features=128, out_channels=[96, 192, 384, 768])
-            if self.model_size == "large":
-                self.model = DPT_DINOv2(encoder="vitl", features=256, out_channels=[256, 512, 1024, 1024])
+            match self.model_size:
+                case "small":
+                    self.model = DPT_DINOv2(encoder="vits", features=64, out_channels=[48, 96, 192, 384])
+                case "base":
+                    self.model = DPT_DINOv2(encoder="vitb", features=128, out_channels=[96, 192, 384, 768])
+                case "large":
+                    self.model = DPT_DINOv2(encoder="vitl", features=256, out_channels=[256, 512, 1024, 1024])
+                case _:
+                    raise TypeError("Not a supported model")
 
             self.model.load_state_dict(torch.load(DEPTH_ANYTHING_MODEL_PATH.as_posix(), map_location="cpu"))
             self.model.eval()
@@ -81,12 +84,11 @@ def to(self, device):
         self.model.to(device)
         return self
 
-    def __call__(self, image, offload=False):
+    def __call__(self, image, resolution=512, offload=False):
         image = np.array(image, dtype=np.uint8)
-        original_width, original_height = image.shape[:2]
         image = image[:, :, ::-1] / 255.0
 
-        image_width, image_height = image.shape[:2]
+        image_height, image_width = image.shape[:2]
         image = transform({"image": image})["image"]
         image = torch.from_numpy(image).unsqueeze(0).to(choose_torch_device())
 
@@ -97,7 +99,9 @@ def __call__(self, image, offload=False):
 
         depth_map = repeat(depth, "h w -> h w 3").cpu().numpy().astype(np.uint8)
         depth_map = Image.fromarray(depth_map)
-        depth_map = depth_map.resize((original_height, original_width))
+
+        new_height = int(image_height * (resolution / image_width))
+        depth_map = depth_map.resize((resolution, new_height))
 
         if offload:
             del self.model
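
The detector now scales the depth map so its width matches the requested resolution and its height preserves the source aspect ratio. A standalone sketch of that arithmetic with Pillow (resize_depth_map is an illustrative helper, not part of the InvokeAI codebase):

from PIL import Image

def resize_depth_map(depth_map: Image.Image, image_width: int, image_height: int, resolution: int = 512) -> Image.Image:
    # Same calculation as the diff above: the width becomes `resolution`,
    # the height is scaled to keep the original aspect ratio.
    new_height = int(image_height * (resolution / image_width))
    return depth_map.resize((resolution, new_height))  # PIL resize takes (width, height)

depth = Image.new("L", (768, 512))             # stand-in depth map for a 768x512 source image
print(resize_depth_map(depth, 768, 512).size)  # (512, 341)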

invokeai/frontend/web/src/features/controlAdapters/components/processors/DepthAnyThingProcessor.tsx

Lines changed: 40 additions & 2 deletions
@@ -1,5 +1,11 @@
 import type { ComboboxOnChange } from '@invoke-ai/ui';
-import { Combobox, FormControl, FormLabel } from '@invoke-ai/ui';
+import {
+  Combobox,
+  CompositeNumberInput,
+  CompositeSlider,
+  FormControl,
+  FormLabel,
+} from '@invoke-ai/ui';
 import { useProcessorNodeChanged } from 'features/controlAdapters/components/hooks/useProcessorNodeChanged';
 import { CONTROLNET_PROCESSORS } from 'features/controlAdapters/store/constants';
 import type {
@@ -23,7 +29,7 @@ type Props = {
 
 const DepthAnythingProcessor = (props: Props) => {
   const { controlNetId, processorNode, isEnabled } = props;
-  const { model_size } = processorNode;
+  const { model_size, resolution } = processorNode;
   const processorChanged = useProcessorNodeChanged();
 
   const { t } = useTranslation();
@@ -54,6 +60,17 @@ const DepthAnythingProcessor = (props: Props) => {
     [options, model_size]
   );
 
+  const handleResolutionChange = useCallback(
+    (v: number) => {
+      processorChanged(controlNetId, { resolution: v });
+    },
+    [controlNetId, processorChanged]
+  );
+
+  const handleResolutionDefaultChange = useCallback(() => {
+    processorChanged(controlNetId, { resolution: 512 });
+  }, [controlNetId, processorChanged]);
+
   return (
     <ProcessorWrapper>
       <FormControl isDisabled={!isEnabled}>
@@ -65,6 +82,27 @@ const DepthAnythingProcessor = (props: Props) => {
           onChange={handleModelSizeChange}
         />
       </FormControl>
+      <FormControl isDisabled={!isEnabled}>
+        <FormLabel>{t('controlnet.imageResolution')}</FormLabel>
+        <CompositeSlider
+          value={resolution}
+          onChange={handleResolutionChange}
+          defaultValue={DEFAULTS.resolution}
+          min={64}
+          max={4096}
+          step={64}
+          marks
+          onReset={handleResolutionDefaultChange}
+        />
+        <CompositeNumberInput
+          value={resolution}
+          onChange={handleResolutionChange}
+          defaultValue={DEFAULTS.resolution}
+          min={64}
+          max={4096}
+          step={64}
+        />
+      </FormControl>
     </ProcessorWrapper>
   );
 };

invokeai/frontend/web/src/features/controlAdapters/store/constants.ts

Lines changed: 1 addition & 0 deletions
@@ -95,6 +95,7 @@ export const CONTROLNET_PROCESSORS: ControlNetProcessorsDict = {
       id: 'depth_anything_image_processor',
       type: 'depth_anything_image_processor',
       model_size: 'small',
+      resolution: 512,
       offload: false,
     },
   },

invokeai/frontend/web/src/features/controlAdapters/store/types.ts

Lines changed: 1 addition & 1 deletion
@@ -80,7 +80,7 @@ export type RequiredContentShuffleImageProcessorInvocation = O.Required<
  */
 export type RequiredDepthAnythingImageProcessorInvocation = O.Required<
   DepthAnythingImageProcessorInvocation,
-  'type' | 'model_size' | 'offload'
+  'type' | 'model_size' | 'resolution' | 'offload'
 >;
 
 export const zDepthAnythingModelSize = z.enum(['large', 'base', 'small']);
