Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion candle-examples/examples/depth_anything_v2/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ This example first instantiates the DINOv2 model and then proceeds to create Dep
## Running an example with color map and Metal

```bash
cargo run --features cuda,depth_anything_v2 --package candle-examples --example depth_anything_v2 -- --color-map --image candle-examples/examples/yolo-v8/assets/bike.jpg
cargo run --features metal,depth_anything_v2 --package candle-examples --example depth_anything_v2 -- --color-map --image candle-examples/examples/yolo-v8/assets/bike.jpg
```

4 changes: 2 additions & 2 deletions candle-examples/examples/depth_anything_v2/color_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use enterpolation::linear::ConstEquidistantLinear;
use enterpolation::Generator;
use palette::LinSrgb;

use candle::Tensor;
use candle::{BackendStorage, Tensor};

pub struct SpectralRColormap {
gradient: ConstEquidistantLinear<f32, LinSrgb, 9>,
Expand Down Expand Up @@ -30,7 +30,7 @@ impl SpectralRColormap {
self.gradient.gen(value)
}

pub fn gray2color(&self, gray: &Tensor) -> candle::Result<Tensor> {
pub fn gray2color<B: BackendStorage>(&self, gray: &Tensor<B>) -> candle::Result<Tensor<B>> {
println!("Gray: {:?}", gray.dims());
let gray_values: Vec<f32> = gray.flatten_all()?.to_vec1()?;
let rgb_values: Vec<f32> = gray_values
Expand Down
84 changes: 55 additions & 29 deletions candle-examples/examples/depth_anything_v2/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ use clap::Parser;
use std::{ffi::OsString, path::PathBuf, sync::Arc};

use candle::DType::{F32, U8};
use candle::{DType, Device, Module, Result, Tensor};
use candle::{
BackendDevice, BackendStorage, CpuStorage, DType, Device, Module, Result, Tensor,
TryConvertStorage,
};
use candle_examples::{load_image, load_image_and_resize, save_image};
use candle_nn::VarBuilder;
use candle_transformers::models::depth_anything_v2::{DepthAnythingV2, DepthAnythingV2Config};
Expand Down Expand Up @@ -47,42 +50,43 @@ struct Args {
color_map: bool,
}

pub fn main() -> anyhow::Result<()> {
let args = Args::parse();
let device = candle_examples::device(args.cpu)?;

pub fn run<B: BackendStorage + TryConvertStorage<CpuStorage> + 'static>(
args: Args,
device: &B::Device,
) -> Result<()> {
let dinov2_model_file = match args.dinov2_model {
None => {
let api = hf_hub::api::sync::Api::new()?;
let api = hf_hub::api::sync::Api::new().unwrap();
let api = api.model("lmz/candle-dino-v2".into());
api.get("dinov2_vits14.safetensors")?
api.get("dinov2_vits14.safetensors").unwrap()
}
Some(dinov2_model) => dinov2_model,
};
println!("Using file {:?}", dinov2_model_file);

let vb = unsafe { VarBuilder::from_mmaped_safetensors(&[dinov2_model_file], F32, &device)? };
let dinov2 = dinov2::vit_small(vb)?;
let vb = unsafe { VarBuilder::from_mmaped_safetensors(&[dinov2_model_file], F32, device)? };
let dinov2: dinov2::DinoVisionTransformer<B> = dinov2::vit_small(vb)?;
println!("DinoV2 model built");

let depth_anything_model_file = match args.depth_anything_v2_model {
None => {
let api = hf_hub::api::sync::Api::new()?;
let api = hf_hub::api::sync::Api::new().unwrap();
let api = api.model("jeroenvlek/depth-anything-v2-safetensors".into());
api.get("depth_anything_v2_vits.safetensors")?
api.get("depth_anything_v2_vits.safetensors").unwrap()
}
Some(depth_anything_model) => depth_anything_model,
};
println!("Using file {:?}", depth_anything_model_file);

let vb = unsafe {
VarBuilder::from_mmaped_safetensors(&[depth_anything_model_file], DType::F32, &device)?
VarBuilder::from_mmaped_safetensors(&[depth_anything_model_file], DType::F32, device)?
};

let config = DepthAnythingV2Config::vit_small();
let depth_anything = DepthAnythingV2::new(Arc::new(dinov2), config, vb)?;

let (original_height, original_width, image) = load_and_prep_image(&args.image, &device)?;
let (original_height, original_width, image) =
load_and_prep_image(&args.image, device).unwrap();

println!("Loaded image {image:?}");

Expand All @@ -99,6 +103,21 @@ pub fn main() -> anyhow::Result<()> {
Ok(())
}

/// Entry point: parse CLI args and dispatch `run` on the selected backend.
///
/// Backend selection:
/// - `--cpu` always forces the CPU backend.
/// - Otherwise the first compiled-in GPU backend is used, preferring CUDA
///   over Metal when both features are enabled (the original ran *both*
///   backends in that case, doing the work twice).
/// - With no GPU feature compiled in, we fall back to CPU instead of
///   silently returning without running anything (the original's behavior).
pub fn main() -> anyhow::Result<()> {
    let args = Args::parse();

    if args.cpu {
        run::<candle::CpuStorage>(args, &candle::CpuDevice)?;
    } else {
        #[cfg(feature = "cuda")]
        {
            run::<candle::CudaStorage>(args, &candle::CudaDevice::new(0)?)?;
            return Ok(());
        }
        // Only reachable when the cuda feature is off, so `args` is moved once.
        #[cfg(all(feature = "metal", not(feature = "cuda")))]
        {
            run::<candle::MetalStorage>(args, &candle::MetalDevice::new(0)?)?;
            return Ok(());
        }
        #[cfg(not(any(feature = "cuda", feature = "metal")))]
        {
            eprintln!("no cuda/metal feature enabled; falling back to CPU");
            run::<candle::CpuStorage>(args, &candle::CpuDevice)?;
        }
    }
    Ok(())
}

fn full_output_path(image_path: &PathBuf, output_dir: &Option<PathBuf>) -> PathBuf {
let input_file_name = image_path.file_name().unwrap();
let mut output_file_name = OsString::from("depth_");
Expand All @@ -112,40 +131,45 @@ fn full_output_path(image_path: &PathBuf, output_dir: &Option<PathBuf>) -> PathB
output_path
}

fn load_and_prep_image(
fn load_and_prep_image<B: BackendStorage + TryConvertStorage<CpuStorage>>(
image_path: &PathBuf,
device: &Device,
) -> anyhow::Result<(usize, usize, Tensor)> {
let (_original_image, original_height, original_width) = load_image(&image_path, None)?;
device: &B::Device,
) -> anyhow::Result<(usize, usize, Tensor<B>)> {
let (_original_image, original_height, original_width): (Tensor<B>, usize, usize) =
load_image(&image_path, None, device)?;

let image = load_image_and_resize(&image_path, DINO_IMG_SIZE, DINO_IMG_SIZE)?
let image = load_image_and_resize(&image_path, DINO_IMG_SIZE, DINO_IMG_SIZE, device)?
.unsqueeze(0)?
.to_dtype(F32)?
.to_device(&device)?;
.to_dtype(F32)?;

let max_pixel_val = Tensor::try_from(255.0f32)?
.to_device(&device)?
.to_device(device)?
.broadcast_as(image.shape())?;
let image = (image / max_pixel_val)?;
let image = normalize_image(&image, &MAGIC_MEAN, &MAGIC_STD)?;
let image = normalize_image(&image, &MAGIC_MEAN, &MAGIC_STD, device)?;

Ok((original_height, original_width, image))
}

fn normalize_image(image: &Tensor, mean: &[f32; 3], std: &[f32; 3]) -> Result<Tensor> {
fn normalize_image<B: BackendStorage + TryConvertStorage<CpuStorage>>(
image: &Tensor<B>,
mean: &[f32; 3],
std: &[f32; 3],
device: &B::Device,
) -> Result<Tensor<B>> {
let mean_tensor =
Tensor::from_vec(mean.to_vec(), (3, 1, 1), &image.device())?.broadcast_as(image.shape())?;
Tensor::from_vec(mean.to_vec(), (3, 1, 1), device)?.broadcast_as(image.shape())?;
let std_tensor =
Tensor::from_vec(std.to_vec(), (3, 1, 1), &image.device())?.broadcast_as(image.shape())?;
Tensor::from_vec(std.to_vec(), (3, 1, 1), device)?.broadcast_as(image.shape())?;
image.sub(&mean_tensor)?.div(&std_tensor)
}

fn post_process_image(
image: &Tensor,
fn post_process_image<B: BackendStorage + TryConvertStorage<CpuStorage>>(
image: &Tensor<B>,
original_height: usize,
original_width: usize,
color_map: bool,
) -> Result<Tensor> {
) -> Result<Tensor<B>> {
let out = image.interpolate2d(original_height, original_width)?;
let out = scale_image(&out)?;

Expand All @@ -165,7 +189,9 @@ fn post_process_image(
out.to_dtype(U8)
}

fn scale_image(depth: &Tensor) -> Result<Tensor> {
fn scale_image<B: BackendStorage + TryConvertStorage<CpuStorage>>(
depth: &Tensor<B>,
) -> Result<Tensor<B>> {
let flat_values: Vec<f32> = depth.flatten_all()?.to_vec1()?;

let min_val = flat_values.iter().min_by(|a, b| a.total_cmp(b)).unwrap();
Expand Down
Loading