"""
Born out of Depth Anything V1 Issue 36.
Make sure you have the necessary libraries installed.
Code by @1ssb

This script runs a depth model on a set of images and converts each predicted
depth map into a coloured point cloud. One point cloud file (.ply) per input
image is saved in the specified output directory.

Usage:
    python script.py --encoder vitl --load-from path_to_model --max-depth 20 --img-path path_to_images --outdir output_directory --focal-length-x 470.4 --focal-length-y 470.4

Arguments:
    --encoder: Model encoder to use. Choices are ['vits', 'vitb', 'vitl', 'vitg'].
    --load-from: Path to the pre-trained model weights.
    --max-depth: Maximum depth value for the depth map.
    --img-path: Path to the input image or directory containing images.
    --outdir: Directory to save the output point clouds.
    --focal-length-x: Focal length along the x-axis.
    --focal-length-y: Focal length along the y-axis.
"""
421
522import argparse
623import cv2
1431from depth_anything_v2 .dpt import DepthAnythingV2
1532
1633
def _collect_filenames(img_path):
    """Return the list of input paths selected by ``--img-path``.

    * A file whose name ends in ``txt`` is treated as a list of paths.
      NOTE(review): the body of this branch is elided in the diff view this
      was reconstructed from; one path per line is assumed -- confirm
      against the full file.
    * Any other existing file is used as the single input.
    * Anything else is treated as a directory and globbed recursively.
    """
    if os.path.isfile(img_path):
        if img_path.endswith('txt'):
            with open(img_path, 'r') as f:
                return f.read().splitlines()
        return [img_path]
    return glob.glob(os.path.join(img_path, '**/*'), recursive=True)


def _backproject(depth, focal_length_x, focal_length_y):
    """Turn a 2-D depth array into an (N, 3) array of x, y, z points.

    Pixel coordinates are centred on (width / 2, height / 2), divided by the
    focal lengths and scaled by the depth at that pixel.
    """
    height, width = depth.shape
    x, y = np.meshgrid(np.arange(width), np.arange(height))
    x = (x - width / 2) / focal_length_x
    y = (y - height / 2) / focal_length_y
    return np.stack((x * depth, y * depth, depth), axis=-1).reshape(-1, 3)


def main(argv=None):
    """Generate a coloured point cloud (.ply) for every input image.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            lets argparse read them from ``sys.argv``.

    Raises:
        SystemExit: raised by argparse when required arguments are missing
            or invalid.
    """
    # Parse command-line arguments
    parser = argparse.ArgumentParser(description='Generate depth maps and point clouds from images.')
    parser.add_argument('--encoder', default='vitl', type=str, choices=['vits', 'vitb', 'vitl', 'vitg'],
                        help='Model encoder to use.')
    # No default here: the option is required, so a default could never apply.
    parser.add_argument('--load-from', type=str, required=True,
                        help='Path to the pre-trained model weights.')
    parser.add_argument('--max-depth', default=20, type=float,
                        help='Maximum depth value for the depth map.')
    parser.add_argument('--img-path', type=str, required=True,
                        help='Path to the input image or directory containing images.')
    parser.add_argument('--outdir', type=str, default='./vis_pointcloud',
                        help='Directory to save the output point clouds.')
    parser.add_argument('--focal-length-x', default=470.4, type=float,
                        help='Focal length along the x-axis.')
    parser.add_argument('--focal-length-y', default=470.4, type=float,
                        help='Focal length along the y-axis.')

    args = parser.parse_args(argv)

    # Determine the device to use (CUDA, MPS, or CPU)
    DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'

    # Model configuration based on the chosen encoder
    model_configs = {
        'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
        'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
        'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
        'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
    }

    # Initialize the DepthAnythingV2 model with the specified configuration
    depth_anything = DepthAnythingV2(**{**model_configs[args.encoder], 'max_depth': args.max_depth})
    depth_anything.load_state_dict(torch.load(args.load_from, map_location='cpu'))
    depth_anything = depth_anything.to(DEVICE).eval()

    # Get the list of image files to process
    filenames = _collect_filenames(args.img_path)

    # Create the output directory if it doesn't exist
    os.makedirs(args.outdir, exist_ok=True)

    # Process each image file
    for k, filename in enumerate(filenames):
        print(f'Processing {k+1}/{len(filenames)}: {filename}')

        # Decode the image once. The recursive glob also yields directories
        # and non-image files; cv2.imread returns None for those, so skip
        # them instead of aborting the whole batch.
        image = cv2.imread(filename)
        if image is None:
            print(f'Skipping {filename}: not a readable image')
            continue

        # Take the size from the array that is actually fed to the model so
        # depth, coordinates and colours always share the same pixel grid
        # (cv2.imread honours EXIF orientation, PIL's Image.open does not).
        height, width = image.shape[:2]

        pred = depth_anything.infer_image(image, height)

        # Resize depth prediction to match the original image size
        resized_pred = Image.fromarray(pred).resize((width, height), Image.NEAREST)

        # Generate mesh grid and calculate point cloud coordinates
        points = _backproject(np.array(resized_pred), args.focal_length_x, args.focal_length_y)

        # OpenCV decodes to BGR; the point cloud colours are RGB in [0, 1].
        colors = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).reshape(-1, 3) / 255.0

        # Create the point cloud and save it to the output directory
        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(points)
        pcd.colors = o3d.utility.Vector3dVector(colors)
        out_name = os.path.splitext(os.path.basename(filename))[0] + ".ply"
        o3d.io.write_point_cloud(os.path.join(args.outdir, out_name), pcd)


if __name__ == '__main__':
    main()
0 commit comments