-
Notifications
You must be signed in to change notification settings - Fork 161
Description
Hi there, thanks for your work on this. I was looking to create stereo images from the image + depth map and found this:
https://github.com/m5823779/stereo-image-generation
It's a bit out of date, but I managed to get it working. I found that the res101 depth map output produces better results than the existing MiDaS integration in that repo. I extracted the necessary bits from stereo_generation_image.py and modified them to read the depth map output produced by your extension.
I've tested out the stereo image in an Oculus CV1 headset using Whirligig; images are attached. If I end up using this frequently, I'm happy to write a PR if you think it's a good addition to your plugin. Otherwise, in case you want to integrate it sooner, here's the necessary code to run it on the command line.
Cheers
import os
import cv2
import argparse
import numpy as np
# Multiplier used by generate_stereo() when converting the IPD-derived
# deviation into a pixel shift (scaled further by image_width / 1920).
# NOTE(review): presumably the viewing monitor's width in centimetres — confirm.
MONITOR_W = 38.5
def write_depth(depth, bits=1, reverse=True):
    """Rescale a depth array to the full range of an unsigned integer type.

    Args:
        depth: NumPy array of depth values.
        bits: Number of bytes per output sample. ``2`` produces ``uint16``;
            any other value produces ``uint8``.
        reverse: When False the rescaled values are inverted
            (``max_val - value``); when True they are returned as computed.

    Returns:
        An array with the same shape as ``depth`` whose values span
        ``0 .. 2 ** (8 * bits) - 1``. A flat input (no variation between
        minimum and maximum) maps to all zeros before the optional inversion.
    """
    depth_min = depth.min()
    depth_max = depth.max()
    max_val = (2 ** (8 * bits)) - 1

    if depth_max - depth_min > np.finfo("float").eps:
        out = max_val * (depth - depth_min) / (depth_max - depth_min)
    else:
        # A flat map cannot be normalised. Use an array rather than the
        # scalar 0 so that the ``astype`` call below keeps working and the
        # result keeps the input's shape.
        out = np.zeros(depth.shape, dtype=float)

    if not reverse:
        out = max_val - out

    return out.astype("uint16" if bits == 2 else "uint8")
def generate_stereo(left_img, depth, ipd):
    """Synthesise a right-eye view by shifting left-eye pixels horizontally.

    Args:
        left_img: 3-channel image array of shape (height, width, channels),
            treated as BGR when converted to grayscale for hole detection.
        depth: 2-D depth array indexed as ``depth[row][col]``; it must cover
            at least the image's height and width.
        ipd: Interpupillary distance used to scale the maximum pixel shift.

    Returns:
        A new array shaped like ``left_img`` holding the right-eye view.
    """
    h, w, _ = left_img.shape

    depth_min = depth.min()
    depth_max = depth.max()
    depth_range = depth_max - depth_min
    if depth_range > 0:
        depth = (depth - depth_min) / depth_range
    else:
        # A flat depth map would divide by zero and yield NaN, which then
        # makes int() raise below. With no relative depth there is no
        # parallax to apply, so use the value that produces a zero shift.
        depth = np.ones(depth.shape, dtype=float)

    right = np.zeros_like(left_img)

    deviation_cm = ipd * 0.12
    deviation = deviation_cm * MONITOR_W * (w / 1920)

    print("\ndeviation:", deviation)

    # Forward-map every left pixel to its shifted column. Pixels with the
    # largest normalised depth stay in place; the smallest move left by the
    # full deviation. When several pixels land on the same target, the one
    # visited last (largest source column) wins.
    # The falloff is squared; the original author left the linear variant,
    # (1 - depth) * deviation, as a commented-out alternative.
    for row in range(h):
        for col in range(w):
            col_r = col - int((1 - depth[row][col] ** 2) * deviation)
            if col_r >= 0:
                right[row][col_r] = left_img[row][col]

    # Fill holes: any pixel whose grayscale value is 0 is treated as unset
    # and takes the nearest non-black pixel on the same row, looking right
    # first, then left, within the deviation distance.
    # NOTE: genuinely black source pixels are indistinguishable from holes
    # here and are therefore overwritten as well.
    right_fix = np.array(right)
    gray = cv2.cvtColor(right_fix, cv2.COLOR_BGR2GRAY)
    rows, cols = np.where(gray == 0)
    for row, col in zip(rows, cols):
        for offset in range(1, int(deviation)):
            r_offset = col + offset
            l_offset = col - offset
            if r_offset < w and not np.all(right_fix[row][r_offset] == 0):
                right_fix[row][col] = right_fix[row][r_offset]
                break
            if l_offset >= 0 and not np.all(right_fix[row][l_offset] == 0):
                right_fix[row][col] = right_fix[row][l_offset]
                break

    return right_fix
def overlap(im1, im2):
    """Merge two views into a single red/cyan anaglyph image.

    The output has the height and width of ``im2``. Channel 2 (red, for BGR
    input) is taken from ``im1``; channels 0 and 1 (blue and green) are taken
    from ``im2``.

    Args:
        im1: "Left" image array; supplies the red channel.
        im2: "Right" image array; supplies the blue and green channels and
            determines the output size.

    Returns:
        A ``uint8`` array of shape ``(im2_height, im2_width, 3)``. Where
        ``im1`` is smaller than ``im2`` the uncovered red values stay 0;
        where it is larger the excess is ignored. An input that lacks the
        required channels contributes nothing.
    """
    height1, width1 = im1.shape[0], im1.shape[1]
    height2, width2 = im2.shape[0], im2.shape[1]

    composite = np.zeros((height2, width2, 3), np.uint8)

    # Red channel from the left image, clipped to the region both share.
    if im1.ndim == 3 and im1.shape[2] > 2:
        rows = min(height1, height2)
        cols = min(width1, width2)
        composite[:rows, :cols, 2] = im1[:rows, :cols, 2]

    # Blue and green channels from the right image.
    if im2.ndim == 3 and im2.shape[2] > 1:
        composite[:, :, :2] = im2[:, :, :2]

    return composite
def parse_args(argv=None):
    """Parse the command-line arguments for the stereo generator.

    Args:
        argv: Optional list of argument strings to parse. When None,
            argparse reads the arguments from ``sys.argv``.

    Returns:
        An ``argparse.Namespace`` with the attributes ``image``, ``depth``,
        ``output_dir`` (default ``"."``) and ``ipd`` (default ``6.5``).
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("image", metavar="IMAGE", type=str, help="Image")
    parser.add_argument("depth", metavar="DEPTH", type=str, help="16bit depth map")
    parser.add_argument("--output-dir", type=str, help="Output dir", default=".")
    parser.add_argument(
        "--ipd", type=float, default=6.5, help="Interpupillary distance (cm)"
    )
    return parser.parse_args(argv)
def main():
    """Build a side-by-side stereo image from an image and its depth map.

    Reads the image and depth map named on the command line, blurs the depth
    map with a 3x3 kernel, generates the right-eye view, and writes the
    left and right views stacked horizontally to
    ``<output-dir>/<image name>_stereo.png``.

    Raises:
        SystemExit: If either input cannot be read, the depth map's size
            does not match the image, or the output cannot be written.
    """
    args = parse_args()

    # cv2.imread signals failure by returning None instead of raising.
    left_img = cv2.imread(args.image)
    if left_img is None:
        raise SystemExit(f"Could not read image: {args.image}")

    depth_map = cv2.imread(args.depth, cv2.IMREAD_ANYDEPTH)
    if depth_map is None:
        raise SystemExit(f"Could not read depth map: {args.depth}")

    # generate_stereo indexes the depth map with image coordinates, so the
    # two must have the same height and width.
    if depth_map.shape[:2] != left_img.shape[:2]:
        raise SystemExit(
            f"Depth map size {depth_map.shape[:2]} does not match "
            f"image size {left_img.shape[:2]}"
        )

    depth_map = cv2.blur(depth_map, (3, 3))
    right_img = generate_stereo(left_img, depth_map, args.ipd)
    stereo = np.hstack([left_img, right_img])

    name, _ = os.path.splitext(os.path.basename(args.image))
    os.makedirs(args.output_dir, exist_ok=True)
    stereo_path = os.path.join(args.output_dir, f"{name}_stereo.png")
    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(stereo_path, stereo):
        raise SystemExit(f"Could not write stereo image: {stereo_path}")

    # NOTE: anaglyph output was disabled in the original script. To enable:
    #   anaglyph = overlap(left_img, right_img)
    #   cv2.imwrite(os.path.join(args.output_dir, f"{name}_anaglyph.png"), anaglyph)
if __name__ == "__main__":
    # Run the command-line tool only when executed as a script, not on import.
    main()


