@@ -15,12 +15,14 @@
 from nyu_dataloader import NYUDataset
 from models import Decoder, ResNet
 from metrics import AverageMeter, Result
+from dense_to_sparse import UniformSampling, SimulatedStereo
 import criteria
 import utils
 
 model_names = ['resnet18', 'resnet50']
 loss_names = ['l1', 'l2']
-data_names = ['NYUDataset']
+data_names = ['nyudepthv2']
+sparsifier_names = [x.name for x in [UniformSampling, SimulatedStereo]]
 decoder_names = Decoder.names
 modality_names = NYUDataset.modality_names
 
@@ -46,6 +48,13 @@
                     ' (default: rgb)')
 parser.add_argument('-s', '--num-samples', default=0, type=int, metavar='N',
                     help='number of sparse depth samples (default: 0)')
+parser.add_argument('--max-depth', default=-1.0, type=float, metavar='D',
+                    help='cut-off depth of the sparsifier; a negative value means infinity (default: inf [m])')
+parser.add_argument('--sparsifier', metavar='SPARSIFIER', default=UniformSampling.name,
+                    choices=sparsifier_names,
+                    help='sparsifier: ' +
+                         ' | '.join(sparsifier_names) +
+                         ' (default: ' + UniformSampling.name + ')')
 parser.add_argument('--decoder', '-d', metavar='DECODER', default='deconv2',
                     choices=decoder_names,
                     help='decoder: ' +
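
A quick way to sanity-check the new flags is to parse them directly. A minimal sketch, assuming the file above is `main.py` (the commit does not show the file name); the valid sparsifier names are whatever `UniformSampling.name` and `SimulatedStereo.name` evaluate to in `dense_to_sparse.py`:

```python
# hypothetical sanity check of the new CLI flags; assumes this script is main.py
import main

args = main.parser.parse_args(
    ['-s', '200', '--max-depth', '10.0',
     '--sparsifier', main.sparsifier_names[0]])
print(args.sparsifier, args.max_depth)  # prints the chosen sparsifier name and 10.0
```
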
@@ -88,15 +97,24 @@
 def main():
     global args, best_result, output_directory, train_csv, test_csv
     args = parser.parse_args()
-    args.data = os.path.join('data', args.data)
     if args.modality == 'rgb' and args.num_samples != 0:
         print("number of samples is forced to be 0 when input modality is rgb")
         args.num_samples = 0
-
+    if args.modality == 'rgb' and args.max_depth != 0.0:
+        print("max depth is forced to be 0.0 when input modality is rgb")
+        args.max_depth = 0.0
+
+    sparsifier = None
+    max_depth = args.max_depth if args.max_depth >= 0.0 else np.inf
+    if args.sparsifier == UniformSampling.name:
+        sparsifier = UniformSampling(num_samples=args.num_samples, max_depth=max_depth)
+    elif args.sparsifier == SimulatedStereo.name:
+        sparsifier = SimulatedStereo(num_samples=args.num_samples, max_depth=max_depth)
+
     # create results folder, if not already exists
     output_directory = os.path.join('results',
-        'NYUDataset.modality={}.nsample={}.arch={}.decoder={}.criterion={}.lr={}.bs={}'.
-        format(args.modality, args.num_samples, args.arch, args.decoder, args.criterion, args.lr, args.batch_size))
+        '{}.sparsifier={}.modality={}.arch={}.decoder={}.criterion={}.lr={}.bs={}'.
+        format(args.data, sparsifier, args.modality, args.arch, args.decoder, args.criterion, args.lr, args.batch_size))
     if not os.path.exists(output_directory):
         os.makedirs(output_directory)
     train_csv = os.path.join(output_directory, 'train.csv')
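
The commit only shows that both sparsifiers expose a class-level `name` and take `(num_samples, max_depth)`. A minimal sketch of what such a sparsifier might look like, assuming a `dense_to_sparse(rgb, depth)` method that returns a boolean keep-mask; the method name, the `'uar'` value, and the uniform-probability sampling are assumptions, not taken from this diff:

```python
import numpy as np

class UniformSampling(object):
    # assumed interface: this commit only shows `name`, `num_samples`, `max_depth`
    name = 'uar'  # hypothetical value; the real one lives in dense_to_sparse.py

    def __init__(self, num_samples, max_depth=np.inf):
        self.num_samples = num_samples
        self.max_depth = max_depth

    def dense_to_sparse(self, rgb, depth):
        # boolean mask keeping ~num_samples pixels with valid, near-enough depth
        mask_keep = depth > 0
        if not np.isinf(self.max_depth):
            mask_keep = np.bitwise_and(mask_keep, depth <= self.max_depth)
        n_keep = np.count_nonzero(mask_keep)
        if n_keep == 0:
            return mask_keep
        prob = float(self.num_samples) / n_keep
        return np.bitwise_and(mask_keep,
                              np.random.uniform(0, 1, depth.shape) < prob)
```

This is why `max_depth = np.inf` is a sensible encoding of "no cut-off" above: the mask then degenerates to the plain validity check `depth > 0`.
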
@@ -112,19 +130,19 @@ def main():
 
     # Data loading code
     print("=> creating data loaders ...")
-    traindir = os.path.join(args.data, 'train')
-    valdir = os.path.join(args.data, 'val')
+    traindir = os.path.join('data', args.data, 'train')
+    valdir = os.path.join('data', args.data, 'val')
 
-    train_dataset = NYUDataset(traindir, type='train',
-        modality=args.modality, num_samples=args.num_samples)
+    train_dataset = NYUDataset(traindir, type='train',
+        modality=args.modality, sparsifier=sparsifier)
     train_loader = torch.utils.data.DataLoader(
         train_dataset, batch_size=args.batch_size, shuffle=True,
         num_workers=args.workers, pin_memory=True, sampler=None)
 
     # set batch size to be 1 for validation
-    val_dataset = NYUDataset(valdir, type='val',
-        modality=args.modality, num_samples=args.num_samples)
-    val_loader = torch.utils.data.DataLoader(val_dataset,
+    val_dataset = NYUDataset(valdir, type='val',
+        modality=args.modality, sparsifier=sparsifier)
+    val_loader = torch.utils.data.DataLoader(val_dataset,
         batch_size=1, shuffle=False, num_workers=args.workers, pin_memory=True)
 
     print("=> data loaders created.")
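
The dataset now receives the sparsifier object instead of a raw `num_samples`. A sketch of how `NYUDataset` could use it to build the sparse depth channel and the 4-channel rgbd input (assumed; `nyu_dataloader.py` is not part of this diff and the helper names are hypothetical):

```python
import numpy as np

def create_sparse_depth(sparsifier, rgb, depth):
    # zero out every pixel the sparsifier's boolean mask does not keep
    if sparsifier is None:
        return depth
    mask_keep = sparsifier.dense_to_sparse(rgb, depth)
    sparse_depth = np.zeros(depth.shape)
    sparse_depth[mask_keep] = depth[mask_keep]
    return sparse_depth

def create_rgbd(sparsifier, rgb, depth):
    # stack the sparse depth as a 4th channel: H x W x 4
    sparse_depth = create_sparse_depth(sparsifier, rgb, depth)
    return np.append(rgb, np.expand_dims(sparse_depth, axis=2), axis=2)
```
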
@@ -306,11 +324,18 @@ def validate(val_loader, model, epoch, write_to_file=True):
             rgb = input
         elif args.modality == 'rgbd':
             rgb = input[:,:3,:,:]
+            depth = input[:,3:,:,:]
 
         if i == 0:
-            img_merge = utils.merge_into_row(rgb, target, depth_pred)
+            if args.modality == 'rgbd':
+                img_merge = utils.merge_into_row_with_gt(rgb, depth, target, depth_pred)
+            else:
+                img_merge = utils.merge_into_row(rgb, target, depth_pred)
         elif (i < 8*skip) and (i % skip == 0):
-            row = utils.merge_into_row(rgb, target, depth_pred)
+            if args.modality == 'rgbd':
+                row = utils.merge_into_row_with_gt(rgb, depth, target, depth_pred)
+            else:
+                row = utils.merge_into_row(rgb, target, depth_pred)
             img_merge = utils.add_row(img_merge, row)
         elif i == 8*skip:
             filename = output_directory + '/comparison_' + str(epoch) + '.png'
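
`utils.merge_into_row_with_gt` is new in this commit but its body is not shown. A plausible sketch, assuming it colorizes the sparse input depth, ground truth, and prediction on one shared color scale and concatenates them with the rgb frame into a single row; the function body and the `colored_depthmap` helper are assumptions:

```python
import numpy as np
import matplotlib.pyplot as plt

cmap = plt.cm.viridis

def colored_depthmap(depth, d_min, d_max):
    # map an H x W depth array to an H x W x 3 uint8 image via a colormap
    depth_rel = (depth - d_min) / (d_max - d_min)  # assumes d_max > d_min
    return (255 * cmap(depth_rel)[:, :, :3]).astype('uint8')

def merge_into_row_with_gt(input, depth_input, depth_target, depth_pred):
    # tensors are 1 x C x H x W; convert to H x W (x C) numpy arrays
    rgb = 255 * np.transpose(np.squeeze(input.cpu().numpy()), (1, 2, 0))
    d_in = np.squeeze(depth_input.cpu().numpy())
    d_gt = np.squeeze(depth_target.cpu().numpy())
    d_pr = np.squeeze(depth_pred.detach().cpu().numpy())
    # one shared scale so sparse input, gt, and prediction are comparable
    d_min = min(d_in.min(), d_gt.min(), d_pr.min())
    d_max = max(d_in.max(), d_gt.max(), d_pr.max())
    return np.hstack([rgb.astype('uint8'),
                      colored_depthmap(d_in, d_min, d_max),
                      colored_depthmap(d_gt, d_min, d_max),
                      colored_depthmap(d_pr, d_min, d_max)])
```
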