update run_inference

Clément Pinard · Clément Pinard · commit 02d58e0f9f7e · 2019-04-04T14:36:37.000+02:00
* now uses imageio for img reading/writing
* does not need to import the whole main script, just the util
* more options, to allow for different values to output
diff --git a/README.md b/README.md
@@ -8,7 +8,7 @@ It has not been tested for multiple GPU, but it should work just as in original
 
 The code provides a training example, using [the flying chair dataset](http://lmb.informatik.uni-freiburg.de/resources/datasets/FlyingChairs.en.html) , with data augmentation. An implementation for [Scene Flow Datasets](http://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html) may be added in the future.
 
-Two neural network models are currently provided :
+Two neural network models are currently provided, along with their batch norm variations (experimental) :
 
  - **FlowNetS**
  - **FlowNetSBN**
@@ -22,12 +22,12 @@ Thanks to [Kaixhin](https://github.com/Kaixhin) you can download a pretrained ve
 Directly feed the downloaded Network to the script, you don't need to uncompress it even if your desktop environment tells you so.
 
 ### Note on networks from caffe
-These networks expect a BGR input in range `[-0.5,0.5]` (compared to RGB in pytorch). However, BGR order is not very important.
+These networks expect a BGR input (compared to RGB in pytorch). However, BGR order is not very important.
 
 ## Prerequisite
 
 ```
-pytorch >= 0.4.1
+pytorch >= 1.0.1
 tensorboard-pytorch
 tensorboardX >= 1.4
 spatial-correlation-sampler>=0.0.8
@@ -88,6 +88,22 @@ Exact code for Optical Flow -> Color map can be found [here](main.py#L321)
 | <img src='images/input_2.gif' width=256> | <img src='images/pred_2.png' width=256> | <img src='images/GT_2.png' width=256> |
 | <img src='images/input_3.gif' width=256> | <img src='images/pred_3.png' width=256> | <img src='images/GT_3.png' width=256> |
 
+## Running inference on a set of image pairs
+
+If you need to run the network on your images, you can download a pretrained network [here](https://drive.google.com/open?id=0B5EC7HMbyk3CbjFPb0RuODI3NmM) and launch the inference script on your folder of image pairs.
+
+Your folder needs to have all the image pairs in the same location, with the name pattern
+```
+{image_name}1.{ext}
+{image_name}2.{ext}
+```
+
+```bash
+python3 run_inference.py /path/to/images/folder /path/to/pretrained
+```
+
+As with the `main.py` script, a help menu is available for additional options.
+
 ## Note on transform functions
 
 In order to have coherent transformations between inputs and target, we must define new transformations that take both input and target, as a new random variable is defined each time a random transformation is called.
diff --git a/main.py b/main.py
@@ -1,6 +1,5 @@
 import argparse
 import os
-import shutil
 import time
 
 import torch
@@ -16,13 +15,12 @@
 from multiscaleloss import multiscaleEPE, realEPE
 import datetime
 from tensorboardX import SummaryWriter
-import numpy as np
+from util import flow2rgb, AverageMeter, save_checkpoint
 
 model_names = sorted(name for name in models.__dict__
                      if name.islower() and not name.startswith("__"))
 dataset_names = sorted(name for name in datasets.__all__)
 
-
 parser = argparse.ArgumentParser(description='PyTorch FlowNet Training on several datasets',
                                  formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 parser.add_argument('data', metavar='DIR',
@@ -86,7 +84,7 @@
 
 
 def main():
-    global args, best_EPE, save_path
+    global args, best_EPE
     args = parser.parse_args()
     save_path = '{},{},{}epochs{},b{},lr{}'.format(
         args.arch,
@@ -209,7 +207,7 @@ def main():
             'state_dict': model.module.state_dict(),
             'best_EPE': best_EPE,
             'div_flow': args.div_flow
-        }, is_best)
+        }, is_best, save_path)
 
 
 def train(train_loader, model, optimizer, epoch, train_writer):
@@ -308,48 +306,5 @@ def validate(val_loader, model, epoch, output_writers):
     return flow2_EPEs.avg
 
 
-def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
-    torch.save(state, os.path.join(save_path,filename))
-    if is_best:
-        shutil.copyfile(os.path.join(save_path,filename), os.path.join(save_path,'model_best.pth.tar'))
-
-
-class AverageMeter(object):
-    """Computes and stores the average and current value"""
-
-    def __init__(self):
-        self.reset()
-
-    def reset(self):
-        self.val = 0
-        self.avg = 0
-        self.sum = 0
-        self.count = 0
-
-    def update(self, val, n=1):
-        self.val = val
-        self.sum += val * n
-        self.count += n
-        self.avg = self.sum / self.count
-
-    def __repr__(self):
-        return '{:.3f} ({:.3f})'.format(self.val, self.avg)
-
-
-def flow2rgb(flow_map, max_value):
-    flow_map_np = flow_map.detach().cpu().numpy()
-    _, h, w = flow_map_np.shape
-    flow_map_np[:,(flow_map_np[0] == 0) & (flow_map_np[1] == 0)] = float('nan')
-    rgb_map = np.ones((3,h,w)).astype(np.float32)
-    if max_value is not None:
-        normalized_flow_map = flow_map_np / max_value
-    else:
-        normalized_flow_map = flow_map_np / (np.abs(flow_map_np).max())
-    rgb_map[0] += normalized_flow_map[0]
-    rgb_map[1] -= 0.5*(normalized_flow_map[0] + normalized_flow_map[1])
-    rgb_map[2] += normalized_flow_map[1]
-    return rgb_map.clip(0,1)
-
-
 if __name__ == '__main__':
     main()
diff --git a/run_inference.py b/run_inference.py
@@ -6,12 +6,12 @@
 import torch.nn.functional as F
 import models
 from tqdm import tqdm
+
 import torchvision.transforms as transforms
 import flow_transforms
-from scipy.ndimage import imread
-from scipy.misc import imsave
+from imageio import imread, imwrite
 import numpy as np
-from main import flow2rgb
+from util import flow2rgb
 
 model_names = sorted(name for name in models.__dict__
                      if name.islower() and not name.startswith("__"))
@@ -22,11 +22,15 @@
 parser.add_argument('data', metavar='DIR',
                     help='path to images folder, image names must match \'[name]0.[ext]\' and \'[name]1.[ext]\'')
 parser.add_argument('pretrained', metavar='PTH', help='path to pre-trained model')
-parser.add_argument('--output', metavar='DIR', default=None,
+parser.add_argument('--output', '-o', metavar='DIR', default=None,
                     help='path to output folder. If not set, will be created in data folder')
+parser.add_argument('--output-value', '-v', metavar='VAL', choices=['raw', 'vis', 'both'], default='both',
+                    help='which value to output, between raw input (as a npy file) and color vizualisation (as an image file).'
+                    ' If not set, will output both')
 parser.add_argument('--div-flow', default=20, type=float,
                     help='value by which flow will be divided. overwritten if stored in pretrained file')
-parser.add_argument("--img-exts", default=['png', 'jpg', 'bmp'], nargs='*', type=str, help="images extensions to glob")
+parser.add_argument("--img-exts", metavar='EXT', default=['png', 'jpg', 'bmp', 'ppm'], nargs='*', type=str,
+                    help="images extensions to glob")
 parser.add_argument('--max_flow', default=None, type=float,
                     help='max flow value. Flow map color is saturated above this value. If not set, will use flow map\'s max value')
 parser.add_argument('--upsampling', '-u', choices=['nearest', 'bilinear'], default=None, help='if not set, will output FlowNet raw input,'
@@ -40,6 +44,14 @@
 def main():
     global args, save_path
     args = parser.parse_args()
+
+    if args.output_value == 'both':
+        output_string = "raw output and RGB visualization"
+    elif args.output_value == 'raw':
+        output_string = "raw output"
+    elif args.output_value == 'vis':
+        output_string = "RGB visualization"
+    print("=> will save " + output_string)
     data_dir = Path(args.data)
     print("=> fetching img pairs in '{}'".format(args.data))
     if args.output is None:
@@ -58,9 +70,9 @@ def main():
 
     img_pairs = []
     for ext in args.img_exts:
-        test_files = data_dir.files('*0.{}'.format(ext))
+        test_files = data_dir.files('*1.{}'.format(ext))
         for file in test_files:
-            img_pair = file.parent / (file.namebase[:-1] + '1.{}'.format(ext))
+            img_pair = file.parent / (file.namebase[:-1] + '2.{}'.format(ext))
             if img_pair.isfile():
                 img_pairs.append([file, img_pair])
 
@@ -92,9 +104,13 @@ def main():
         if args.upsampling is not None:
             output = F.interpolate(output, size=img1.size()[-2:], mode=args.upsampling, align_corners=False)
         for suffix, flow_output in zip(['flow', 'inv_flow'], output):
-            rgb_flow = flow2rgb(args.div_flow * flow_output, max_value=args.max_flow)
-            to_save = (rgb_flow * 255).astype(np.uint8).transpose(1,2,0)
-            imsave(save_path/'{}{}.png'.format(img1_file.namebase[:-1], suffix), to_save)
+            filename = save_path/'{}{}'.format(img1_file.namebase[:-1], suffix)
+            if args.output_value in['vis', 'both']:
+                rgb_flow = flow2rgb(args.div_flow * flow_output, max_value=args.max_flow)
+                to_save = (rgb_flow * 255).astype(np.uint8).transpose(1,2,0)
+                imwrite(filename + '.png', to_save)
+            if args.output_value in ['raw', 'both']:
+                np.save(filename + 'npy', flow_output.cpu().numpy())
 
 
 if __name__ == '__main__':