Skip to content

Commit 42b1d20

Browse files
authored
Calibration (#68)
* add timing and postpone the image loading * add calibration * make z_max dynamic when non specified * scale 7 * add yaml for calibration * fix long edge in predictor * fix stereo and clean colors * update images * update path for configs
1 parent ac21008 commit 42b1d20

File tree

17 files changed

+264
-210
lines changed

17 files changed

+264
-210
lines changed

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ For an example image, run the following command:
128128
python3 -m monoloco.run predict docs/002282.png \
129129
--path_gt names-kitti-200615-1022.json \
130130
-o <output directory> \
131-
--long-edge <rescale the image by providing dimension of long side>
131+
--long-edge <rescale the image by providing dimension of long side> \
132132
--n_dropout <50 to include epistemic uncertainty, 0 otherwise>
133133
```
134134

@@ -156,18 +156,18 @@ You can load one or more image pairs using glob expressions. For example:
156156

157157
```sh
158158
python3 -m monoloco.run predict --mode stereo \
159-
--glob docs/000840*.png
159+
--glob docs/000840*.png \
160160
--path_gt <to match results with ground-truths> \
161-
-o data/output -long_edge 2500
161+
-o data/output --long-edge 2500
162162
```
163163

164164
![Crowded scene](docs/out_000840_multi.jpg)
165165

166166
```sh
167-
python3 -m monoloco.run predict --glob docs/005523*.png \ --output_types multi \
167+
python3 -m monoloco.run predict --glob docs/005523*.png \
168168
--mode stereo \
169169
--path_gt <to match results with ground-truths> \
170-
-o data/output --long_edge 2500 \
170+
-o data/output --long-edge 2500 \
171171
--instance-threshold 0.05 --seed-threshold 0.05
172172
```
173173

configs/intrinsics.yaml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
2+
kitti:
3+
intrinsics:
4+
- [718.3351, 0., 600.3891]
5+
- [0., 718.3351, 181.5122]
6+
- [0., 0., 1.]
7+
im_size: [1238, 374]
8+
9+
wv:
10+
intrinsics:
11+
- [1070.9498, 0., 987.4846]
12+
- [0., 1070.726, 605.5297]
13+
- [0., 0., 1.]
14+
im_size: [1920, 1200]
15+
16+
17+
nuscenes:
18+
intrinsics:
19+
- [ 1070.9498, 0., 987.4846]
20+
- [ 0., 1070.726, 605.5297]
21+
- [ 0., 0., 1. ]
22+
im_size: [1600, 900]

docs/out_002282.png.multi.jpg

293 KB
Loading

docs/out_002282.png.multi_all.jpg

351 KB
Loading

monoloco/activity.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,8 @@
1010
import matplotlib.pyplot as plt
1111

1212
from .network.process import laplace_sampling
13-
from .visuals.pifpaf_show import (
14-
KeypointPainter, image_canvas, get_pifpaf_outputs, draw_orientation, social_distance_colors
15-
)
13+
from .visuals.pifpaf_show import KeypointPainter, image_canvas, get_pifpaf_outputs
14+
from .visuals.printer import draw_orientation, social_distance_colors
1615

1716

1817
def social_interactions(idx, centers, angles, dds, stds=None, social_distance=False,

monoloco/eval/eval_activity.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@
1515
ACCURACY_SCORE = None
1616

1717
from ..prep import factory_file
18-
from ..network import Loco
19-
from ..network.process import factory_for_gt, preprocess_pifpaf
18+
from ..network import Loco, preprocess_pifpaf, load_calibration
2019
from ..activity import social_interactions
2120
from ..utils import open_annotations, get_iou_matches, get_difficulty
2221

@@ -92,7 +91,7 @@ def eval_collective(self):
9291
extension = '.predictions.json'
9392
path_pif = os.path.join(self.dir_ann, basename + extension)
9493
annotations = open_annotations(path_pif)
95-
kk, _ = factory_for_gt(im_size)
94+
kk = load_calibration(calibration='kitti', im_size=im_size)
9695

9796
# Collect corresponding gt files (ys_gt: 1 or 0)
9897
boxes_gt, ys_gt = parse_gt_collective(self.dir_data, seq, path_pif)

monoloco/network/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11

22
from .net import Loco
3-
from .process import unnormalize_bi, extract_outputs, extract_labels, extract_labels_aux
3+
from .process import load_calibration, factory_for_gt, preprocess_pifpaf, \
4+
unnormalize_bi, extract_outputs, extract_labels, extract_labels_aux

monoloco/network/net.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,6 @@ def forward(self, keypoints, kk, keypoints_r=None):
123123

124124
# For Median baseline
125125
# dic_out = median_disparity(dic_out, keypoints, keypoints_r, mask)
126-
127126
if self.n_dropout > 0 and self.net != 'monstereo':
128127
varss = self.epistemic_uncertainty(inputs)
129128
dic_out['epi'] = varss

monoloco/network/process.py

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import os
44
import logging
55

6+
import yaml
67
import numpy as np
78
import torch
89
import torchvision
@@ -66,30 +67,35 @@ def preprocess_monoloco(keypoints, kk, zero_center=False):
6667
return kps_out
6768

6869

69-
def factory_for_gt(im_size, focal_length=5.7, name=None, path_gt=None):
70-
"""Look for ground-truth annotations file and define calibration matrix based on image size """
71-
72-
if path_gt is not None:
73-
assert os.path.exists(path_gt), "Ground-truth file not found"
74-
with open(path_gt, 'r') as f:
75-
dic_names = json.load(f)
76-
kk = dic_names[name]['K']
77-
dic_gt = dic_names[name]
78-
79-
# Without ground-truth-file
80-
elif im_size[0] / im_size[1] > 2.5: # KITTI default
81-
kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]] # Kitti calibration
82-
dic_gt = None
83-
logger.info("Using KITTI calibration matrix...")
84-
else: # nuScenes camera parameters
70+
def load_calibration(calibration, im_size, focal_length=5.7):
71+
if calibration == 'custom':
8572
kk = [
8673
[im_size[0]*focal_length/Sx, 0., im_size[0]/2],
8774
[0., im_size[1]*focal_length/Sy, im_size[1]/2],
88-
[0., 0., 1.]]
89-
dic_gt = None
90-
logger.info("Using a standard calibration matrix...")
75+
[0., 0., 1.]
76+
]
77+
else:
78+
with open(os.path.join('configs', 'intrinsics.yaml')) as a:
79+
configs = yaml.safe_load(a)
80+
kk = configs[calibration]['intrinsics']
81+
orig_size = configs[calibration]['im_size']
82+
scale = [size / orig for size, orig in zip(im_size, orig_size)]
83+
kk[0] = [el * scale[0] for el in kk[0]]
84+
kk[1] = [el * scale[1] for el in kk[1]]
85+
logger.info("Using {} calibration matrix".format(calibration))
86+
return kk
87+
88+
89+
def factory_for_gt(path_gt, name=None):
90+
"""Look for ground-truth annotations file and define calibration matrix based on image size """
91+
92+
assert os.path.exists(path_gt), "Ground-truth file not found"
93+
with open(path_gt, 'r') as f:
94+
dic_names = json.load(f)
95+
kk = dic_names[name]['K']
96+
dic_gt = dic_names[name]
9197

92-
return kk, dic_gt
98+
return dic_gt, kk
9399

94100

95101
def laplace_sampling(outputs, n_samples):

monoloco/predict.py

Lines changed: 51 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,15 @@
1111
import json
1212
import copy
1313
import logging
14+
import time
1415
from collections import defaultdict
1516

16-
17+
import numpy as np
1718
import torch
1819
import PIL
1920
import openpifpaf
2021
from openpifpaf import datasets
21-
from openpifpaf import decoder, network, visualizer, show, logger
22+
from openpifpaf import decoder, network, visualizer, show, logger, Predictor
2223
from openpifpaf.predict import out_name
2324

2425
try:
@@ -27,8 +28,7 @@
2728
except ImportError:
2829
DOWNLOAD = None
2930
from .visuals.printer import Printer
30-
from .network import Loco
31-
from .network.process import factory_for_gt, preprocess_pifpaf
31+
from .network import Loco, factory_for_gt, load_calibration, preprocess_pifpaf
3232
from .activity import show_activities
3333

3434
LOG = logging.getLogger(__name__)
@@ -83,7 +83,6 @@ def download_checkpoints(args):
8383
else:
8484
path = MONOLOCO_MODEL_KI
8585
name = 'monoloco_pp-201203-1424.pkl'
86-
8786
model = os.path.join(torch_dir, name)
8887
dic_models[args.mode] = model
8988
if not os.path.exists(model):
@@ -92,6 +91,7 @@ def download_checkpoints(args):
9291
"pip install gdown to download a monoloco model, or pass the model path as --model"
9392
LOG.info('Downloading model in %s', torch_dir)
9493
DOWNLOAD(path, model, quiet=False)
94+
print(f"Using model: {name}")
9595
return dic_models
9696

9797

@@ -121,6 +121,8 @@ def factory_from_args(args):
121121
LOG.debug('neural network device: %s', args.device)
122122

123123
# Add visualization defaults
124+
if not args.output_types and args.mode != 'keypoints':
125+
args.output_types = ['multi']
124126
args.figure_width = 10
125127
args.dpi_factor = 1.0
126128

@@ -141,11 +143,12 @@ def factory_from_args(args):
141143

142144
if args.mode != 'keypoints':
143145
assert any((xx in args.output_types for xx in ['front', 'bird', 'multi', 'json'])), \
144-
"No output type specified, please select one among front, bird, multi, json, or choose mode=keypoints"
146+
"No output type specified, please select one among front, bird, multi, json, or choose mode=keypoints"
145147

146148
# Configure
147149
decoder.configure(args)
148150
network.Factory.configure(args)
151+
Predictor.configure(args)
149152
show.configure(args)
150153
visualizer.configure(args)
151154

@@ -157,7 +160,6 @@ def predict(args):
157160
cnt = 0
158161
assert args.mode in ('keypoints', 'mono', 'stereo')
159162
args, dic_models = factory_from_args(args)
160-
161163
# Load Models
162164
if args.mode in ('mono', 'stereo'):
163165
net = Loco(
@@ -167,18 +169,20 @@ def predict(args):
167169
n_dropout=args.n_dropout,
168170
p_dropout=args.dropout)
169171

170-
# for openpifpaf predicitons
171-
predictor = openpifpaf.Predictor(checkpoint=args.checkpoint)
172+
# for openpifpaf predictions
173+
predictor = Predictor(checkpoint=args.checkpoint)
172174

173175
# data
174176
data = datasets.ImageList(args.images, preprocess=predictor.preprocess)
175177
if args.mode == 'stereo':
176178
assert len(data.image_paths) % 2 == 0, "Odd number of images in a stereo setting"
177179

178180
pifpaf_outs = {}
181+
start = time.time()
182+
timing = []
179183
for idx, (pred, _, meta) in enumerate(predictor.images(args.images, batch_size=args.batch_size)):
180184

181-
if idx % args.batch_size != 0: # Only for MonStereo
185+
if idx % args.batch_size != 0: # Only for MonStereo
182186
pifpaf_outs['right'] = [ann.json_data() for ann in pred]
183187
else:
184188
if args.json_output is not None:
@@ -187,11 +191,10 @@ def predict(args):
187191
with open(json_out_name, 'w') as f:
188192
json.dump([ann.json_data() for ann in pred], f)
189193

190-
with open(meta['file_name'], 'rb') as f:
191-
cpu_image = PIL.Image.open(f).convert('RGB')
192194
pifpaf_outs['pred'] = pred
193195
pifpaf_outs['left'] = [ann.json_data() for ann in pred]
194-
pifpaf_outs['image'] = cpu_image
196+
pifpaf_outs['file_name'] = meta['file_name']
197+
pifpaf_outs['width_height'] = meta['width_height']
195198

196199
# Set output image name
197200
if args.output_directory is None:
@@ -207,18 +210,27 @@ def predict(args):
207210

208211
if (args.mode == 'mono') or (args.mode == 'stereo' and idx % args.batch_size != 0):
209212
# 3D Predictions
210-
if args.mode != 'keypoints':
211-
im_size = (cpu_image.size[0], cpu_image.size[1]) # Original
212-
kk, dic_gt = factory_for_gt(
213-
im_size, focal_length=args.focal, name=im_name, path_gt=args.path_gt)
213+
if args.mode == 'keypoints':
214+
dic_out = defaultdict(list)
215+
kk = None
216+
else:
217+
im_size = (float(pifpaf_outs['width_height'][0]), float(pifpaf_outs['width_height'][1]))
214218

219+
if args.path_gt is not None:
220+
dic_gt, kk = factory_for_gt(args.path_gt, im_name)
221+
else:
222+
kk = load_calibration(args.calibration, im_size, focal_length=args.focal_length)
223+
dic_gt = None
215224
# Preprocess pifpaf outputs and run monoloco
216225
boxes, keypoints = preprocess_pifpaf(
217226
pifpaf_outs['left'], im_size, enlarge_boxes=False)
218227

219228
if args.mode == 'mono':
220229
LOG.info("Prediction with MonoLoco++")
221230
dic_out = net.forward(keypoints, kk)
231+
fwd_time = (time.time()-start)*1000
232+
timing.append(fwd_time) # Skip Reordering and saving images
233+
print(f"Forward time: {fwd_time:.0f} ms")
222234
dic_out = net.post_process(
223235
dic_out, boxes, keypoints, kk, dic_gt)
224236
if 'social_distance' in args.activities:
@@ -230,41 +242,46 @@ def predict(args):
230242
LOG.info("Prediction with MonStereo")
231243
_, keypoints_r = preprocess_pifpaf(pifpaf_outs['right'], im_size)
232244
dic_out = net.forward(keypoints, kk, keypoints_r=keypoints_r)
245+
fwd_time = (time.time()-start)*1000
246+
timing.append(fwd_time)
233247
dic_out = net.post_process(
234248
dic_out, boxes, keypoints, kk, dic_gt)
235249

236-
else:
237-
dic_out = defaultdict(list)
238-
kk = None
239-
240-
# Outputs
250+
# Output
241251
factory_outputs(args, pifpaf_outs, dic_out, output_path, kk=kk)
242252
print(f'Image {cnt}\n' + '-' * 120)
243253
cnt += 1
254+
start = time.time()
255+
timing = np.array(timing)
256+
avg_time = int(np.mean(timing))
257+
std_time = int(np.std(timing))
258+
print(f'Processed {idx * args.batch_size} images with an average time of {avg_time} ms and a std of {std_time} ms')
244259

245260

246261
def factory_outputs(args, pifpaf_outs, dic_out, output_path, kk=None):
247-
"""Output json files or images according to the choice"""
248-
249-
if 'social_distance' in args.activities:
250-
assert args.mode == 'mono', "Social distancing only works with monocular network"
262+
"""
263+
Output json files or images according to the choice
264+
"""
265+
if 'json' in args.output_types:
266+
with open(os.path.join(output_path + '.monoloco.json'), 'w') as ff:
267+
json.dump(dic_out, ff)
268+
if len(args.output_types) == 1:
269+
return
251270

271+
with open(pifpaf_outs['file_name'], 'rb') as f:
272+
cpu_image = PIL.Image.open(f).convert('RGB')
252273
if args.mode == 'keypoints':
253274
annotation_painter = openpifpaf.show.AnnotationPainter()
254-
with openpifpaf.show.image_canvas(pifpaf_outs['image'], output_path) as ax:
275+
with openpifpaf.show.image_canvas(cpu_image, output_path) as ax:
255276
annotation_painter.annotations(ax, pifpaf_outs['pred'])
256277
return
257278

258279
if any((xx in args.output_types for xx in ['front', 'bird', 'multi'])):
259280
LOG.info(output_path)
260281
if args.activities:
261282
show_activities(
262-
args, pifpaf_outs['image'], output_path, pifpaf_outs['left'], dic_out)
283+
args, cpu_image, output_path, pifpaf_outs['left'], dic_out)
263284
else:
264-
printer = Printer(pifpaf_outs['image'], output_path, kk, args)
285+
printer = Printer(cpu_image, output_path, kk, args)
265286
figures, axes = printer.factory_axes(dic_out)
266-
printer.draw(figures, axes, pifpaf_outs['image'])
267-
268-
if 'json' in args.output_types:
269-
with open(os.path.join(output_path + '.monoloco.json'), 'w') as ff:
270-
json.dump(dic_out, ff)
287+
printer.draw(figures, axes, cpu_image, dic_out)

0 commit comments

Comments
 (0)