Skip to content

Commit 42b1d20

Browse files
authored
Calibration (#68)
* add timing and postpone the image loading * add calibration * make z_max dynamic when non specified * scale 7 * add yaml for calibration * fix long edge in predictor * fix stereo and clean colors * update images * update path for configs
1 parent ac21008 commit 42b1d20

File tree

17 files changed

+264
-210
lines changed

17 files changed

+264
-210
lines changed

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ For an example image, run the following command:
128128
python3 -m monoloco.run predict docs/002282.png \
129129
--path_gt names-kitti-200615-1022.json \
130130
-o <output directory> \
131-
--long-edge <rescale the image by providing dimension of long side>
131+
--long-edge <rescale the image by providing dimension of long side> \
132132
--n_dropout <50 to include epistemic uncertainty, 0 otherwise>
133133
```
134134

@@ -156,18 +156,18 @@ You can load one or more image pairs using glob expressions. For example:
156156

157157
```sh
158158
python3 -m monoloco.run predict --mode stereo \
159-
--glob docs/000840*.png
159+
--glob docs/000840*.png \
160160
--path_gt <to match results with ground-truths> \
161-
-o data/output -long_edge 2500
161+
-o data/output --long-edge 2500
162162
```
163163

164164
![Crowded scene](docs/out_000840_multi.jpg)
165165

166166
```sh
167-
python3 -m monoloco.run predict --glob docs/005523*.png \ --output_types multi \
167+
python3 -m monoloco.run predict --glob docs/005523*.png \
168168
--mode stereo \
169169
--path_gt <to match results with ground-truths> \
170-
-o data/output --long_edge 2500 \
170+
-o data/output --long-edge 2500 \
171171
--instance-threshold 0.05 --seed-threshold 0.05
172172
```
173173

configs/intrinsics.yaml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
2+
kitti:
3+
intrinsics:
4+
- [718.3351, 0., 600.3891]
5+
- [0., 718.3351, 181.5122]
6+
- [0., 0., 1.]
7+
im_size: [1238, 374]
8+
9+
wv:
10+
intrinsics:
11+
- [1070.9498, 0., 987.4846]
12+
- [0., 1070.726, 605.5297]
13+
- [0., 0., 1.]
14+
im_size: [1920, 1200]
15+
16+
17+
nuscenes:
18+
intrinsics:
19+
- [ 1070.9498, 0., 987.4846]
20+
- [ 0., 1070.726, 605.5297]
21+
- [ 0., 0., 1. ]
22+
im_size: [1600, 900]

docs/out_002282.png.multi.jpg

293 KB
Loading

docs/out_002282.png.multi_all.jpg

351 KB
Loading

monoloco/activity.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,8 @@
1010
import matplotlib.pyplot as plt
1111

1212
from .network.process import laplace_sampling
13-
from .visuals.pifpaf_show import (
14-
KeypointPainter, image_canvas, get_pifpaf_outputs, draw_orientation, social_distance_colors
15-
)
13+
from .visuals.pifpaf_show import KeypointPainter, image_canvas, get_pifpaf_outputs
14+
from .visuals.printer import draw_orientation, social_distance_colors
1615

1716

1817
def social_interactions(idx, centers, angles, dds, stds=None, social_distance=False,

monoloco/eval/eval_activity.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@
1515
ACCURACY_SCORE = None
1616

1717
from ..prep import factory_file
18-
from ..network import Loco
19-
from ..network.process import factory_for_gt, preprocess_pifpaf
18+
from ..network import Loco, preprocess_pifpaf, load_calibration
2019
from ..activity import social_interactions
2120
from ..utils import open_annotations, get_iou_matches, get_difficulty
2221

@@ -92,7 +91,7 @@ def eval_collective(self):
9291
extension = '.predictions.json'
9392
path_pif = os.path.join(self.dir_ann, basename + extension)
9493
annotations = open_annotations(path_pif)
95-
kk, _ = factory_for_gt(im_size)
94+
kk = load_calibration(calibration='kitti', im_size=im_size)
9695

9796
# Collect corresponding gt files (ys_gt: 1 or 0)
9897
boxes_gt, ys_gt = parse_gt_collective(self.dir_data, seq, path_pif)

monoloco/network/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11

22
from .net import Loco
3-
from .process import unnormalize_bi, extract_outputs, extract_labels, extract_labels_aux
3+
from .process import load_calibration, factory_for_gt, preprocess_pifpaf, \
4+
unnormalize_bi, extract_outputs, extract_labels, extract_labels_aux

monoloco/network/net.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,6 @@ def forward(self, keypoints, kk, keypoints_r=None):
123123

124124
# For Median baseline
125125
# dic_out = median_disparity(dic_out, keypoints, keypoints_r, mask)
126-
127126
if self.n_dropout > 0 and self.net != 'monstereo':
128127
varss = self.epistemic_uncertainty(inputs)
129128
dic_out['epi'] = varss

monoloco/network/process.py

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import os
44
import logging
55

6+
import yaml
67
import numpy as np
78
import torch
89
import torchvision
@@ -66,30 +67,35 @@ def preprocess_monoloco(keypoints, kk, zero_center=False):
6667
return kps_out
6768

6869

69-
def factory_for_gt(im_size, focal_length=5.7, name=None, path_gt=None):
70-
"""Look for ground-truth annotations file and define calibration matrix based on image size """
71-
72-
if path_gt is not None:
73-
assert os.path.exists(path_gt), "Ground-truth file not found"
74-
with open(path_gt, 'r') as f:
75-
dic_names = json.load(f)
76-
kk = dic_names[name]['K']
77-
dic_gt = dic_names[name]
78-
79-
# Without ground-truth-file
80-
elif im_size[0] / im_size[1] > 2.5: # KITTI default
81-
kk = [[718.3351, 0., 600.3891], [0., 718.3351, 181.5122], [0., 0., 1.]] # Kitti calibration
82-
dic_gt = None
83-
logger.info("Using KITTI calibration matrix...")
84-
else: # nuScenes camera parameters
70+
def load_calibration(calibration, im_size, focal_length=5.7):
71+
if calibration == 'custom':
8572
kk = [
8673
[im_size[0]*focal_length/Sx, 0., im_size[0]/2],
8774
[0., im_size[1]*focal_length/Sy, im_size[1]/2],
88-
[0., 0., 1.]]
89-
dic_gt = None
90-
logger.info("Using a standard calibration matrix...")
75+
[0., 0., 1.]
76+
]
77+
else:
78+
with open(os.path.join('configs', 'intrinsics.yaml')) as a:
79+
configs = yaml.safe_load(a)
80+
kk = configs[calibration]['intrinsics']
81+
orig_size = configs[calibration]['im_size']
82+
scale = [size / orig for size, orig in zip(im_size, orig_size)]
83+
kk[0] = [el * scale[0] for el in kk[0]]
84+
kk[1] = [el * scale[1] for el in kk[1]]
85+
logger.info("Using {} calibration matrix".format(calibration))
86+
return kk
87+
88+
89+
def factory_for_gt(path_gt, name=None):
90+
"""Look for ground-truth annotations file and define calibration matrix based on image size """
91+
92+
assert os.path.exists(path_gt), "Ground-truth file not found"
93+
with open(path_gt, 'r') as f:
94+
dic_names = json.load(f)
95+
kk = dic_names[name]['K']
96+
dic_gt = dic_names[name]
9197

92-
return kk, dic_gt
98+
return dic_gt, kk
9399

94100

95101
def laplace_sampling(outputs, n_samples):

monoloco/predict.py

Lines changed: 51 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,15 @@
1111
import json
1212
import copy
1313
import logging
14+
import time
1415
from collections import defaultdict
1516

16-
17+
import numpy as np
1718
import torch
1819
import PIL
1920
import openpifpaf
2021
from openpifpaf import datasets
21-
from openpifpaf import decoder, network, visualizer, show, logger
22+
from openpifpaf import decoder, network, visualizer, show, logger, Predictor
2223
from openpifpaf.predict import out_name
2324

2425
try:
@@ -27,8 +28,7 @@
2728
except ImportError:
2829
DOWNLOAD = None
2930
from .visuals.printer import Printer
30-
from .network import Loco
31-
from .network.process import factory_for_gt, preprocess_pifpaf
31+
from .network import Loco, factory_for_gt, load_calibration, preprocess_pifpaf
3232
from .activity import show_activities
3333

3434
LOG = logging.getLogger(__name__)
@@ -83,7 +83,6 @@ def download_checkpoints(args):
8383
else:
8484
path = MONOLOCO_MODEL_KI
8585
name = 'monoloco_pp-201203-1424.pkl'
86-
8786
model = os.path.join(torch_dir, name)
8887
dic_models[args.mode] = model
8988
if not os.path.exists(model):
@@ -92,6 +91,7 @@ def download_checkpoints(args):
9291
"pip install gdown to download a monoloco model, or pass the model path as --model"
9392
LOG.info('Downloading model in %s', torch_dir)
9493
DOWNLOAD(path, model, quiet=False)
94+
print(f"Using model: {name}")
9595
return dic_models
9696

9797

@@ -121,6 +121,8 @@ def factory_from_args(args):
121121
LOG.debug('neural network device: %s', args.device)
122122

123123
# Add visualization defaults
124+
if not args.output_types and args.mode != 'keypoints':
125+
args.output_types = ['multi']
124126
args.figure_width = 10
125127
args.dpi_factor = 1.0
126128

@@ -141,11 +143,12 @@ def factory_from_args(args):
141143

142144
if args.mode != 'keypoints':
143145
assert any((xx in args.output_types for xx in ['front', 'bird', 'multi', 'json'])), \
144-
"No output type specified, please select one among front, bird, multi, json, or choose mode=keypoints"
146+
"No output type specified, please select one among front, bird, multi, json, or choose mode=keypoints"
145147

146148
# Configure
147149
decoder.configure(args)
148150
network.Factory.configure(args)
151+
Predictor.configure(args)
149152
show.configure(args)
150153
visualizer.configure(args)
151154

@@ -157,7 +160,6 @@ def predict(args):
157160
cnt = 0
158161
assert args.mode in ('keypoints', 'mono', 'stereo')
159162
args, dic_models = factory_from_args(args)
160-
161163
# Load Models
162164
if args.mode in ('mono', 'stereo'):
163165
net = Loco(
@@ -167,18 +169,20 @@ def predict(args):
167169
n_dropout=args.n_dropout,
168170
p_dropout=args.dropout)
169171

170-
# for openpifpaf predicitons
171-
predictor = openpifpaf.Predictor(checkpoint=args.checkpoint)
172+
# for openpifpaf predictions
173+
predictor = Predictor(checkpoint=args.checkpoint)
172174

173175
# data
174176
data = datasets.ImageList(args.images, preprocess=predictor.preprocess)
175177
if args.mode == 'stereo':
176178
assert len(data.image_paths) % 2 == 0, "Odd number of images in a stereo setting"
177179

178180
pifpaf_outs = {}
181+
start = time.time()
182+
timing = []
179183
for idx, (pred, _, meta) in enumerate(predictor.images(args.images, batch_size=args.batch_size)):
180184

181-
if idx % args.batch_size != 0: # Only for MonStereo
185+
if idx % args.batch_size != 0: # Only for MonStereo
182186
pifpaf_outs['right'] = [ann.json_data() for ann in pred]
183187
else:
184188
if args.json_output is not None:
@@ -187,11 +191,10 @@ def predict(args):
187191
with open(json_out_name, 'w') as f:
188192
json.dump([ann.json_data() for ann in pred], f)
189193

190-
with open(meta['file_name'], 'rb') as f:
191-
cpu_image = PIL.Image.open(f).convert('RGB')
192194
pifpaf_outs['pred'] = pred
193195
pifpaf_outs['left'] = [ann.json_data() for ann in pred]
194-
pifpaf_outs['image'] = cpu_image
196+
pifpaf_outs['file_name'] = meta['file_name']
197+
pifpaf_outs['width_height'] = meta['width_height']
195198

196199
# Set output image name
197200
if args.output_directory is None:
@@ -207,18 +210,27 @@ def predict(args):
207210

208211
if (args.mode == 'mono') or (args.mode == 'stereo' and idx % args.batch_size != 0):
209212
# 3D Predictions
210-
if args.mode != 'keypoints':
211-
im_size = (cpu_image.size[0], cpu_image.size[1]) # Original
212-
kk, dic_gt = factory_for_gt(
213-
im_size, focal_length=args.focal, name=im_name, path_gt=args.path_gt)
213+
if args.mode == 'keypoints':
214+
dic_out = defaultdict(list)
215+
kk = None
216+
else:
217+
im_size = (float(pifpaf_outs['width_height'][0]), float(pifpaf_outs['width_height'][1]))
214218

219+
if args.path_gt is not None:
220+
dic_gt, kk = factory_for_gt(args.path_gt, im_name)
221+
else:
222+
kk = load_calibration(args.calibration, im_size, focal_length=args.focal_length)
223+
dic_gt = None
215224
# Preprocess pifpaf outputs and run monoloco
216225
boxes, keypoints = preprocess_pifpaf(
217226
pifpaf_outs['left'], im_size, enlarge_boxes=False)
218227

219228
if args.mode == 'mono':
220229
LOG.info("Prediction with MonoLoco++")
221230
dic_out = net.forward(keypoints, kk)
231+
fwd_time = (time.time()-start)*1000
232+
timing.append(fwd_time) # Skip Reordering and saving images
233+
print(f"Forward time: {fwd_time:.0f} ms")
222234
dic_out = net.post_process(
223235
dic_out, boxes, keypoints, kk, dic_gt)
224236
if 'social_distance' in args.activities:
@@ -230,41 +242,46 @@ def predict(args):
230242
LOG.info("Prediction with MonStereo")
231243
_, keypoints_r = preprocess_pifpaf(pifpaf_outs['right'], im_size)
232244
dic_out = net.forward(keypoints, kk, keypoints_r=keypoints_r)
245+
fwd_time = (time.time()-start)*1000
246+
timing.append(fwd_time)
233247
dic_out = net.post_process(
234248
dic_out, boxes, keypoints, kk, dic_gt)
235249

236-
else:
237-
dic_out = defaultdict(list)
238-
kk = None
239-
240-
# Outputs
250+
# Output
241251
factory_outputs(args, pifpaf_outs, dic_out, output_path, kk=kk)
242252
print(f'Image {cnt}\n' + '-' * 120)
243253
cnt += 1
254+
start = time.time()
255+
timing = np.array(timing)
256+
avg_time = int(np.mean(timing))
257+
std_time = int(np.std(timing))
258+
print(f'Processed {idx * args.batch_size} images with an average time of {avg_time} ms and a std of {std_time} ms')
244259

245260

246261
def factory_outputs(args, pifpaf_outs, dic_out, output_path, kk=None):
247-
"""Output json files or images according to the choice"""
248-
249-
if 'social_distance' in args.activities:
250-
assert args.mode == 'mono', "Social distancing only works with monocular network"
262+
"""
263+
Output json files or images according to the choice
264+
"""
265+
if 'json' in args.output_types:
266+
with open(os.path.join(output_path + '.monoloco.json'), 'w') as ff:
267+
json.dump(dic_out, ff)
268+
if len(args.output_types) == 1:
269+
return
251270

271+
with open(pifpaf_outs['file_name'], 'rb') as f:
272+
cpu_image = PIL.Image.open(f).convert('RGB')
252273
if args.mode == 'keypoints':
253274
annotation_painter = openpifpaf.show.AnnotationPainter()
254-
with openpifpaf.show.image_canvas(pifpaf_outs['image'], output_path) as ax:
275+
with openpifpaf.show.image_canvas(cpu_image, output_path) as ax:
255276
annotation_painter.annotations(ax, pifpaf_outs['pred'])
256277
return
257278

258279
if any((xx in args.output_types for xx in ['front', 'bird', 'multi'])):
259280
LOG.info(output_path)
260281
if args.activities:
261282
show_activities(
262-
args, pifpaf_outs['image'], output_path, pifpaf_outs['left'], dic_out)
283+
args, cpu_image, output_path, pifpaf_outs['left'], dic_out)
263284
else:
264-
printer = Printer(pifpaf_outs['image'], output_path, kk, args)
285+
printer = Printer(cpu_image, output_path, kk, args)
265286
figures, axes = printer.factory_axes(dic_out)
266-
printer.draw(figures, axes, pifpaf_outs['image'])
267-
268-
if 'json' in args.output_types:
269-
with open(os.path.join(output_path + '.monoloco.json'), 'w') as ff:
270-
json.dump(dic_out, ff)
287+
printer.draw(figures, axes, cpu_image, dic_out)

0 commit comments

Comments
 (0)