
Commit cac409f

eval_tracker
1 parent 05fe0a4 commit cac409f

3 files changed: +395 -0 lines changed

tools/eval/datasets/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -4,6 +4,7 @@
 from .icdar import ICDAR
 from .iiit5k import IIIT5K
 from .minisupervisely import MiniSupervisely
+from .otb import OTB

 class Registery:
     def __init__(self, name):
@@ -23,3 +24,4 @@ def register(self, item):
 DATASETS.register(ICDAR)
 DATASETS.register(IIIT5K)
 DATASETS.register(MiniSupervisely)
+DATASETS.register(OTB)
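With the import and registration in place, the eval script can look OTB up by name like the other datasets. A minimal sketch of that lookup, assuming the Registery class exposes a get(name) accessor alongside register (the accessor name is an assumption, not confirmed by this diff):

    from tools.eval.datasets import DATASETS

    dataset_cls = DATASETS.get('OTB')       # hypothetical accessor; only register() appears in this diff
    otb = dataset_cls(root='/path/to/OTB')  # OTB.__init__(root) is defined in otb.py below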

tools/eval/datasets/otb.py

Lines changed: 384 additions & 0 deletions
@@ -0,0 +1,384 @@
import os
import json
import numpy as np
import cv2 as cv
from colorama import Style, Fore
from tqdm import tqdm
from multiprocessing import Pool


def overlap_ratio(rect1, rect2):
    '''Compute the overlap ratio (IoU) between two sets of rects.

    Args:
        rect1, rect2: 2d arrays of N x [x, y, w, h]
    Returns:
        iou: 1d array of N IoU values, clipped to [0, 1]
    '''
    left = np.maximum(rect1[:, 0], rect2[:, 0])
    right = np.minimum(rect1[:, 0] + rect1[:, 2], rect2[:, 0] + rect2[:, 2])
    top = np.maximum(rect1[:, 1], rect2[:, 1])
    bottom = np.minimum(rect1[:, 1] + rect1[:, 3], rect2[:, 1] + rect2[:, 3])

    intersect = np.maximum(0, right - left) * np.maximum(0, bottom - top)
    union = rect1[:, 2] * rect1[:, 3] + rect2[:, 2] * rect2[:, 3] - intersect
    iou = intersect / union
    iou = np.maximum(np.minimum(1, iou), 0)
    return iou


def success_overlap(gt_bb, result_bb, n_frame):
    '''Success curve: fraction of frames whose IoU exceeds each threshold.'''
    thresholds_overlap = np.arange(0, 1.05, 0.05)
    success = np.zeros(len(thresholds_overlap))
    iou = np.ones(len(gt_bb)) * (-1)
    # only score frames whose ground-truth box has positive width and height
    mask = np.sum(gt_bb[:, 2:] > 0, axis=1) == 2
    iou[mask] = overlap_ratio(gt_bb[mask], result_bb[mask])
    for i in range(len(thresholds_overlap)):
        success[i] = np.sum(iou > thresholds_overlap[i]) / float(n_frame)
    return success


def success_error(gt_center, result_center, thresholds, n_frame):
    '''Precision curve: fraction of frames whose center error is within each threshold.'''
    success = np.zeros(len(thresholds))
    dist = np.ones(len(gt_center)) * (-1)
    mask = np.sum(gt_center > 0, axis=1) == 2
    dist[mask] = np.sqrt(np.sum(
        np.power(gt_center[mask] - result_center[mask], 2), axis=1))
    for i in range(len(thresholds)):
        success[i] = np.sum(dist <= thresholds[i]) / float(n_frame)
    return success
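As a quick sanity check of overlap_ratio (illustrative values, not from the dataset): two 4x4 boxes offset by (2, 2) intersect in a 2x2 region, so the IoU is 4 / (16 + 16 - 4) ≈ 0.143.

    rect1 = np.array([[0, 0, 4, 4]], dtype=float)
    rect2 = np.array([[2, 2, 4, 4]], dtype=float)
    print(overlap_ratio(rect1, rect2))  # [0.14285714]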
class OPEBenchmark:
    def __init__(self, dataset):
        self.dataset = dataset

    def convert_bb_to_center(self, bboxes):
        return np.array([(bboxes[:, 0] + (bboxes[:, 2] - 1) / 2),
                         (bboxes[:, 1] + (bboxes[:, 3] - 1) / 2)]).T

    def convert_bb_to_norm_center(self, bboxes, gt_wh):
        return self.convert_bb_to_center(bboxes) / (gt_wh + 1e-16)

    def eval_success(self, tracker):
        success_ret = {}
        success_ret_ = {}
        for video in self.dataset:
            gt_traj = np.array(video.gt_traj)
            tracker_traj = np.array(video.load_tracker())
            n_frame = len(gt_traj)
            if hasattr(video, 'absent'):
                gt_traj = gt_traj[video.absent == 1]
                tracker_traj = tracker_traj[video.absent == 1]
            success_ret_[video.name] = success_overlap(gt_traj, tracker_traj, n_frame)
        success_ret["tracker"] = success_ret_
        return success_ret

    def eval_precision(self, tracker):
        precision_ret = {}
        precision_ret_ = {}
        for video in self.dataset:
            gt_traj = np.array(video.gt_traj)
            tracker_traj = np.array(video.load_tracker())
            n_frame = len(gt_traj)
            if hasattr(video, 'absent'):
                gt_traj = gt_traj[video.absent == 1]
                tracker_traj = tracker_traj[video.absent == 1]
            gt_center = self.convert_bb_to_center(gt_traj)
            tracker_center = self.convert_bb_to_center(tracker_traj)
            thresholds = np.arange(0, 51, 1)
            precision_ret_[video.name] = success_error(gt_center, tracker_center,
                                                       thresholds, n_frame)
        precision_ret["tracker"] = precision_ret_
        return precision_ret

    def eval_norm_precision(self, tracker):
        norm_precision_ret = {}
        norm_precision_ret_ = {}
        for video in self.dataset:
            gt_traj = np.array(video.gt_traj)
            tracker_traj = np.array(video.load_tracker())
            n_frame = len(gt_traj)
            if hasattr(video, 'absent'):
                gt_traj = gt_traj[video.absent == 1]
                tracker_traj = tracker_traj[video.absent == 1]
            gt_center_norm = self.convert_bb_to_norm_center(gt_traj, gt_traj[:, 2:4])
            tracker_center_norm = self.convert_bb_to_norm_center(tracker_traj, gt_traj[:, 2:4])
            thresholds = np.arange(0, 51, 1) / 100
            norm_precision_ret_[video.name] = success_error(gt_center_norm,
                                                            tracker_center_norm,
                                                            thresholds, n_frame)
        norm_precision_ret["tracker"] = norm_precision_ret_
        return norm_precision_ret

    def show_result(self, success_ret, precision_ret=None,
                    norm_precision_ret=None, show_video_level=False,
                    highlight_threshold=0.6):
        '''Pretty-print the result dicts returned by the eval_* methods.'''
        # sort trackers by AUC (mean of the success curve), keep the top 20
        tracker_auc = {}
        for tracker_name in success_ret.keys():
            auc = np.mean(list(success_ret[tracker_name].values()))
            tracker_auc[tracker_name] = auc
        tracker_auc_ = sorted(tracker_auc.items(),
                              key=lambda x: x[1],
                              reverse=True)[:20]
        tracker_names = [x[0] for x in tracker_auc_]

        tracker_name_len = max((max([len(x) for x in success_ret.keys()]) + 2), 12)
        header = ("|{:^" + str(tracker_name_len) + "}|{:^9}|{:^16}|{:^11}|").format(
            "Tracker name", "Success", "Norm Precision", "Precision")
        formatter = "|{:^" + str(tracker_name_len) + "}|{:^9.3f}|{:^16.3f}|{:^11.3f}|"
        print('-' * len(header))
        print(header)
        print('-' * len(header))
        for tracker_name in tracker_names:
            success = tracker_auc[tracker_name]
            if precision_ret is not None:
                # precision at the 20-pixel threshold
                precision = np.mean(list(precision_ret[tracker_name].values()), axis=0)[20]
            else:
                precision = 0
            if norm_precision_ret is not None:
                norm_precision = np.mean(list(norm_precision_ret[tracker_name].values()),
                                         axis=0)[20]
            else:
                norm_precision = 0
            print(formatter.format(tracker_name, success, norm_precision, precision))
        print('-' * len(header))

        if show_video_level and len(success_ret) < 10 \
                and precision_ret is not None \
                and len(precision_ret) < 10:
            print("\n\n")
            header1 = "|{:^21}|".format("Tracker name")
            header2 = "|{:^21}|".format("Video name")
            for tracker_name in success_ret.keys():
                header1 += ("{:^21}|").format(tracker_name)
                header2 += "{:^9}|{:^11}|".format("success", "precision")
            print('-' * len(header1))
            print(header1)
            print('-' * len(header1))
            print(header2)
            print('-' * len(header1))
            videos = list(success_ret[tracker_name].keys())
            for video in videos:
                row = "|{:^21}|".format(video)
                for tracker_name in success_ret.keys():
                    success = np.mean(success_ret[tracker_name][video])
                    precision = np.mean(precision_ret[tracker_name][video])
                    success_str = "{:^9.3f}".format(success)
                    if success < highlight_threshold:
                        row += f'{Fore.RED}{success_str}{Style.RESET_ALL}|'
                    else:
                        row += success_str + '|'
                    precision_str = "{:^11.3f}".format(precision)
                    if precision < highlight_threshold:
                        row += f'{Fore.RED}{precision_str}{Style.RESET_ALL}|'
                    else:
                        row += precision_str + '|'
                print(row)
            print('-' * len(header1))
class Video(object):
    def __init__(self, name, root, video_dir, init_rect, img_names,
                 gt_rect, attr):
        self.name = name
        self.video_dir = video_dir
        self.init_rect = init_rect
        self.gt_traj = gt_rect
        self.attr = attr
        self.pred_trajs = {}
        self.img_names = [os.path.join(root, x) for x in img_names]
        self.imgs = None
        img = cv.imread(self.img_names[0])
        assert img is not None, self.img_names[0]
        self.width = img.shape[1]
        self.height = img.shape[0]

    def __len__(self):
        return len(self.img_names)

    def __getitem__(self, idx):
        if self.imgs is None:
            return cv.imread(self.img_names[idx]), self.gt_traj[idx]
        else:
            return self.imgs[idx], self.gt_traj[idx]

    def __iter__(self):
        for i in range(len(self.img_names)):
            if self.imgs is not None:
                yield self.imgs[i], self.gt_traj[i]
            else:
                yield cv.imread(self.img_names[i]), self.gt_traj[i]

    def load_tracker(self):
        '''Load the tracker's predicted trajectory from OTB_results/<name>.txt.'''
        traj_file = os.path.join("OTB_results", self.name + '.txt')
        if not os.path.exists(traj_file):
            # fall back to the lower-case file names some result archives use
            if self.name == 'FleetFace':
                txt_name = 'fleetface.txt'
            elif self.name == 'Jogging-1':
                txt_name = 'jogging_1.txt'
            elif self.name == 'Jogging-2':
                txt_name = 'jogging_2.txt'
            elif self.name == 'Skating2-1':
                txt_name = 'skating2_1.txt'
            elif self.name == 'Skating2-2':
                txt_name = 'skating2_2.txt'
            elif self.name == 'FaceOcc1':
                txt_name = 'faceocc1.txt'
            elif self.name == 'FaceOcc2':
                txt_name = 'faceocc2.txt'
            elif self.name == 'Human4-2':
                txt_name = 'human4_2.txt'
            else:
                txt_name = self.name[0].lower() + self.name[1:] + '.txt'
            traj_file = os.path.join("OTB_results", txt_name)
        if os.path.exists(traj_file):
            with open(traj_file, 'r') as f:
                pred_traj = [list(map(float, x.strip().split(',')))
                             for x in f.readlines()]
            if len(pred_traj) != len(self.gt_traj):
                print("tracker", len(pred_traj), len(self.gt_traj), self.name)
            else:
                return pred_traj
        else:
            print(traj_file)
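load_tracker expects one comma-separated x,y,w,h box per line, which is exactly what OTB.eval writes below. A hypothetical round trip (the 'Biker' file name is illustrative, assuming a previous eval run produced it):

    with open('OTB_results/Biker.txt') as f:  # hypothetical result file
        pred_traj = [list(map(float, line.strip().split(','))) for line in f]
    print(len(pred_traj), pred_traj[0])       # frame count and the first [x, y, w, h]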
class OTBDATASET:
    def __init__(self, root):
        with open(os.path.join(root, 'OTB.json'), 'r') as f:
            meta_data = json.load(f)
        self.root = root
        # load videos
        pbar = tqdm(meta_data.keys(), desc='loading OTB', ncols=100)
        self.videos = {}
        for video in pbar:
            pbar.set_postfix_str(video)
            self.videos[video] = Video(video,
                                       self.root,
                                       meta_data[video]['video_dir'],
                                       meta_data[video]['init_rect'],
                                       meta_data[video]['img_names'],
                                       meta_data[video]['gt_rect'],
                                       meta_data[video]['attr'])
        # build the attribute -> video-name index
        attr = []
        for x in self.videos.values():
            attr += x.attr
        attr = set(attr)
        self.attr = {}
        self.attr['ALL'] = list(self.videos.keys())
        for x in attr:
            self.attr[x] = []
        for k, v in self.videos.items():
            for attr_ in v.attr:
                self.attr[attr_].append(k)

    def __getitem__(self, idx):
        if isinstance(idx, str):
            return self.videos[idx]
        elif isinstance(idx, int):
            return self.videos[sorted(list(self.videos.keys()))[idx]]

    def __len__(self):
        return len(self.videos)

    def __iter__(self):
        keys = sorted(list(self.videos.keys()))
        for key in keys:
            yield self.videos[key]
def get_axis_aligned_bbox(region):
    '''Convert a region (either [x, y, w, h] or an 8-value polygon) to an
    axis-aligned box represented as (cx, cy, w, h).'''
    nv = region.size
    if nv == 8:
        cx = np.mean(region[0::2])
        cy = np.mean(region[1::2])
        x1 = min(region[0::2])
        x2 = max(region[0::2])
        y1 = min(region[1::2])
        y2 = max(region[1::2])
        # rescale the bounding rect so its area matches the polygon's
        A1 = np.linalg.norm(region[0:2] - region[2:4]) * \
            np.linalg.norm(region[2:4] - region[4:6])
        A2 = (x2 - x1) * (y2 - y1)
        s = np.sqrt(A1 / A2)
        w = s * (x2 - x1) + 1
        h = s * (y2 - y1) + 1
    else:
        x = region[0]
        y = region[1]
        w = region[2]
        h = region[3]
        cx = x + w / 2
        cy = y + h / 2
    return cx, cy, w, h
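A toy check of the 8-value branch (values are illustrative): an axis-aligned 4x4 square given as a polygon keeps its center, and the width/height pick up the +1 pixel convention since the scale factor s is 1.

    region = np.array([0, 0, 4, 0, 4, 4, 0, 4], dtype=float)
    print(get_axis_aligned_bbox(region))  # (2.0, 2.0, 5.0, 5.0)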
class OTB:
    def __init__(self, root):
        self.root = root
        self.dataset = OTBDATASET(root)

    @property
    def name(self):
        return self.__class__.__name__

    def eval(self, model):
        for v_idx, video in enumerate(self.dataset):
            toc = 0
            pred_bboxes = []
            scores = []
            track_times = []
            for idx, (img, gt_bbox) in enumerate(video):
                # convert BGR to RGB
                img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
                tic = cv.getTickCount()
                if idx == 0:
                    # initialize the tracker with the first ground-truth box
                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                    gt_bbox_ = (int(cx - w / 2), int(cy - h / 2), int(w), int(h))
                    model.init(img, gt_bbox_)
                    pred_bbox = gt_bbox_
                    pred_bboxes.append(pred_bbox)
                    scores.append(None)
                else:
                    isLocated, bbox, score = model.infer(img)
                    pred_bbox = bbox
                    pred_bboxes.append(pred_bbox)
                    scores.append(score)
                toc += cv.getTickCount() - tic
                track_times.append((cv.getTickCount() - tic) / cv.getTickFrequency())
                if idx == 0:
                    cv.destroyAllWindows()
            toc /= cv.getTickFrequency()
            # write one x,y,w,h line per frame
            model_path = 'OTB_results'
            if not os.path.isdir(model_path):
                os.makedirs(model_path)
            result_path = os.path.join(model_path, '{}.txt'.format(video.name))
            with open(result_path, 'w') as f:
                for x in pred_bboxes:
                    f.write(','.join([str(i) for i in x]) + '\n')
            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
                v_idx + 1, video.name, toc, idx / toc))

    def get_result(self):
        return self.top1_acc, self.top5_acc

    def print_result(self):
        benchmark = OPEBenchmark(self.dataset)
        success_ret = {}
        with Pool(processes=1) as pool:
            # pass a one-element list: a bare string would be iterated character
            # by character, running the evaluation once per character
            for ret in tqdm(pool.imap_unordered(benchmark.eval_success, ["tracker"]),
                            desc='eval success', total=1, ncols=100):
                success_ret.update(ret)
        precision_ret = {}
        with Pool(processes=1) as pool:
            for ret in tqdm(pool.imap_unordered(benchmark.eval_precision, ["tracker"]),
                            desc='eval precision', total=1, ncols=100):
                precision_ret.update(ret)
        benchmark.show_result(success_ret, precision_ret,
                              show_video_level=False)
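Putting the pieces together, a hedged end-to-end sketch of how the class is meant to be driven (the tracker object is hypothetical; the code above only requires it to expose init(img, bbox) and infer(img) returning (isLocated, bbox, score)):

    otb = OTB(root='/path/to/OTB')  # directory containing OTB.json and the sequences
    otb.eval(tracker)               # runs the tracker, writes OTB_results/<video>.txt
    otb.print_result()              # aggregates those files into the success/precision table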
