Skip to content

Commit 1c59593

Browse files
committed
Implement new outlier detection routine
1 parent 681c225 commit 1c59593

File tree

1 file changed

+122
-27
lines changed

1 file changed

+122
-27
lines changed

pysteps/motion/lucaskanade.py

Lines changed: 122 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
cv2_imported = False
2525
import scipy.spatial
2626
import time
27+
import warnings
2728

2829

2930
def dense_lucaskanade(R, **kwargs):
@@ -101,11 +102,21 @@ def dense_lucaskanade(R, **kwargs):
101102
It represents the 0-based maximal pyramid level number, by default this
102103
is set to 3.
103104
104-
nr_IQR_outlier : int, optional
105-
Maximum acceptable deviation from the median velocity value in terms of
106-
number of inter quantile ranges (IQR). Any velocity that is larger than
107-
this value is flagged as outlier and excldued from the interpolation.
105+
nr_std_outlier : int, optional
106+
Maximum acceptable deviation from the mean/median in terms of
107+
number of standard deviations. Any anomaly larger than
108+
this value is flagged as outlier and excluded from the interpolation.
108109
By default this is set to 3.
110+
111+
multivariate_outlier : bool, optional
112+
If true (the default), the outlier detection is computed in terms of
113+
the Mahalanobis distance. If false, the outlier detection is simply
114+
computed in terms of velocity.
115+
116+
k_outlier : int, optinal
117+
The number of nearest neighbours used to localize the outlier detection.
118+
If set equal to 0, it employs all the data points.
119+
The default is 30.
109120
110121
size_opening : int, optional
111122
The size of the structuring element kernel in pixels. This is used to
@@ -133,8 +144,8 @@ def dense_lucaskanade(R, **kwargs):
133144
"gaussian".
134145
135146
k : int, optional
136-
The number of nearest neighbors used for fast interpolation, by default
137-
this is set to 20. If set equal to zero, it employs all the neighbors.
147+
The number of nearest neighbours used for fast interpolation, by default
148+
this is set to 20. If set equal to zero, it employs all the neighbours.
138149
139150
epsilon : float, optional
140151
The adjustable constant used in the gaussian and inverse radial basis
@@ -201,7 +212,17 @@ def dense_lucaskanade(R, **kwargs):
201212
block_size_ST = kwargs.get("block_size_ST", 15)
202213
winsize_LK = kwargs.get("winsize_LK", (50, 50))
203214
nr_levels_LK = kwargs.get("nr_levels_LK", 3)
204-
nr_IQR_outlier = kwargs.get("nr_IQR_outlier", 3)
215+
nr_std_outlier = kwargs.get("nr_std_outlier", 3)
216+
nr_IQR_outlier = kwargs.get("nr_IQR_outlier", None)
217+
if nr_IQR_outlier is not None:
218+
nr_std_outlier = nr_IQR_outlier
219+
warnings.warn(
220+
"the 'nr_IQR_outlier' argument will be deprecated in the next release; "
221+
+ "use 'nr_std_outlier' instead.",
222+
category=FutureWarning,
223+
)
224+
multivariate_outlier = kwargs.get("multivariate_outlier", True)
225+
k_outlier = kwargs.get("k_outlier", 30)
205226
size_opening = kwargs.get("size_opening", 3)
206227
decl_grid = kwargs.get("decl_grid", 20)
207228
min_nr_samples = kwargs.get("min_nr_samples", 2)
@@ -297,14 +318,11 @@ def dense_lucaskanade(R, **kwargs):
297318
if x0 is None:
298319
continue
299320

300-
# exclude outlier vectors
301-
x0, y0, u, v = _outlier_removal(x0, y0, u, v, nr_IQR_outlier)
302-
303321
# stack vectors within time window as column vectors
304-
x0Stack.append(x0[:, None])
305-
y0Stack.append(y0[:, None])
306-
uStack.append(u[:, None])
307-
vStack.append(v[:, None])
322+
x0Stack.append(x0.flatten()[:, None])
323+
y0Stack.append(y0.flatten()[:, None])
324+
uStack.append(u.flatten()[:, None])
325+
vStack.append(v.flatten()[:, None])
308326

309327
# return zero motion field is no sparse vectors are found
310328
if len(x0Stack) == 0:
@@ -320,6 +338,11 @@ def dense_lucaskanade(R, **kwargs):
320338
u = np.vstack(uStack)
321339
v = np.vstack(vStack)
322340

341+
# exclude outlier vectors
342+
x, y, u, v = _outlier_removal(
343+
x, y, u, v, nr_std_outlier, multivariate_outlier, k_outlier, verbose
344+
)
345+
323346
if verbose:
324347
print("--- LK found %i sparse vectors ---" % x.size)
325348

@@ -532,7 +555,7 @@ def _clean_image(R, n=3, thr=0):
532555
return R
533556

534557

535-
def _outlier_removal(x, y, u, v, thr):
558+
def _outlier_removal(x, y, u, v, thr, multivariate=True, k=30, verbose=False):
536559

537560
"""Outlier removal.
538561
@@ -548,20 +571,92 @@ def _outlier_removal(x, y, u, v, thr):
548571
Y-components of the velocities.
549572
thr : float
550573
Threshold for outlier detection defined as measure of deviation from
551-
the mean/median of the velocity distribution.
574+
the mean/median in terms of standard deviations.
575+
multivariate : bool, optional
576+
If true (the default), the outlier detection is computed in terms of
577+
the Mahalanobis distance. If false, the outlier detection is simply
578+
computed in terms of velocity.
579+
k : int, optinal
580+
The number of nearest neighbours used to localize the outlier detection.
581+
If set equal to 0, it employs all the data points.
582+
The default is 30.
552583
553584
Returns
554585
-------
555-
A four-element tuple (x,y,u,v) containing the x- and y-coordinates and
556-
velocity components of the motion vectors.
586+
A four-element tuple (x,y,u,v) containing the x- and y-coordinates and
587+
velocity components of the motion vectors.
557588
"""
558-
vel = np.sqrt(u ** 2 + v ** 2) # [px/timesteps]
559-
q1, q2 = np.percentile(vel, [25, 75])
560-
min_speed_thr = np.max((0, q1 - thr * (q2 - q1)))
561-
max_speed_thr = q2 + thr * (q2 - q1)
562-
keep = np.logical_and(vel < max_speed_thr, vel > min_speed_thr)
563589

564-
return x[keep], y[keep], u[keep], v[keep]
590+
if multivariate:
591+
data = np.concatenate((u, v), axis=1)
592+
593+
# globally
594+
if k <= 0:
595+
596+
if not multivariate:
597+
598+
# in terms of velocity
599+
600+
vel = np.sqrt(u ** 2 + v ** 2) # [px/timesteps]
601+
q1, q2, q3 = np.percentile(vel, [16, 50, 84])
602+
min_speed_thr = np.max((0, q2 - thr * (q3 - q1) / 2))
603+
max_speed_thr = q2 + thr * (q3 - q1) / 2
604+
keep = np.logical_and(vel < max_speed_thr, vel >= min_speed_thr)
605+
606+
else:
607+
608+
# mahalanobis distance
609+
610+
data = data - np.mean(data, axis=0)
611+
V = np.cov(data.T)
612+
VI = np.linalg.inv(V)
613+
MD = np.sqrt(np.dot(np.dot(data, VI), data.T).diagonal())
614+
keep = MD < thr
615+
616+
# locally
617+
else:
618+
619+
points = np.concatenate((x, y), axis=1)
620+
tree = scipy.spatial.cKDTree(points)
621+
_, inds = tree.query(points, k=k + 1)
622+
keep = []
623+
for i in range(inds.shape[0]):
624+
625+
if not multivariate:
626+
627+
# in terms of velocity
628+
629+
thisvel = np.sqrt(u[i] ** 2 + v[i] ** 2) # [px/timesteps]
630+
neighboursvel = np.sqrt(u[inds[i, 1:]] ** 2 + v[inds[i, 1:]] ** 2)
631+
q1, q2, q3 = np.percentile(neighboursvel, [16, 50, 84])
632+
min_speed_thr = np.max((0, q2 - thr * (q3 - q1) / 2))
633+
max_speed_thr = q2 + thr * (q3 - q1) / 2
634+
keep.append(thisvel < max_speed_thr and thisvel > min_speed_thr)
635+
636+
else:
637+
638+
# mahalanobis distance
639+
640+
thisdata = data[i, :]
641+
neighbours = data[inds[i, 1:], :].copy()
642+
thisdata = thisdata - np.mean(neighbours, axis=0)
643+
neighbours = neighbours - np.mean(neighbours, axis=0)
644+
V = np.cov(neighbours.T)
645+
VI = np.linalg.inv(V)
646+
MD = np.sqrt(np.dot(np.dot(thisdata, VI), thisdata.T))
647+
keep.append(MD < thr)
648+
649+
keep = np.array(keep)
650+
651+
if verbose:
652+
print("--- %i outliers removed ---" % np.sum(~keep))
653+
654+
x = x[keep]
655+
y = y[keep]
656+
u = u[keep]
657+
v = v[keep]
658+
659+
return x, y, u, v
565660

566661

567662
def _declustering(x, y, u, v, decl_grid, min_nr_samples):
@@ -666,7 +761,7 @@ def _interpolate_sparse_vectors(
666761
default : inverse
667762
available : nearest, inverse, gaussian
668763
k : int or "all"
669-
the number of nearest neighbors used to speed-up the interpolation
764+
the number of nearest neighbours used to speed-up the interpolation
670765
If set equal to "all", it employs all the sparse vectors
671766
default : 20
672767
epsilon : float
@@ -728,7 +823,7 @@ def _interpolate_sparse_vectors(
728823
idelta = subgrid.shape[0]
729824

730825
if rbfunction.lower() == "nearest":
731-
# find indices of the nearest neighbors
826+
# find indices of the nearest neighbours
732827
_, inds = tree.query(subgrid, k=1)
733828

734829
U[i0 : (i0 + idelta)] = u.ravel()[inds]
@@ -744,7 +839,7 @@ def _interpolate_sparse_vectors(
744839
).astype(int)
745840

746841
else:
747-
# find indices of the k-nearest neighbors
842+
# find indices of the k-nearest neighbours
748843
d, inds = tree.query(subgrid, k=k)
749844

750845
if inds.ndim == 1:

0 commit comments

Comments
 (0)