Skip to content

Commit e4fb1b3

Browse files
committed
Refactored multi-dimensional distance profile
1 parent 955ab96 commit e4fb1b3

File tree

6 files changed

+252
-123
lines changed

6 files changed

+252
-123
lines changed

stumpy/maamp.py

Lines changed: 82 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -112,25 +112,23 @@ def maamp_subspace(T, m, subseq_idx, nn_idx, k, include=None, discords=False):
112112
return S
113113

114114

115-
def _query_maamp_profile(
115+
def _maamp_multi_distance_profile(
116116
query_idx, T_A, T_B, m, excl_zone, T_B_subseq_isfinite, include=None, discords=False
117117
):
118118
"""
119119
Multi-dimensional wrapper to compute the multi-dimensional non-normalized (i.e.,
120-
without z-normalization) matrix profile and the multi-dimensional matrix profile
121-
index for a given query window within the times series or sequence that is denoted
122-
by the `query_idx` index. Essentially, this is a convenience wrapper around
123-
`_multi_mass_absolute`.
120+
without z-normalization) distance profile for a given query window within the
121+
time series or sequence that is denoted by the `query_idx` index. Essentially,
122+
this is a convenience wrapper around `_multi_mass_absolute`.
124123
125124
Parameters
126125
----------
127126
query_idx : int
128-
The window index to calculate the first multi-dimensional matrix profile and
129-
multi-dimensional matrix profile indices
127+
The window index to calculate the multi-dimensional distance profile for
130128
131129
T_A : numpy.ndarray
132-
The time series or sequence for which the multi-dimensional matrix profile and
133-
multi-dimensional matrix profile indices
130+
The time series or sequence for which the multi-dimensional distance profile
131+
will be returned
134132
135133
T_B : numpy.ndarray
136134
The time series or sequence that contains your query subsequences
@@ -159,13 +157,9 @@ def _query_maamp_profile(
159157
160158
Returns
161159
-------
162-
P : numpy.ndarray
163-
Multi-dimensional matrix profile for the window with index equal to
160+
D : numpy.ndarray
161+
Multi-dimensional distance profile for the window with index equal to
164162
`query_idx`
165-
166-
I : numpy.ndarray
167-
Multi-dimensional matrix profile indices for the window with index
168-
equal to `query_idx`
169163
"""
170164
d, n = T_A.shape
171165
k = n - m + 1
@@ -194,17 +188,65 @@ def _query_maamp_profile(
194188

195189
core.apply_exclusion_zone(D, query_idx, excl_zone)
196190

197-
P = np.full(d, np.inf, dtype=np.float64)
198-
I = np.full(d, -1, dtype=np.int64)
191+
return D
199192

200-
for i in range(d):
201-
min_index = np.argmin(D[i])
202-
I[i] = min_index
203-
P[i] = D[i, min_index]
204-
if np.isinf(P[i]): # pragma nocover
205-
I[i] = -1
206193

207-
return P, I
194+
def maamp_multi_distance_profile(query_idx, T, m, include=None, discords=False):
195+
"""
196+
Multi-dimensional wrapper to compute the multi-dimensional non-normalized (i.e.,
197+
without z-normalization) distance profile for a given query window within the
198+
time series or sequence that is denoted by the `query_idx` index.
199+
200+
Parameters
201+
----------
202+
query_idx : int
203+
The window index to calculate the multi-dimensional distance profile for
204+
205+
T : numpy.ndarray
206+
The time series or sequence for which the multi-dimensional distance profile
207+
will be returned
208+
209+
m : int
210+
Window size
211+
212+
include : numpy.ndarray, default None
213+
A list of (zero-based) indices corresponding to the dimensions in `T` that
214+
must be included in the constrained multidimensional motif search.
215+
For more information, see Section IV D in:
216+
217+
`DOI: 10.1109/ICDM.2017.66 \
218+
<https://www.cs.ucr.edu/~eamonn/Motif_Discovery_ICDM.pdf>`__
219+
220+
discords : bool, default False
221+
When set to `True`, this reverses the distance profile to favor discords rather
222+
than motifs. Note that indices in `include` are still maintained and respected.
223+
224+
Returns
225+
-------
226+
D : numpy.ndarray
227+
Multi-dimensional distance profile for the window with index equal to
228+
`query_idx`
229+
"""
230+
T, T_subseq_isfinite = core.preprocess_non_normalized(T, m)
231+
232+
if T.ndim <= 1: # pragma: no cover
233+
err = f"T is {T.ndim}-dimensional and must be at least 1-dimensional"
234+
raise ValueError(f"{err}")
235+
236+
core.check_window_size(m, max_size=T.shape[1])
237+
238+
if include is not None: # pragma: no cover
239+
include = mstump._preprocess_include(include)
240+
241+
excl_zone = int(
242+
np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)
243+
) # See Definition 3 and Figure 3
244+
245+
D = _maamp_multi_distance_profile(
246+
query_idx, T, T, m, excl_zone, T_subseq_isfinite, include, discords
247+
)
248+
249+
return D
208250

209251

210252
def _get_first_maamp_profile(
@@ -215,8 +257,9 @@ def _get_first_maamp_profile(
215257
z-normalization multi-dimensional matrix profile and multi-dimensional matrix
216258
profile index for a given window within the time series or sequence that is denoted
217259
by the `start` index. Essentially, this is a convenience wrapper around
218-
`_multi_mass_absolute`. This is a convenience wrapper for the `_query_maamp_profile`
219-
function but does not return the multi-dimensional matrix profile subspace.
260+
`_multi_mass_absolute`. This is a convenience wrapper for the
261+
`_maamp_multi_distance_profile` function but does not return the multi-dimensional
262+
matrix profile subspace.
220263
221264
Parameters
222265
----------
@@ -264,9 +307,21 @@ def _get_first_maamp_profile(
264307
Multi-dimensional matrix profile indices for the window with index
265308
equal to `start`
266309
"""
267-
P, I = _query_maamp_profile(
310+
D = _maamp_multi_distance_profile(
268311
start, T_A, T_B, m, excl_zone, T_B_subseq_isfinite, include, discords
269312
)
313+
314+
d = T_A.shape[0]
315+
P = np.full(d, np.inf, dtype=np.float64)
316+
I = np.full(d, -1, dtype=np.int64)
317+
318+
for i in range(d):
319+
min_index = np.argmin(D[i])
320+
I[i] = min_index
321+
P[i] = D[i, min_index]
322+
if np.isinf(P[i]): # pragma nocover
323+
I[i] = -1
324+
270325
return P, I
271326

272327

stumpy/mstump.py

Lines changed: 92 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from functools import lru_cache
1111

1212
from . import core, config
13-
from .maamp import maamp, maamp_subspace
13+
from .maamp import maamp_multi_distance_profile, maamp, maamp_subspace
1414

1515
logger = logging.getLogger(__name__)
1616

@@ -336,24 +336,22 @@ def subspace(T, m, subseq_idx, nn_idx, k, include=None, discords=False, normaliz
336336
return S
337337

338338

339-
def _query_mstump_profile(
339+
def _multi_distance_profile(
340340
query_idx, T_A, T_B, m, excl_zone, M_T, Σ_T, μ_Q, σ_Q, include=None, discords=False
341341
):
342342
"""
343-
Multi-dimensional wrapper to compute the multi-dimensional matrix profile and
344-
the multi-dimensional matrix profile index for a given query window within the times
345-
series or sequence that is denoted by the `query_idx` index. Essentially, this is a
346-
convenience wrapper around `_multi_mass`.
343+
Multi-dimensional wrapper to compute the multi-dimensional distance profile for a
344+
given query window within the time series or sequence that is denoted by the
345+
`query_idx` index. Essentially, this is a convenience wrapper around `_multi_mass`.
347346
348347
Parameters
349348
----------
350349
query_idx : int
351-
The window index to calculate the first multi-dimensional matrix profile and
352-
multi-dimensional matrix profile indices
350+
The window index to calculate the multi-dimensional distance profile for
353351
354352
T_A : numpy.ndarray
355-
The time series or sequence for which the multi-dimensional matrix profile and
356-
multi-dimensional matrix profile indices
353+
The time series or sequence for which the multi-dimensional distance profile
354+
is computed
357355
358356
T_B : numpy.ndarray
359357
The time series or sequence that contains your query subsequences
@@ -371,10 +369,10 @@ def _query_mstump_profile(
371369
Sliding standard deviation for `T_A`
372370
373371
μ_Q : numpy.ndarray
374-
Sliding mean for `T_B`
372+
Sliding mean for the query subsequence `T_B`
375373
376374
σ_Q : numpy.ndarray
377-
Sliding standard deviation for `T_B`
375+
Sliding standard deviation for the query subsequence `T_B`
378376
379377
include : numpy.ndarray, default None
380378
A list of (zero-based) indices corresponding to the dimensions in `T` that
@@ -390,13 +388,9 @@ def _query_mstump_profile(
390388
391389
Returns
392390
-------
393-
P : numpy.ndarray
394-
Multi-dimensional matrix profile for the window with index equal to
391+
D : numpy.ndarray
392+
Multi-dimensional distance profile for the window with index equal to
395393
`query_idx`
396-
397-
I : numpy.ndarray
398-
Multi-dimensional matrix profile indices for the window with index
399-
equal to `query_idx`
400394
"""
401395
d, n = T_A.shape
402396
k = n - m + 1
@@ -427,17 +421,73 @@ def _query_mstump_profile(
427421

428422
core.apply_exclusion_zone(D, query_idx, excl_zone)
429423

430-
P = np.full(d, np.inf, dtype=np.float64)
431-
I = np.full(d, -1, dtype=np.int64)
424+
return D
432425

433-
for i in range(d):
434-
min_index = np.argmin(D[i])
435-
I[i] = min_index
436-
P[i] = D[i, min_index]
437-
if np.isinf(P[i]): # pragma nocover
438-
I[i] = -1
439426

440-
return P, I
427+
@core.non_normalized(maamp_multi_distance_profile)
428+
def multi_distance_profile(
429+
query_idx, T, m, include=None, discords=False, normalize=True
430+
):
431+
"""
432+
Multi-dimensional wrapper to compute the multi-dimensional distance profile for a
433+
given query window within the time series or sequence that is denoted by the
434+
`query_idx` index.
435+
436+
Parameters
437+
----------
438+
query_idx : int
439+
The window index to calculate the multi-dimensional distance profile for
440+
441+
T : numpy.ndarray
442+
The multi-dimensional time series or sequence for which the multi-dimensional
443+
distance profile will be returned
444+
445+
m : int
446+
Window size
447+
448+
include : numpy.ndarray, default None
449+
A list of (zero-based) indices corresponding to the dimensions in `T` that
450+
must be included in the constrained multidimensional motif search.
451+
For more information, see Section IV D in:
452+
453+
`DOI: 10.1109/ICDM.2017.66 \
454+
<https://www.cs.ucr.edu/~eamonn/Motif_Discovery_ICDM.pdf>`__
455+
456+
discords : bool, default False
457+
When set to `True`, this reverses the distance profile to favor discords rather
458+
than motifs. Note that indices in `include` are still maintained and respected.
459+
460+
normalize : bool, default True
461+
When set to `True`, this z-normalizes subsequences prior to computing distances.
462+
Otherwise, this function gets re-routed to its complementary non-normalized
463+
equivalent set in the `@core.non_normalized` function decorator.
464+
465+
Returns
466+
-------
467+
D : numpy.ndarray
468+
Multi-dimensional distance profile for the window with index equal to
469+
`query_idx`
470+
"""
471+
T, M_T, Σ_T = core.preprocess(T, m)
472+
473+
if T.ndim <= 1: # pragma: no cover
474+
err = f"T is {T.ndim}-dimensional and must be at least 1-dimensional"
475+
raise ValueError(f"{err}")
476+
477+
core.check_window_size(m, max_size=T.shape[1])
478+
479+
if include is not None: # pragma: no cover
480+
include = _preprocess_include(include)
481+
482+
excl_zone = int(
483+
np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM)
484+
) # See Definition 3 and Figure 3
485+
486+
D = _multi_distance_profile(
487+
query_idx, T, T, m, excl_zone, M_T, Σ_T, M_T, Σ_T, include, discords
488+
)
489+
490+
return D
441491

442492

443493
def _get_first_mstump_profile(
@@ -448,8 +498,8 @@ def _get_first_mstump_profile(
448498
and multi-dimensional matrix profile index for a given window within the
449499
time series or sequence that is denoted by the `start` index.
450500
Essentially, this is a convenience wrapper around `_multi_mass`. This is a
451-
convenience wrapper for the `_query_mstump_profile` function but does not return
452-
the multi-dimensional matrix profile subspace.
501+
convenience wrapper for the `_multi_distance_profile` function but does not
502+
return the multi-dimensional matrix profile subspace.
453503
454504
Parameters
455505
----------
@@ -505,9 +555,21 @@ def _get_first_mstump_profile(
505555
Multi-dimensional matrix profile indices for the window with index
506556
equal to `start`
507557
"""
508-
P, I = _query_mstump_profile(
558+
D = _multi_distance_profile(
509559
start, T_A, T_B, m, excl_zone, M_T, Σ_T, μ_Q, σ_Q, include, discords
510560
)
561+
562+
d = T_A.shape[0]
563+
P = np.full(d, np.inf, dtype=np.float64)
564+
I = np.full(d, -1, dtype=np.int64)
565+
566+
for i in range(d):
567+
min_index = np.argmin(D[i])
568+
I[i] = min_index
569+
P[i] = D[i, min_index]
570+
if np.isinf(P[i]): # pragma nocover
571+
I[i] = -1
572+
511573
return P, I
512574

513575

0 commit comments

Comments
 (0)