Skip to content
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
87f3873
Add extra check for window size
NimaSarajpoor Mar 27, 2025
f8d6df5
update module to include extra check for self join
NimaSarajpoor Mar 27, 2025
f174ed6
add tests for warning
NimaSarajpoor Mar 28, 2025
f611339
revise comment
NimaSarajpoor Mar 28, 2025
c2073ef
ignore coverage
NimaSarajpoor Mar 28, 2025
6c69155
minor improvement in docstring
NimaSarajpoor Mar 28, 2025
e63860c
fix flake8
NimaSarajpoor Mar 28, 2025
91f767e
Revised test function using expected signature
NimaSarajpoor Mar 29, 2025
ab411aa
fixed format
NimaSarajpoor Mar 29, 2025
b7494d9
Revise function to pass test
NimaSarajpoor Mar 30, 2025
3c87e0d
Update stumpy/core.py
NimaSarajpoor Mar 30, 2025
252d52b
improve comments
NimaSarajpoor Mar 30, 2025
8e5d9af
improve readability of function
NimaSarajpoor Mar 30, 2025
113b5c5
minor improvement in the description of param
NimaSarajpoor Mar 30, 2025
82caebb
remove redundant test function
NimaSarajpoor Mar 30, 2025
f29732f
Revise logic and the comment
NimaSarajpoor Mar 31, 2025
6f308a3
improving comments
NimaSarajpoor Mar 31, 2025
77b878b
minor change
NimaSarajpoor Mar 31, 2025
2c68716
minor change in comment
NimaSarajpoor Apr 1, 2025
643b4b0
minor change in comment
NimaSarajpoor Apr 1, 2025
a15b757
update aamp for checking window size
NimaSarajpoor Apr 1, 2025
316bf07
improve docstring and comments
NimaSarajpoor Apr 2, 2025
9f71816
improve docstring
NimaSarajpoor Apr 2, 2025
445a6cb
use smaller input to make test function more understandable
NimaSarajpoor Apr 2, 2025
7751792
updated stumped and aamped
NimaSarajpoor Apr 2, 2025
8bff40b
updated maamp and maamped modules
NimaSarajpoor Apr 2, 2025
f0cbfae
update different modules to consider the change in core.check_window_…
NimaSarajpoor Apr 2, 2025
aa61b24
minor fix
NimaSarajpoor Apr 2, 2025
9349e2a
improve comments
NimaSarajpoor Apr 2, 2025
54cd2fa
improve comments
NimaSarajpoor Apr 2, 2025
917fcc4
improved the explanations
NimaSarajpoor Apr 2, 2025
be4d6bb
minor change in the description of function
NimaSarajpoor Apr 2, 2025
97e6f2b
improve the clarity of the logic
NimaSarajpoor Apr 5, 2025
32a15f3
improve comment
NimaSarajpoor Apr 5, 2025
90d3901
improve description of function
NimaSarajpoor Apr 5, 2025
17e2db9
minor change
NimaSarajpoor Apr 5, 2025
0a25af1
improve readability and consistency
NimaSarajpoor Apr 5, 2025
e29cdca
minor change
NimaSarajpoor Apr 5, 2025
949db7e
minor changes
NimaSarajpoor Apr 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions stumpy/aamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,17 +407,17 @@ def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1):
if T_B.ndim != 1: # pragma: no cover
raise ValueError(f"T_B is {T_B.ndim}-dimensional and must be 1-dimensional. ")

core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)

n_A = T_A.shape[0]
n_B = T_B.shape[0]
l = n_A - m + 1

ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
if ignore_trivial:
if ignore_trivial: # self-join
core.check_window_size(m, max_size=min(n_A, n_B), n=n_A)
diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64)
else:
else: # AB-join
core.check_window_size(m, max_size=min(n_A, n_B))
diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64)

P, PL, PR, I, IL, IR = _aamp(
Expand Down
6 changes: 3 additions & 3 deletions stumpy/aamped.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,17 +386,17 @@ def aamped(client, T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1):
if T_B.ndim != 1: # pragma: no cover
raise ValueError(f"T_B is {T_B.ndim}-dimensional and must be 1-dimensional. ")

core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)

n_A = T_A.shape[0]
n_B = T_B.shape[0]

ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))

if ignore_trivial:
core.check_window_size(m, max_size=min(n_A, n_B), n=n_A)
diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64)
else:
core.check_window_size(m, max_size=min(n_A, n_B))
diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64)

_aamped = core._client_to_func(client)
Expand Down
44 changes: 42 additions & 2 deletions stumpy/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,11 +554,12 @@ def get_max_window_size(n):
return max_m


def check_window_size(m, max_size=None):
def check_window_size(m, max_size=None, n=None):
"""
Check the window size and ensure that it is greater than or equal to 3 and, if
`max_size` is provided, ensure that the window size is less than or equal to the
`max_size`
`max_size`. Furthermore, if `n` is provided, a warning is raised when at
least one subsequence has no non-trivial neighbor in a self-join.

Parameters
----------
Expand All @@ -568,6 +569,10 @@ def check_window_size(m, max_size=None):
max_size : int, default None
The maximum window size allowed

n : int, default None
The length of the time series in the case of a self-join.
`n` should be set to `None` in the case of an `AB-join`.

Returns
-------
None
Expand All @@ -589,6 +594,41 @@ def check_window_size(m, max_size=None):
if max_size is not None and m > max_size:
raise ValueError(f"The window size must be less than or equal to {max_size}")

if n is not None:
# Raise a warning if there is at least one subsequence with no
# non-trivial neighbor in the case of a self-join.

# For any time series `T`, an "eligible nearest neighbor" subsequence for
# the central-most subsequence must be located outside the `excl_zone`,
# and the central-most subsequence will ALWAYS have the smallest index-wise
# distance to its furthest neighbor among all subsequences. Therefore,
# we only need to check whether the `excl_zone` eliminates all "neighbors" for
# the central-most subsequence in `T`. In fact, we just need to verify whether
# the `excl_zone` eliminates the "neighbor" that is furthest away (index-wise)
# from the central-most subsequence. If it does not, this implies that all
# subsequences in `T` will have at least one "eligible nearest neighbor"
# outside their respective `excl_zone`.

excl_zone = int(math.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))

l = n - m + 1

# If `l` is odd (`l == 2k+1`):
# The central subsequence is at index `k`, with furthest neighbors at `0`
# and `2k`, both `k == l // 2` indices away from the central-most subsequence.

# If `l` is even (`l == 2k`):
# The central subsequences are at `k-1` and `k`. The furthest neighbor is
# at `2k-1` for `k-1`, and `0` for `k`. In both cases, the subsequence
# and its furthest neighbor are `k == l // 2` indices away.
max_gap = l // 2
if max_gap <= excl_zone:
msg = (
f"The window size, 'm = {m}', may be too large and could lead to "
+ "meaningless results. Consider reducing 'm' where necessary"
)
warnings.warn(msg)


@njit(fastmath=config.STUMPY_FASTMATH_TRUE)
def _sliding_dot_product(Q, T):
Expand Down
7 changes: 6 additions & 1 deletion stumpy/gpu_aamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,8 +536,13 @@ def gpu_aamp(T_A, m, T_B=None, ignore_trivial=True, device_id=0, p=2.0, k=1):
"For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
)

core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
if ignore_trivial: # self-join
core.check_window_size(
m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0]
)
else: # AB-join
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))

n = T_B.shape[0]
w = T_A.shape[0] - m + 1
Expand Down
7 changes: 6 additions & 1 deletion stumpy/gpu_stump.py
Original file line number Diff line number Diff line change
Expand Up @@ -666,8 +666,13 @@ def gpu_stump(
"For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
)

core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
if ignore_trivial: # self-join
core.check_window_size(
m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0]
)
else: # AB-join
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))

n = T_B.shape[0]
w = T_A.shape[0] - m + 1
Expand Down
8 changes: 4 additions & 4 deletions stumpy/maamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def maamp_subspace(
returned.
"""
T = core._preprocess(T)
core.check_window_size(m, max_size=T.shape[-1])
core.check_window_size(m, max_size=T.shape[-1], n=T.shape[-1])

subseqs, _ = core.preprocess_non_normalized(T[:, subseq_idx : subseq_idx + m], m)
neighbors, _ = core.preprocess_non_normalized(T[:, nn_idx : nn_idx + m], m)
Expand Down Expand Up @@ -269,7 +269,7 @@ def maamp_mdl(
A list of numpy.ndarrays that contains the `k`th-dimensional subspaces
"""
T = core._preprocess(T)
core.check_window_size(m, max_size=T.shape[-1])
core.check_window_size(m, max_size=T.shape[-1], n=T.shape[-1])

if discretize_func is None:
T_isfinite = np.isfinite(T)
Expand Down Expand Up @@ -441,7 +441,7 @@ def maamp_multi_distance_profile(query_idx, T, m, include=None, discords=False,
err = f"T is {T.ndim}-dimensional and must be at least 1-dimensional"
raise ValueError(f"{err}")

core.check_window_size(m, max_size=T.shape[1])
core.check_window_size(m, max_size=T.shape[1], n=T.shape[1])

if include is not None: # pragma: no cover
include = core._preprocess_include(include)
Expand Down Expand Up @@ -933,7 +933,7 @@ def maamp(T, m, include=None, discords=False, p=2.0):
err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional"
raise ValueError(f"{err}")

core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]))
core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1])

if include is not None:
include = core._preprocess_include(include)
Expand Down
2 changes: 1 addition & 1 deletion stumpy/maamped.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@ def maamped(client, T, m, include=None, discords=False, p=2.0):
err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional"
raise ValueError(f"{err}")

core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]))
core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1])

if include is not None:
include = core._preprocess_include(include)
Expand Down
8 changes: 5 additions & 3 deletions stumpy/mstump.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def subspace(
array([0, 1])
"""
T = core._preprocess(T)
core.check_window_size(m, max_size=T.shape[-1])
core.check_window_size(m, max_size=T.shape[-1], n=T.shape[-1])
T_subseq_isconstant = core.process_isconstant(T, m, T_subseq_isconstant)

if discretize_func is None:
Expand Down Expand Up @@ -409,7 +409,7 @@ def mdl(
(array([ 80. , 111.509775]), [array([1]), array([0, 1])])
"""
T = core._preprocess(T)
core.check_window_size(m, max_size=T.shape[-1])
core.check_window_size(m, max_size=T.shape[-1], n=T.shape[-1])
T_subseq_isconstant = core.process_isconstant(T, m, T_subseq_isconstant)

if discretize_func is None:
Expand Down Expand Up @@ -1228,7 +1228,9 @@ def mstump(
err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional"
raise ValueError(f"{err}")

core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]))
# `mstump` currently only supports self-joins. Therefore, the argument `n`
# should always be passed to the function `core.check_window_size`.
core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1])

if include is not None:
include = core._preprocess_include(include)
Expand Down
4 changes: 3 additions & 1 deletion stumpy/mstumped.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,9 @@ def mstumped(
err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional"
raise ValueError(f"{err}")

core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]))
# `mstumped` currently only supports self-joins. Therefore, the argument `n`
# should always be passed to the function `core.check_window_size`.
core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1])

if include is not None:
include = core._preprocess_include(include)
Expand Down
7 changes: 6 additions & 1 deletion stumpy/scraamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,10 +646,15 @@ def __init__(
"For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
)

core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
self._ignore_trivial = core.check_ignore_trivial(
self._T_A, self._T_B, self._ignore_trivial
)
if self._ignore_trivial: # self-join
core.check_window_size(
m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0]
)
else: # AB-join
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))

self._n_A = self._T_A.shape[0]
self._n_B = self._T_B.shape[0]
Expand Down
7 changes: 6 additions & 1 deletion stumpy/scrump.py
Original file line number Diff line number Diff line change
Expand Up @@ -905,10 +905,15 @@ def __init__(
"For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
)

core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
self._ignore_trivial = core.check_ignore_trivial(
self._T_A, self._T_B, self._ignore_trivial
)
if self._ignore_trivial:
core.check_window_size(
m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0]
)
else:
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))

self._n_A = self._T_A.shape[0]
self._n_B = self._T_B.shape[0]
Expand Down
6 changes: 4 additions & 2 deletions stumpy/stamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,13 +208,14 @@ def stamp(
if T_B.ndim != 1: # pragma: no cover
raise ValueError(f"T_B is {T_B.ndim}-dimensional and must be 1-dimensional. ")

core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))

subseq_T_A = core.rolling_window(T_A, m)
excl_zone = int(np.ceil(m / 2))

# Add exclusionary zone
if ignore_trivial:
core.check_window_size(
m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0]
)
out = [
_mass_PI(
subseq,
Expand All @@ -229,6 +230,7 @@ def stamp(
for i, subseq in enumerate(subseq_T_A)
]
else:
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
out = [
_mass_PI(
subseq,
Expand Down
7 changes: 6 additions & 1 deletion stumpy/stomp.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,13 @@ def _stomp(T_A, m, T_B=None, ignore_trivial=True):
if T_B.ndim != 1: # pragma: no cover
raise ValueError(f"T_B is {T_B.ndim}-dimensional and must be 1-dimensional. ")

core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
if ignore_trivial: # self-join
core.check_window_size(
m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0]
)
else: # AB-join
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))

n = T_A.shape[0]
l = n - m + 1
Expand Down
11 changes: 5 additions & 6 deletions stumpy/stump.py
Original file line number Diff line number Diff line change
Expand Up @@ -711,18 +711,17 @@ def stump(
"For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
)

core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)

n_A = T_A.shape[0]
n_B = T_B.shape[0]
l = n_A - m + 1

ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))

if ignore_trivial:
if ignore_trivial: # self-join
core.check_window_size(m, max_size=min(n_A, n_B), n=n_A)
diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64)
else:
else: # AB-join
core.check_window_size(m, max_size=min(n_A, n_B))
diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64)

P, PL, PR, I, IL, IR = _stump(
Expand Down
6 changes: 3 additions & 3 deletions stumpy/stumped.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,17 +618,17 @@ def stumped(
"For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
)

core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)

n_A = T_A.shape[0]
n_B = T_B.shape[0]

ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))

if ignore_trivial:
core.check_window_size(m, max_size=min(n_A, n_B), n=n_A)
diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64)
else:
core.check_window_size(m, max_size=min(n_A, n_B))
diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64)

_stumped = core._client_to_func(client)
Expand Down
15 changes: 15 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,21 @@ def test_check_max_window_size():
core.check_window_size(m, max_size=3)


def test_check_window_size_excl_zone():
    """
    Verify that `core.check_window_size` warns during a self-join when the
    window is large enough that some subsequence is left without any
    non-trivial neighbor.
    """
    T = np.random.rand(10)
    m = 7
    n = len(T)

    # With `n == 10` and `m == 7`, there are `10 - 7 + 1 = 4` subsequences,
    # starting at indices 0, 1, 2, and 3, and the `excl_zone` is
    # ceil(m / 4) = 2. Every other subsequence lies within 2 indices of the
    # subsequence starting at index 1, so it has no non-trivial neighbor and
    # a warning is expected.
    with pytest.warns(UserWarning):
        core.check_window_size(m, max_size=n, n=n)


@pytest.mark.parametrize("Q, T", test_data)
def test_njit_sliding_dot_product(Q, T):
ref_mp = naive_rolling_window_dot_product(Q, T)
Expand Down