Skip to content

Commit b7494d9

Browse files
committed
Revise function to pass test
1 parent ab411aa commit b7494d9

File tree

2 files changed

+33
-43
lines changed

2 files changed

+33
-43
lines changed

stumpy/core.py

Lines changed: 28 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,7 @@ def get_max_window_size(n):
554554
return max_m
555555

556556

557-
def check_window_size(m, max_size=None, excl_zone=None, last_start_index=None):
557+
def check_window_size(m, max_size=None, n=None):
558558
"""
559559
Check the window size and ensure that it is greater than or equal to 3 and, if
560560
`max_size` is provided, ensure that the window size is less than or equal to the
@@ -569,12 +569,8 @@ def check_window_size(m, max_size=None, excl_zone=None, last_start_index=None):
569569
max_size : int, default None
570570
The maximum window size allowed
571571
572-
excl_zone : int, default None
573-
Size of the exclusion zone. If provided, then the `last_start_index`
574-
must also be provided.
575-
576-
last_start_index : int, default None
577-
Start index of the last subsequence
572+
n : int, default None
573+
The length of the time series.
578574
579575
Returns
580576
-------
@@ -597,31 +593,31 @@ def check_window_size(m, max_size=None, excl_zone=None, last_start_index=None):
597593
if max_size is not None and m > max_size:
598594
raise ValueError(f"The window size must be less than or equal to {max_size}")
599595

600-
if excl_zone is not None:
601-
if last_start_index is None: # pragma: no cover
602-
raise ValueError(
603-
"last_start_index must be provided when excl_zone is not None"
604-
)
605-
606-
# Check if subsequneces have non-trivial neighbours
607-
608-
# Case 1:
609-
# There is at least one subsequence with non-trivial neighbour
610-
# i.e. For AN `i`, there exists at least one `j` such that |i - j| > excl_zone
611-
# In this case, we just need to consider the two subsequences that are furthest
612-
# apart from each other.
613-
# In other words: |last_start_index - 0| > excl_zone
614-
cond_1 = (last_start_index - 0) > excl_zone
615-
616-
# Case 2:
617-
# Check if each single subsequence has at least one non-trivial neighbor
618-
# i. e. For ANY `i`, there exists at least one `j` such that |i - j| > excl_zone
619-
# In this case, we need to consider the subseuqence whose furthest neighbour is
620-
# the shortest compared to other subsequences.
621-
# In other words: |ceil(last_start_index / 2) - 0| > excl_zone
622-
cond_2 = (math.ceil(last_start_index / 2) - 0) > excl_zone
623-
624-
if not cond_1 or not cond_2:
596+
if n is not None:
597+
# The following code raises a warning if there is at least one subsequence
598+
# with no non-trivial neighbor. The following logic does not check if
599+
# a subsequence has a non-finite value.
600+
601+
# Logic: For each subsequence `S_i = T[i : i + m]`, its neighbor `S_j`
602+
# is non-trivial if |i - j| > excl_zone. Let's denote `S_jmax` as
603+
# the neighbor that is furthest away from `S_i` (index-wise). So:
604+
# |i - jmax| >= |i - j|
605+
# Therefore, if `S_i` has at least one non-trivial neighbor, then `S_jmax` is
606+
# definitely a non-trivial neighbor. Because:
607+
# |i - jmax| >= |i - j| > excl_zone
608+
# To ensure ALL subsequences have at least one non-trivial neighbor, we can just
609+
# check the subsequence `S_i` that has the minimum |i - jmax|. Let's denote `d`
610+
# as that minimum value. So, if d > excl_zone, then:
611+
# For any `i` and its corresponding `jmax`, we have:
612+
# |i - jmax| >= d > excl_zone
613+
614+
# The minimum |i - jmax| is achieved when `S_i` is the middle subsequence,
615+
# i.e. i == int(ceil((n - m) / 2)), and its corresponding jmax is 0. Hence,
616+
# we just need to make sure the following inequality is satisfied:
617+
# |int(ceil((n - m) / 2)) - 0| > excl_zone
618+
619+
excl_zone = int(math.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
620+
if (int(math.ceil((n - m) / 2)) - 0) <= excl_zone:
625621
msg = (
626622
f"The window size, 'm = {m}', may be too large and could lead to "
627623
+ "meaningless results. Consider reducing 'm' where necessary"

stumpy/stump.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -716,20 +716,14 @@ def stump(
716716
l = n_A - m + 1
717717

718718
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
719-
if ignore_trivial:
720-
excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
719+
excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
720+
if ignore_trivial: # self-join
721+
core.check_window_size(m, max_size=n_A, n=n_A)
721722
diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64)
722-
else:
723-
excl_zone = None
723+
else: # AB-join
724+
core.check_window_size(m, max_size=min(n_A, n_B))
724725
diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64)
725726

726-
core.check_window_size(
727-
m,
728-
max_size=min(T_A.shape[0], T_B.shape[0]),
729-
excl_zone=excl_zone,
730-
last_start_index=l - 1,
731-
)
732-
733727
P, PL, PR, I, IL, IR = _stump(
734728
T_A,
735729
T_B,

0 commit comments

Comments
 (0)