@@ -554,7 +554,7 @@ def get_max_window_size(n):
554554 return max_m
555555
556556
557- def check_window_size (m , max_size = None , excl_zone = None , last_start_index = None ):
557+ def check_window_size (m , max_size = None , n = None ):
558558 """
559559 Check the window size and ensure that it is greater than or equal to 3 and, if
560560 `max_size` is provided, ensure that the window size is less than or equal to the
@@ -569,12 +569,8 @@ def check_window_size(m, max_size=None, excl_zone=None, last_start_index=None):
569569 max_size : int, default None
570570 The maximum window size allowed
571571
572- excl_zone : int, default None
573- Size of the exclusion zone. If provided, then the `last_start_index`
574- must also be provided.
575-
576- last_start_index : int, default None
577- Start index of the last subsequence
572+ n : int, default None
573+ The length of the time series.
578574
579575 Returns
580576 -------
@@ -597,31 +593,31 @@ def check_window_size(m, max_size=None, excl_zone=None, last_start_index=None):
597593 if max_size is not None and m > max_size :
598594 raise ValueError (f"The window size must be less than or equal to { max_size } " )
599595
600- if excl_zone is not None :
601- if last_start_index is None : # pragma: no cover
602- raise ValueError (
603- "last_start_index must be provided when excl_zone is not None"
604- )
605-
606- # Check if subsequneces have non-trivial neighbours
607-
608- # Case 1:
609- # There is at least one subsequence with non-trivial neighbour
610- # i.e. For AN `i`, there exists at least one `j` such that |i - j| > excl_zone
611- # In this case, we just need to consider the two subsequences that are furthest
612- # apart from each other.
613- # In other words: |last_start_index - 0| > excl_zone
614- cond_1 = ( last_start_index - 0 ) > excl_zone
615-
616- # Case 2:
617- # Check if each single subsequence has at least one non-trivial neighbor
618- # i. e. For ANY `i`, there exists at least one `j` such that |i - j| > excl_zone
619- # In this case, we need to consider the subseuqence whose furthest neighbour is
620- # the shortest compared to other subsequences.
621- # In other words: | ceil(last_start_index / 2) - 0| > excl_zone
622- cond_2 = ( math . ceil ( last_start_index / 2 ) - 0 ) > excl_zone
623-
624- if not cond_1 or not cond_2 :
596+ if n is not None :
597+ # The following code raises a warning if there is at least one subsequence
598+ # with no non-trivial neighbor. The following logic does not check if
599+ # a subsequence has a non-finite value.
600+
601+ # Logic: For each subsequence `S_i = T[i : i + m]`, its neighbor `S_j`
602+ # is non-trivial if |i - j| > excl_zone. Let's denote `S_jmax` as
603+ # the neighbor that is furthest away from `S_i` (index-wise). So:
604+ # |i - jmax| >= |i - j|
605+ # Therefore, if `S_i` has at least one non-trivial neighbor, then `S_jmax` is
606+ # definitely a non-trivial neighbor. Because:
607+ # |i - jmax| >= |i - j| > excl_zone
608+ # To ensure ALL subsequences have at least one non-trivial neighbor, we can just
609+ # check the subsequence `S_i` that has the minimum |i - jmax|. Let's denote `d`
610+ # as that minimum value. So, if d > excl_zone, then:
611+ # For any `i` and its corresponding `jmax`, we have:
612+ # |i - jmax| >= d > excl_zone
613+
614+ # The minimum |i - jmax| is achieved when `S_i` is the middle subsequence,
615+ # i.e. i == int(ceil((n - m) / 2)), and its corresponding jmax is 0. Hence,
616+ # we just need to make sure the following inequality is satisfied:
617+ # |int(ceil((n - m) / 2)) - 0| > excl_zone
618+
619+ excl_zone = int ( math . ceil ( m / config . STUMPY_EXCL_ZONE_DENOM ))
620+ if ( int ( math . ceil (( n - m ) / 2 )) - 0 ) <= excl_zone :
625621 msg = (
626622 f"The window size, 'm = { m } ', may be too large and could lead to "
627623 + "meaningless results. Consider reducing 'm' where necessary"
0 commit comments