@@ -570,7 +570,8 @@ def check_window_size(m, max_size=None, n=None):
570570 The maximum window size allowed
571571
572572 n : int, default None
573- The length of the time series in a self-join case
573+ The length of the time series in the case of a self-join.
574+ `n` should be set to `None` in the case of an `AB-join`.
574575
575576 Returns
576577 -------
@@ -595,19 +596,37 @@ def check_window_size(m, max_size=None, n=None):
595596
596597 if n is not None :
597598 # Raise warning if there is at least one subsequence with no
598- # non-trivial neighbour in a self-join case
599+ # non-trivial neighbor in the case of a self-join.
600+
601+ # For any time series `T`, an "eligible nearest neighbor" subsequence for
602+ # the central-most subsequence must be located outside the `excl_zone`.
603+ # The central-most subsequence will ALWAYS have the smallest gap
604+ # to its furthest "eligible nearest neighbor" among all subsequences.
605+ # Therefore, we only need to check whether the `excl_zone` eliminates all
606+ # "nearest neighbors" for the central-most subsequence in `T`.
607+ # In fact, we just need to verify whether the `excl_zone` eliminates
608+ # the "nearest neighbor" that is furthest away (index-wise) from
609+ # the central-most subsequence. If it does not, this implies that
610+ # all other subsequences in `T` will have at least one
611+ # eligible nearest neighbor outside their respective `excl_zone`.
599612
600613 excl_zone = int (math .ceil (m / config .STUMPY_EXCL_ZONE_DENOM ))
601614
602615 l = n - m + 1
603- indices = np .arange (l )
604-
605- # Compute the maximum index-wise gap between each subsequence
606- # and its neighbours. For any subsequence:
607- # The leftmost neighbor is at index `0`
608- # The rightmost neighbor is at index `l-1`
609- max_gaps = np .maximum (indices - 0 , (l - 1 ) - indices )
610- if np .any (max_gaps <= excl_zone ):
616+ max_gap = l // 2
617+ # The index-wise gap between the central-most subsequence
618+ # and its furthest neighbor:
619+
620+ # If `l` is odd (`l == 2k+1`):
621+ # The central subsequence is at index `k`, with furthest neighbors at `0`
622+ # and `2k`, both `k == l // 2` indices away.
623+
624+ # If `l` is even (`l == 2k`):
625+ # The central subsequences are at `k-1` and `k`. The furthest neighbor is
626+ # at `2k-1` for `k-1`, and `0` for `k`. In both cases, the subsequence
627+ # and its furthest neighbor are `k == l // 2` indices away.
628+
629+ if max_gap <= excl_zone :
611630 msg = (
612631 f"The window size, 'm = { m } ', may be too large and could lead to "
613632 + "meaningless results. Consider reducing 'm' where necessary"
0 commit comments