@@ -554,11 +554,12 @@ def get_max_window_size(n):
554554 return max_m
555555
556556
557- def check_window_size (m , max_size = None ):
557+ def check_window_size (m , max_size = None , excl_zone = None , last_start_index = None ):
558558 """
559559 Check the window size and ensure that it is greater than or equal to 3 and, if
560560 `max_size` is provided, ensure that the window size is less than or equal to the
561- `max_size`
561+ `max_size`. Furthermore, if `excl_zone` is provided, then it will also check if the
562+ window size is too large and could lead to meaningless results.
562563
563564 Parameters
564565 ----------
@@ -568,6 +569,13 @@ def check_window_size(m, max_size=None):
568569 max_size : int, default None
569570 The maximum window size allowed
570571
572+ excl_zone : int, default None
573+ The exclusion zone. If provided, then the `last_start_index` must also be
574+ provided.
575+
576+ last_start_index : int, default None
577+ The start index of the last subsequence.
578+
571579 Returns
572580 -------
573581 None
@@ -589,6 +597,37 @@ def check_window_size(m, max_size=None):
589597 if max_size is not None and m > max_size :
590598 raise ValueError (f"The window size must be less than or equal to { max_size } " )
591599
600+ if excl_zone is not None :
601+ if last_start_index is None :
602+ raise ValueError (
603+ "last_start_index must be provided when excl_zone is not None"
604+ )
605+
606+ # Check if subsequences have non-trivial neighbours
607+
608+ # Case 1:
609+ # There is at least one subsequence with non-trivial neighbour
610+ # i.e. For SOME `i`, there exists at least one `j` such that |i - j| > excl_zone
611+ # In this case, we just need to consider the two subsequences that are furthest
612+ # apart from each other.
613+ # In other words: |0 - last_start_index| > excl_zone
614+ cond_1 = last_start_index <= excl_zone
615+
616+ # Case 2:
617+ # Check if each single subsequence has at least one non-trivial neighbor
618+ # i.e. For ANY `i`, there exists at least one `j` such that |i - j| > excl_zone
619+ # In this case, we need to consider the subsequence whose furthest neighbour is
620+ # the shortest compared to other subsequences.
621+ # In other words: |0 - ceil(last_start_index / 2)| > excl_zone
622+ cond_2 = math .ceil (last_start_index / 2 ) <= excl_zone
623+
624+ if cond_1 or cond_2 :
625+ msg = (
626+ f"The window size, 'm = { m } ', may be too large and could lead to "
627+ + "meaningless results. Consider reducing 'm' where necessary"
628+ )
629+ warnings .warn (msg )
630+
592631
593632@njit (fastmath = config .STUMPY_FASTMATH_TRUE )
594633def _sliding_dot_product (Q , T ):
0 commit comments