@@ -275,6 +275,102 @@ def test_non_contiguous_subsequences() -> None:
275275 assert level_2_patterns [("a" , "c" )] == 3 , f"Expected support 3 for ('a', 'c'), got { level_2_patterns [('a' , 'c' )]} "
276276
277277
def test_contiguous_vs_non_contiguous_patterns() -> None:
    """
    Contrast pairs that occur adjacently with a pair that mostly occurs with a gap.

    ('X', 'Y') and ('Y', 'Z') sit directly next to each other in at least two
    sequences, so they would be frequent under either a contiguous or a
    non-contiguous reading. ('X', 'Z') is adjacent in only one sequence; it
    reaches the threshold only when ordered occurrences with items in between
    are counted too, which is what this test expects from the search.
    """
    transactions = [
        ["X", "Y", "Z"],  # X->Y and Y->Z adjacent; X->Z only with Y in between
        ["X", "Z"],  # X->Z adjacent
        ["Y", "Z", "X"],  # Y->Z and Z->X adjacent; Y->X only with Z in between
        ["X", "Y", "Z", "W"],  # X->Y and Y->Z adjacent again; X->Z with a gap
    ]

    # 0.5 of 4 sequences: a pattern must occur in at least 2 of them.
    levels = GSP(transactions).search(min_support=0.5)

    # Index 1 holds the 2-item patterns; use an empty mapping when the search
    # produced fewer than two levels so the assertions below fail cleanly.
    if len(levels) > 1:
        pairs = levels[1]
    else:
        pairs = {}

    # Pairs that are adjacent often enough to be frequent under both readings:
    #   ('X', 'Y') is adjacent in ['X', 'Y', 'Z'] and ['X', 'Y', 'Z', 'W']
    #   ('Y', 'Z') is adjacent in ['X', 'Y', 'Z'], ['Y', 'Z', 'X'] and ['X', 'Y', 'Z', 'W']
    adjacent_cases = [
        (("X", "Y"), "('X', 'Y') should be found (contiguous in 2 sequences)"),
        (("Y", "Z"), "('Y', 'Z') should be found (contiguous in 3 sequences)"),
    ]
    for pattern, failure_message in adjacent_cases:
        assert pattern in pairs, failure_message

    # Pair that depends on gaps being allowed: ('X', 'Z') is adjacent only in
    # ['X', 'Z'] and separated by 'Y' in the two sequences starting X, Y, Z.
    # That gives a total support of 3, above the threshold of 2.
    gapped = ("X", "Z")
    gapped_message = (
        "('X', 'Z') should be found with non-contiguous matching. "
        "This pattern has gaps in some sequences but is still ordered."
    )
    assert gapped in pairs, gapped_message
    assert pairs[gapped] == 3, f"Expected support 3 for ('X', 'Z'), got {pairs[('X', 'Z')]} "
315+
316+
def test_non_contiguous_with_longer_gaps() -> None:
    """
    Check that a pair is still counted when several items separate its elements.

    'A' is followed by 'E' in three of the four sequences, twice with three
    unrelated items in between and once directly. The fourth sequence has the
    two items in the opposite order and must not contribute.
    """
    transactions = [
        ["A", "B", "C", "D", "E"],  # A ... E, three items apart
        ["A", "X", "Y", "Z", "E"],  # A ... E, three other items apart
        ["A", "E"],  # A directly followed by E
        ["E", "A"],  # reversed order: no A->E here
    ]

    # 0.5 of 4 sequences: a pattern must occur in at least 2 of them.
    levels = GSP(transactions).search(min_support=0.5)

    # Index 1 holds the 2-item patterns; use an empty mapping when the search
    # produced fewer than two levels.
    if len(levels) > 1:
        pairs = levels[1]
    else:
        pairs = {}

    # Forward order: expected in the result with a support of exactly 3.
    forward = ("A", "E")
    assert forward in pairs, "('A', 'E') should be found despite large gaps"
    assert pairs[forward] == 3, f"Expected support 3 for ('A', 'E'), got {pairs[('A', 'E')]} "

    # Reverse order occurs in a single sequence only, so it must be absent.
    backward = ("E", "A")
    assert backward not in pairs, "('E', 'A') should not be found (wrong order)"
341+
342+
def test_order_sensitivity() -> None:
    """
    Check that element order decides whether a sequence supports a pair.

    The four sequences are different orderings of the same three items, so a
    pair and its reverse get different supports depending purely on which
    item comes first in each sequence.
    """
    transactions = [
        ["P", "Q", "R"],  # P->Q, P->R, Q->R
        ["P", "R", "Q"],  # P->R, P->Q, R->Q
        ["Q", "P", "R"],  # Q->P, Q->R, P->R
        ["R", "Q", "P"],  # R->Q, R->P, Q->P
    ]

    # 0.5 of 4 sequences: a pattern must occur in at least 2 of them.
    levels = GSP(transactions).search(min_support=0.5)

    # Index 1 holds the 2-item patterns; use an empty mapping when the search
    # produced fewer than two levels.
    if len(levels) > 1:
        pairs = levels[1]
    else:
        pairs = {}

    # Pairs expected to be frequent, with the exact number of supporting sequences:
    #   ('P', 'R'): ['P', 'Q', 'R'], ['P', 'R', 'Q'], ['Q', 'P', 'R']
    #   ('Q', 'P'): ['Q', 'P', 'R'], ['R', 'Q', 'P']
    frequent_cases = [
        (("P", "R"), 3, "('P', 'R') should be found (support = 3)"),
        (("Q", "P"), 2, "('Q', 'P') should be found (support = 2)"),
    ]
    for pattern, expected_support, failure_message in frequent_cases:
        assert pattern in pairs, failure_message
        assert pairs[pattern] == expected_support

    # ('R', 'P') is in order only in ['R', 'Q', 'P']: support 1, under the
    # threshold of 2, so it must not be reported.
    rare = ("R", "P")
    assert rare not in pairs, "('R', 'P') should not be found (support = 1, below threshold)"
372+
373+
278374@pytest .mark .parametrize ("min_support" , [0.1 , 0.2 , 0.3 , 0.4 , 0.5 ])
279375def test_benchmark (benchmark : BenchmarkFixture , supermarket_transactions : List [List [str ]], min_support : float ) -> None :
280376 """
0 commit comments