Skip to content

Commit dc88d84

Browse files
Add comprehensive tests demonstrating contiguous vs non-contiguous patterns
- Added test_contiguous_vs_non_contiguous_patterns to show patterns found in both modes
- Added test_non_contiguous_with_longer_gaps to test matching with large gaps
- Added test_order_sensitivity to verify order requirements
- Added test_is_subsequence_contiguous_vs_non_contiguous in utils tests
- Added test_is_subsequence_with_gaps to test various gap sizes
- All 44 tests pass (5 new tests added)

Co-authored-by: jacksonpradolima <7774063+jacksonpradolima@users.noreply.github.com>
1 parent 5afe941 commit dc88d84

File tree

2 files changed

+154
-0
lines changed

2 files changed

+154
-0
lines changed

tests/test_gsp.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,102 @@ def test_non_contiguous_subsequences() -> None:
275275
assert level_2_patterns[("a", "c")] == 3, f"Expected support 3 for ('a', 'c'), got {level_2_patterns[('a', 'c')]}"
276276

277277

278+
def test_contiguous_vs_non_contiguous_patterns() -> None:
    """
    Contrast pairs that need gap-tolerant matching with pairs that are also adjacent.

    The current implementation uses non-contiguous (ordered) matching, the standard
    GSP behavior. The data set is arranged so that some frequent pairs occur side by
    side often enough to be found under either matching mode, while ('X', 'Z') only
    reaches the threshold when occurrences with an element in between are counted.
    """
    transactions = [
        ["X", "Y", "Z"],  # adjacent: X->Y, Y->Z; with a gap: X->Z
        ["X", "Z"],  # adjacent: X->Z
        ["Y", "Z", "X"],  # adjacent: Y->Z, Z->X; with a gap: Y->X
        ["X", "Y", "Z", "W"],  # many pairs, adjacent and gapped
    ]

    # The expectations below treat min_support=0.5 as "at least 2 of the 4 sequences".
    mined = GSP(transactions).search(min_support=0.5)

    # The second entry of the result holds the length-2 patterns; tolerate its absence.
    if len(mined) >= 2:
        pair_support = mined[1]
    else:
        pair_support = {}

    # Frequent under BOTH matching modes:
    #   ('X', 'Y') is adjacent in ['X', 'Y', 'Z'] and ['X', 'Y', 'Z', 'W']
    #   ('Y', 'Z') is adjacent in ['X', 'Y', 'Z'], ['Y', 'Z', 'X'] and ['X', 'Y', 'Z', 'W']
    assert ("X", "Y") in pair_support, "('X', 'Y') should be found (contiguous in 2 sequences)"
    assert ("Y", "Z") in pair_support, "('Y', 'Z') should be found (contiguous in 3 sequences)"

    # Frequent ONLY under non-contiguous matching:
    #   ('X', 'Z') has a gap in ['X', 'Y', 'Z'] and ['X', 'Y', 'Z', 'W'],
    #   and is adjacent only in ['X', 'Z'] -> total support 3 (>= threshold of 2)
    assert ("X", "Z") in pair_support, (
        "('X', 'Z') should be found with non-contiguous matching. "
        "This pattern has gaps in some sequences but is still ordered."
    )
    xz_support = pair_support[("X", "Z")]
    assert xz_support == 3, f"Expected support 3 for ('X', 'Z'), got {xz_support}"
315+
316+
317+
def test_non_contiguous_with_longer_gaps() -> None:
    """
    Check that a pair is still counted when several elements separate its members.

    ('A', 'E') occurs with three intervening elements in two sequences and with
    none in a third; the fourth sequence has the two elements in reverse order
    and must not contribute.
    """
    transactions = [
        ["A", "B", "C", "D", "E"],  # A->E, three elements in between
        ["A", "X", "Y", "Z", "E"],  # A->E, three different elements in between
        ["A", "E"],  # A->E, adjacent
        ["E", "A"],  # reversed order: no A->E here
    ]

    # The expectations below treat min_support=0.5 as "at least 2 of the 4 sequences".
    mined = GSP(transactions).search(min_support=0.5)

    # The second entry of the result holds the length-2 patterns; tolerate its absence.
    if len(mined) >= 2:
        pair_support = mined[1]
    else:
        pair_support = {}

    # Three of the four sequences contain A before E.
    assert ("A", "E") in pair_support, "('A', 'E') should be found despite large gaps"
    ae_support = pair_support[("A", "E")]
    assert ae_support == 3, f"Expected support 3 for ('A', 'E'), got {ae_support}"

    # The reversed pair must not be reported.
    assert ("E", "A") not in pair_support, "('E', 'A') should not be found (wrong order)"
341+
342+
343+
def test_order_sensitivity() -> None:
    """
    Test that the algorithm is sensitive to order - patterns must appear in sequence order.

    The four sequences are permutations of the same three items, so every item has
    identical single-item support; any difference between the support of a pair and
    that of its reverse can therefore only come from element order. This verifies
    that even with non-contiguous matching, the order of elements matters.
    """
    sequences = [
        ["P", "Q", "R"],  # Contains P->Q, P->R, Q->R
        ["P", "R", "Q"],  # Contains P->R, P->Q, R->Q
        ["Q", "P", "R"],  # Contains Q->P, Q->R, P->R
        ["R", "Q", "P"],  # Contains R->Q, R->P, Q->P
    ]

    gsp = GSP(sequences)
    result = gsp.search(min_support=0.5)  # Need at least 2/4 sequences

    # The second entry of the result holds the length-2 patterns; tolerate its absence.
    level_2_patterns = result[1] if len(result) >= 2 else {}

    # ('P', 'R') appears in correct order in: ['P', 'Q', 'R'], ['P', 'R', 'Q'], ['Q', 'P', 'R']
    assert ("P", "R") in level_2_patterns, "('P', 'R') should be found (support = 3)"
    assert level_2_patterns[("P", "R")] == 3, f"Expected support 3 for ('P', 'R'), got {level_2_patterns[('P', 'R')]}"

    # ('Q', 'P') appears in correct order in: ['Q', 'P', 'R'], ['R', 'Q', 'P']
    assert ("Q", "P") in level_2_patterns, "('Q', 'P') should be found (support = 2)"
    assert level_2_patterns[("Q", "P")] == 2, f"Expected support 2 for ('Q', 'P'), got {level_2_patterns[('Q', 'P')]}"

    # ('R', 'P') appears in correct order in: ['R', 'Q', 'P']
    # Support = 1, below threshold of 2
    assert ("R", "P") not in level_2_patterns, "('R', 'P') should not be found (support = 1, below threshold)"
372+
373+
278374
@pytest.mark.parametrize("min_support", [0.1, 0.2, 0.3, 0.4, 0.5])
279375
def test_benchmark(benchmark: BenchmarkFixture, supermarket_transactions: List[List[str]], min_support: float) -> None:
280376
"""

tests/test_utils.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,64 @@ def test_is_subsequence_in_list():
6767
assert not is_subsequence_in_list((1, 2, 3, 4), (1, 2, 3)), "Failed to reject long subsequence"
6868

6969

70+
def test_is_subsequence_contiguous_vs_non_contiguous():
    """
    Document where contiguous and non-contiguous matching would disagree.

    The current implementation uses non-contiguous (ordered) matching, so a
    pattern whose elements are separated by other items must still be accepted,
    adjacent patterns must be accepted as well, and patterns whose elements
    occur in the opposite order must be rejected.
    """
    accepted = [
        # Elements separated by a gap: a contiguous matcher would reject these,
        # the non-contiguous matcher accepts them.
        (("a", "c"), ("a", "b", "c"), "Non-contiguous: ('a', 'c') should match in ('a', 'b', 'c')"),
        (("a", "d"), ("a", "b", "c", "d"), "Non-contiguous: ('a', 'd') should match in ('a', 'b', 'c', 'd')"),
        ((1, 4), (1, 2, 3, 4, 5), "Non-contiguous: (1, 4) should match in (1, 2, 3, 4, 5)"),
        # Adjacent elements: accepted under either matching mode.
        (("a", "b"), ("a", "b", "c"), "Contiguous: ('a', 'b') should match in ('a', 'b', 'c')"),
        ((2, 3), (1, 2, 3, 4), "Contiguous: (2, 3) should match in (1, 2, 3, 4)"),
    ]
    for pattern, sequence, failure_message in accepted:
        assert is_subsequence_in_list(pattern, sequence), failure_message

    # Reversed order: rejected under either matching mode.
    rejected = [
        (("c", "a"), ("a", "b", "c"), "Wrong order: ('c', 'a') should NOT match in ('a', 'b', 'c')"),
        ((3, 1), (1, 2, 3, 4), "Wrong order: (3, 1) should NOT match in (1, 2, 3, 4)"),
    ]
    for pattern, sequence, failure_message in rejected:
        assert not is_subsequence_in_list(pattern, sequence), failure_message
105+
106+
107+
def test_is_subsequence_with_gaps():
    """
    Exercise non-contiguous matching across a range of gap sizes.
    """
    cases = [
        # One element between the pattern items
        (("x", "z"), ("x", "y", "z"), "Failed with 1 element gap"),
        # Three elements between the pattern items
        (("a", "e"), ("a", "b", "c", "d", "e"), "Failed with 3 element gap"),
        # Eight elements between the pattern items
        ((1, 10), (1, 2, 3, 4, 5, 6, 7, 8, 9, 10), "Failed with 8 element gap"),
        # Three-item patterns with a gap after each of the first two items
        ((1, 3, 5), (1, 2, 3, 4, 5), "Failed with multiple gaps"),
        (("a", "c", "e"), ("a", "b", "c", "d", "e"), "Failed with multiple gaps"),
        # Adjacent items must keep matching too
        ((1, 2), (1, 2, 3), "Failed with no gap (contiguous)"),
    ]
    for pattern, sequence, failure_message in cases:
        assert is_subsequence_in_list(pattern, sequence), failure_message
126+
127+
70128
def test_generate_candidates_from_previous():
71129
"""
72130
Test the `generate_candidates_from_previous` utility function.

0 commit comments

Comments
 (0)