Skip to content

Commit cf86eee

Browse files
committed
add tests for title pattern unapply
1 parent 48e0ac9 commit cf86eee

File tree

1 file changed

+281
-0
lines changed

1 file changed

+281
-0
lines changed
Lines changed: 281 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,281 @@
1+
# docker-compose -f local.yml run --rm django pytest sde_collections/tests/test_title_pattern_unapply.py
2+
3+
from django.test import TestCase
4+
5+
from sde_collections.models.delta_patterns import (
6+
DeltaResolvedTitle,
7+
DeltaResolvedTitleError,
8+
DeltaTitlePattern,
9+
)
10+
from sde_collections.models.delta_url import CuratedUrl, DeltaUrl
11+
12+
from .factories import CollectionFactory, DumpUrlFactory
13+
14+
15+
class TestTitlePatternUnapplyLogic(TestCase):
    """Test complete lifecycle of title pattern application and removal.

    Every test creates one or more ``DeltaTitlePattern`` rows, checks the
    ``generated_title`` of the matching URLs, deletes a pattern and then checks
    what is left on the ``DeltaUrl`` rows and in the resolution-tracking tables.
    """

    def setUp(self):
        """Create the collection that all URLs and patterns of a test belong to."""
        self.collection = CollectionFactory()

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _create_pattern(self, match_pattern, title_pattern, *, multi_url=False):
        """Create a ``DeltaTitlePattern`` in ``self.collection``.

        ``match_pattern_type`` is only passed when ``multi_url`` is true, so
        that otherwise the model's own default type is used.
        """
        kwargs = {
            "collection": self.collection,
            "match_pattern": match_pattern,
            "title_pattern": title_pattern,
        }
        if multi_url:
            kwargs["match_pattern_type"] = DeltaTitlePattern.MatchPatternTypeChoices.MULTI_URL_PATTERN
        return DeltaTitlePattern.objects.create(**kwargs)

    def _assert_title_starts_with(self, url_obj, prefix):
        """Assert that ``url_obj.generated_title`` begins with ``prefix``."""
        self.assertTrue(
            url_obj.generated_title.startswith(prefix),
            msg=f"generated_title {url_obj.generated_title!r} does not start with {prefix!r}",
        )

    def _assert_resolved_by(self, delta_url, pattern):
        """Assert that a ``DeltaResolvedTitle`` row links ``delta_url`` to ``pattern``."""
        self.assertTrue(
            DeltaResolvedTitle.objects.filter(delta_url=delta_url, title_pattern=pattern).exists(),
            msg=f"no DeltaResolvedTitle for url {delta_url.url!r} and pattern {pattern.match_pattern!r}",
        )

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_dump_to_delta_migration_with_pattern_lifecycle(self):
        """
        Test complete lifecycle:
        1. Create dump URLs
        2. Migrate to delta URLs
        3. Apply title pattern
        4. Promote to curated
        5. Delete pattern
        6. Verify deltas are created with empty generated titles
        7. Verify resolution tracking is cleared
        """
        # NOTE(review): a second promotion after the pattern is deleted (and a check
        # that the curated URLs end up with empty generated titles) is not exercised
        # here - consider adding it once that behaviour is confirmed.

        # Create initial dump URLs
        for i in range(3):
            DumpUrlFactory(
                collection=self.collection,
                url=f"https://example.com/science/data{i}.html",
            )

        # Migrate dump to delta
        self.collection.migrate_dump_to_delta()

        # Apply title pattern
        pattern = self._create_pattern(
            "https://example.com/science/*.html",
            "Science Document {url}",
            multi_url=True,
        )

        # Verify pattern was applied to all deltas and resolution tracked.
        # The count check keeps the loop below from passing vacuously.
        self.assertEqual(DeltaUrl.objects.count(), 3)
        for delta_url in DeltaUrl.objects.all():
            self._assert_title_starts_with(delta_url, "Science Document")
            self._assert_resolved_by(delta_url, pattern)

        # Promote to curated
        self.collection.promote_to_curated()

        # Verify promotion
        self.assertEqual(CuratedUrl.objects.count(), 3)
        self.assertEqual(DeltaUrl.objects.count(), 0)
        for curated_url in CuratedUrl.objects.all():
            self._assert_title_starts_with(curated_url, "Science Document")

        # Remove pattern
        pattern.delete()

        # Verify new deltas created with empty titles
        self.assertEqual(DeltaUrl.objects.count(), 3)
        for delta_url in DeltaUrl.objects.all():
            self.assertEqual(delta_url.generated_title, "")

        # Verify resolution tracking cleared
        self.assertEqual(DeltaResolvedTitle.objects.count(), 0)
        self.assertEqual(DeltaResolvedTitleError.objects.count(), 0)

    def test_pattern_removal_with_delta_only(self):
        """Test pattern removal when delta exists without corresponding curated URL."""
        # Create initial delta URL
        delta_url = DeltaUrl.objects.create(collection=self.collection, url="https://example.com/new.html")

        # Create and apply pattern
        pattern = self._create_pattern(delta_url.url, "New Document {url}")

        # Verify pattern was applied (re-read by URL to see the stored state)
        delta_url = DeltaUrl.objects.get(url=delta_url.url)
        self._assert_title_starts_with(delta_url, "New Document")
        self._assert_resolved_by(delta_url, pattern)

        # Remove pattern
        pattern.delete()

        # Verify delta still exists but with empty title
        delta_url = DeltaUrl.objects.get(url=delta_url.url)
        self.assertEqual(delta_url.generated_title, "")
        self.assertEqual(DeltaResolvedTitle.objects.count(), 0)

    def test_pattern_removal_with_simple_delta(self):
        """Test pattern removal when delta was created just to apply pattern."""
        # Create initial curated URL
        curated_url = CuratedUrl.objects.create(
            collection=self.collection, url="https://example.com/doc.html", generated_title=""
        )

        # Create and apply pattern
        pattern = self._create_pattern(curated_url.url, "Documentation {url}")

        # Verify delta was created with pattern's title
        delta_url = DeltaUrl.objects.get(url=curated_url.url)
        self._assert_title_starts_with(delta_url, "Documentation")
        self._assert_resolved_by(delta_url, pattern)

        # Remove pattern
        pattern.delete()

        # Verify delta was deleted since it would match curated
        self.assertEqual(DeltaUrl.objects.filter(url=curated_url.url).count(), 0)
        self.assertEqual(DeltaResolvedTitle.objects.count(), 0)

    def test_pattern_removal_preserves_other_changes(self):
        """Test pattern removal when delta has other changes that should be preserved."""
        # Create curated URL
        curated_url = CuratedUrl.objects.create(
            collection=self.collection,
            url="https://example.com/doc.html",
            generated_title="",
            scraped_title="Original Title",
        )

        # Create delta with modified title
        DeltaUrl.objects.create(
            collection=self.collection, url=curated_url.url, generated_title="", scraped_title="Modified Title"
        )

        # Create and apply pattern
        pattern = self._create_pattern(curated_url.url, "API Doc {url}")

        # Verify pattern was applied while preserving scraped title
        delta_url = DeltaUrl.objects.get(url=curated_url.url)
        self._assert_title_starts_with(delta_url, "API Doc")
        self.assertEqual(delta_url.scraped_title, "Modified Title")

        # Remove pattern
        pattern.delete()

        # Verify delta still exists with original changes but pattern effect removed
        delta_url = DeltaUrl.objects.get(url=curated_url.url)
        self.assertEqual(delta_url.generated_title, "")
        self.assertEqual(delta_url.scraped_title, "Modified Title")

    def test_pattern_removal_with_multiple_patterns(self):
        """Test removal of one pattern when URL is affected by multiple patterns."""
        # Create initial delta URL
        delta_url = DeltaUrl.objects.create(collection=self.collection, url="https://example.com/doc.html")

        # Create specific pattern
        specific_pattern = self._create_pattern(delta_url.url, "Specific Title {url}")

        # Create another pattern for the same URL
        generic_pattern = self._create_pattern(
            "https://example.com/*.html",
            "Generic Title {url}",
            multi_url=True,
        )

        # Verify specific pattern takes precedence
        delta_url = DeltaUrl.objects.get(url=delta_url.url)
        self._assert_title_starts_with(delta_url, "Specific Title")

        # Verify resolution tracking
        self._assert_resolved_by(delta_url, specific_pattern)

        # Remove specific pattern
        specific_pattern.delete()

        # Verify general pattern is now applied
        delta_url = DeltaUrl.objects.get(url=delta_url.url)
        self._assert_title_starts_with(delta_url, "Generic Title")

        # Verify resolution tracking updated
        self._assert_resolved_by(delta_url, generic_pattern)

    def test_specific_pattern_removal_with_overlapping_patterns(self):
        """Test removal of specific pattern when more general pattern exists."""
        # Create initial delta URL
        delta_url = DeltaUrl.objects.create(collection=self.collection, url="https://example.com/docs/api/v2/spec.html")

        # Create general pattern
        self._create_pattern(
            "https://example.com/docs/*.html",
            "General Document {url}",
            multi_url=True,
        )

        # Create specific pattern
        specific_pattern = self._create_pattern(delta_url.url, "API Spec {url}")

        # Verify specific pattern took precedence
        delta_url = DeltaUrl.objects.get(url=delta_url.url)
        self._assert_title_starts_with(delta_url, "API Spec")

        # Remove specific pattern
        specific_pattern.delete()

        # Verify general pattern now applies
        delta_url = DeltaUrl.objects.get(url=delta_url.url)
        self._assert_title_starts_with(delta_url, "General Document")

    def test_general_pattern_removal_with_overlapping_patterns(self):
        """Test removal of general pattern when more specific pattern exists."""
        # Create initial delta URL
        delta_url = DeltaUrl.objects.create(collection=self.collection, url="https://example.com/docs/api/v2/spec.html")

        # Create general pattern
        general_pattern = self._create_pattern(
            "https://example.com/docs/*.html",
            "General Document {url}",
            multi_url=True,
        )

        # Create specific pattern
        specific_pattern = self._create_pattern(delta_url.url, "API Spec {url}")

        # Verify specific pattern takes precedence
        delta_url = DeltaUrl.objects.get(url=delta_url.url)
        self._assert_title_starts_with(delta_url, "API Spec")

        # Verify correct resolution tracking
        self._assert_resolved_by(delta_url, specific_pattern)

        # Remove general pattern
        general_pattern.delete()

        # Verify specific pattern still applies
        delta_url = DeltaUrl.objects.get(url=delta_url.url)
        self._assert_title_starts_with(delta_url, "API Spec")

        # Verify resolution tracking unchanged
        self._assert_resolved_by(delta_url, specific_pattern)

    def test_pattern_removal_with_title_error(self):
        """Test handling of title resolution errors during pattern removal."""
        # Create initial delta URL
        delta_url = DeltaUrl.objects.create(collection=self.collection, url="https://example.com/doc.html")

        # Create pattern that will cause error (invalid template)
        pattern = self._create_pattern(
            delta_url.url,
            "{invalid}",  # This should cause an error
        )

        # Verify error was recorded
        self.assertTrue(
            DeltaResolvedTitleError.objects.filter(delta_url=delta_url, title_pattern=pattern).exists(),
            msg=f"no DeltaResolvedTitleError recorded for url {delta_url.url!r}",
        )

        # Remove pattern
        pattern.delete()

        # Verify error tracking cleared
        self.assertEqual(DeltaResolvedTitleError.objects.count(), 0)

        # Verify delta has empty title
        delta_url = DeltaUrl.objects.get(url=delta_url.url)
        self.assertEqual(delta_url.generated_title, "")

0 commit comments

Comments
 (0)