Skip to content

Commit 48e0ac9

Browse files
committed
add dedicated test suite for field modifier unapply
1 parent 393402c commit 48e0ac9

File tree

1 file changed

+252
-0
lines changed

1 file changed

+252
-0
lines changed
Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
# docker-compose -f local.yml run --rm django pytest sde_collections/tests/test_field_modifier_unapply.py
2+
3+
from django.test import TestCase
4+
5+
from sde_collections.models.collection_choice_fields import Divisions, DocumentTypes
6+
from sde_collections.models.delta_patterns import (
7+
DeltaDivisionPattern,
8+
DeltaDocumentTypePattern,
9+
)
10+
from sde_collections.models.delta_url import CuratedUrl, DeltaUrl, DumpUrl
11+
12+
from .factories import CollectionFactory, DumpUrlFactory
13+
14+
15+
class TestDeltaPatternUnapplyLogic(TestCase):
    """Test complete lifecycle of pattern application and removal.

    Every test builds URLs and one or more patterns inside a fresh collection,
    deletes a pattern, and then asserts the resulting state of the matching
    ``DeltaUrl`` (and, where relevant, ``CuratedUrl``) rows.
    """

    def setUp(self):
        """Create the collection that all URLs and patterns in a test are attached to."""
        self.collection = CollectionFactory()

    def test_dump_to_delta_migration_with_pattern_lifecycle(self):
        """
        Test complete lifecycle:
        1. Create dump URLs
        2. Migrate to delta URLs
        3. Apply patterns
        4. Promote to curated
        5. Delete pattern
        6. Verify deltas are created
        7. Promote to curated
        8. Verify curated URLs have division set to None
        """
        # Create initial dump URLs. A plain loop is used because the objects
        # returned by the factory are never referenced again.
        for i in range(3):
            DumpUrlFactory(
                collection=self.collection,
                url=f"https://example.com/science/data{i}.html",
            )

        # Migrate dump to delta
        self.collection.migrate_dump_to_delta()

        # Verify dump URLs were migrated to delta URLs
        self.assertEqual(DeltaUrl.objects.count(), 3)
        self.assertEqual(DumpUrl.objects.count(), 0)

        # Apply division pattern
        pattern = DeltaDivisionPattern.objects.create(
            collection=self.collection,
            match_pattern="https://example.com/science/*.html",
            match_pattern_type=DeltaDivisionPattern.MatchPatternTypeChoices.MULTI_URL_PATTERN,
            division=Divisions.BIOLOGY,
        )

        # Verify pattern was applied to existing deltas
        for delta_url in DeltaUrl.objects.all():
            self.assertEqual(delta_url.division, Divisions.BIOLOGY)

        # Promote to curated
        self.collection.promote_to_curated()

        # Verify promotion
        self.assertEqual(CuratedUrl.objects.count(), 3)
        self.assertEqual(DeltaUrl.objects.count(), 0)
        for curated_url in CuratedUrl.objects.all():
            self.assertEqual(curated_url.division, Divisions.BIOLOGY)

        # Remove pattern
        pattern.delete()

        # Should have created new deltas for all URLs setting division to None
        self.assertEqual(DeltaUrl.objects.count(), 3)
        for delta_url in DeltaUrl.objects.all():
            self.assertIsNone(delta_url.division)

        # Promote to curated
        self.collection.promote_to_curated()

        # Should have updated all curated URLs, setting division to None
        self.assertEqual(CuratedUrl.objects.count(), 3)
        for curated_url in CuratedUrl.objects.all():
            self.assertIsNone(curated_url.division)

    # Test for README_UNNAPLY_LOGIC.md Case 1: Delta Only (New URL)
    def test_pattern_removal_with_delta_only(self):
        """Test pattern removal when delta exists without corresponding curated URL."""
        # Create initial delta URL (simulating a new URL)
        delta_url = DeltaUrl.objects.create(collection=self.collection, url="https://example.com/new.html")

        # Create and apply pattern
        pattern = DeltaDivisionPattern.objects.create(
            collection=self.collection, match_pattern=delta_url.url, division=Divisions.BIOLOGY
        )

        # Verify pattern was applied (re-fetch so the assertion sees the stored row)
        delta_url = DeltaUrl.objects.get(url=delta_url.url)
        self.assertEqual(delta_url.division, Divisions.BIOLOGY)

        # Remove pattern
        pattern.delete()

        # Verify delta still exists but with division set to None
        delta_url = DeltaUrl.objects.get(url=delta_url.url)
        self.assertIsNone(delta_url.division)
        self.assertEqual(DeltaUrl.objects.count(), 1)

    # Test for README_UNNAPLY_LOGIC.md Case 2: Delta Created to Apply Pattern
    def test_pattern_removal_with_simple_delta(self):
        """Test pattern removal when delta was created just to apply pattern."""
        # Create initial curated URL
        curated_url = CuratedUrl.objects.create(
            collection=self.collection, url="https://example.com/doc.html", division=None
        )

        # Create and apply pattern
        pattern = DeltaDivisionPattern.objects.create(
            collection=self.collection, match_pattern=curated_url.url, division=Divisions.BIOLOGY
        )

        # Verify delta was created with pattern's value
        delta_url = DeltaUrl.objects.get(url=curated_url.url)
        self.assertEqual(delta_url.division, Divisions.BIOLOGY)

        # Remove pattern
        pattern.delete()

        # Verify delta was deleted since it would match curated
        self.assertFalse(DeltaUrl.objects.filter(url=curated_url.url).exists())

    # Test for README_UNNAPLY_LOGIC.md Case 3: Pre-existing Delta
    def test_pattern_removal_preserves_other_changes(self):
        """Test pattern removal when delta has other changes that should be preserved."""
        # Create curated URL
        curated_url = CuratedUrl.objects.create(
            collection=self.collection,
            url="https://example.com/doc.html",
            division=None,
            scraped_title="Original Title",
        )

        # Create delta with modified title. The returned instance is not kept:
        # the delta is re-fetched from the database before every assertion.
        DeltaUrl.objects.create(
            collection=self.collection, url=curated_url.url, division=None, scraped_title="Modified Title"
        )

        # Create and apply pattern
        pattern = DeltaDivisionPattern.objects.create(
            collection=self.collection, match_pattern=curated_url.url, division=Divisions.BIOLOGY
        )

        # Verify pattern was applied while preserving title
        delta_url = DeltaUrl.objects.get(url=curated_url.url)
        self.assertEqual(delta_url.division, Divisions.BIOLOGY)
        self.assertEqual(delta_url.scraped_title, "Modified Title")

        # Remove pattern
        pattern.delete()

        # Verify delta still exists with original changes but pattern effect removed
        delta_url = DeltaUrl.objects.get(url=curated_url.url)
        self.assertIsNone(delta_url.division)
        self.assertEqual(delta_url.scraped_title, "Modified Title")

    # Test for README_UNNAPLY_LOGIC.md Case 4: Multiple Pattern Effects
    def test_pattern_removal_with_multiple_patterns(self):
        """Test removal of one pattern when URL is affected by multiple patterns."""
        # Create curated URL
        curated_url = CuratedUrl.objects.create(collection=self.collection, url="https://example.com/doc.html")

        # Create two patterns affecting the same URL
        division_pattern = DeltaDivisionPattern.objects.create(
            collection=self.collection, match_pattern=curated_url.url, division=Divisions.BIOLOGY
        )

        DeltaDocumentTypePattern.objects.create(
            collection=self.collection, match_pattern=curated_url.url, document_type=DocumentTypes.DATA
        )

        # Verify both patterns were applied
        delta_url = DeltaUrl.objects.get(url=curated_url.url)
        self.assertEqual(delta_url.division, Divisions.BIOLOGY)
        self.assertEqual(delta_url.document_type, DocumentTypes.DATA)

        # Remove division pattern
        division_pattern.delete()

        # Verify delta still exists with doc type but division removed
        delta_url = DeltaUrl.objects.get(url=curated_url.url)
        self.assertIsNone(delta_url.division)
        self.assertEqual(delta_url.document_type, DocumentTypes.DATA)

    # Test for Case 5: Overlapping Patterns, Specific Deleted
    def test_specific_pattern_removal_with_overlapping_patterns(self):
        """Test removal of specific pattern when more general pattern exists."""
        # Create initial delta URL
        delta_url = DeltaUrl.objects.create(collection=self.collection, url="https://example.com/docs/api/v2/spec.html")

        # Create general pattern
        DeltaDivisionPattern.objects.create(
            collection=self.collection,
            match_pattern="https://example.com/docs/*.html",
            match_pattern_type=DeltaDivisionPattern.MatchPatternTypeChoices.MULTI_URL_PATTERN,
            division=Divisions.BIOLOGY,
        )

        # Create specific pattern
        specific_pattern = DeltaDivisionPattern.objects.create(
            collection=self.collection, match_pattern=delta_url.url, division=Divisions.ASTROPHYSICS
        )

        # Verify specific pattern took precedence
        delta_url = DeltaUrl.objects.get(url=delta_url.url)
        self.assertEqual(delta_url.division, Divisions.ASTROPHYSICS)

        # Remove specific pattern
        specific_pattern.delete()

        # Verify general pattern now applies
        delta_url = DeltaUrl.objects.get(url=delta_url.url)
        self.assertEqual(delta_url.division, Divisions.BIOLOGY)

    # Test for Case 6: Overlapping Patterns, General Deleted
    def test_general_pattern_removal_with_overlapping_patterns(self):
        """Test removal of general pattern when more specific pattern exists."""
        # Create initial delta URL
        delta_url = DeltaUrl.objects.create(collection=self.collection, url="https://example.com/docs/api/v2/spec.html")

        # Create general pattern
        general_pattern = DeltaDivisionPattern.objects.create(
            collection=self.collection,
            match_pattern="https://example.com/docs/*.html",
            match_pattern_type=DeltaDivisionPattern.MatchPatternTypeChoices.MULTI_URL_PATTERN,
            division=Divisions.BIOLOGY,
        )

        # Create specific pattern
        DeltaDivisionPattern.objects.create(
            collection=self.collection, match_pattern=delta_url.url, division=Divisions.ASTROPHYSICS
        )

        # Verify specific pattern takes precedence
        delta_url = DeltaUrl.objects.get(url=delta_url.url)
        self.assertEqual(delta_url.division, Divisions.ASTROPHYSICS)

        # Remove general pattern
        general_pattern.delete()

        # Verify specific pattern still applies
        delta_url = DeltaUrl.objects.get(url=delta_url.url)
        self.assertEqual(delta_url.division, Divisions.ASTROPHYSICS)

0 commit comments

Comments
 (0)