Skip to content

Commit 178b5bc

Browse files
committed
add promotion tests for overlapping title patterns and title changes
1 parent 018c2b7 commit 178b5bc

File tree

1 file changed

+93
-1
lines changed

1 file changed

+93
-1
lines changed

sde_collections/tests/test_promote_collection.py

Lines changed: 93 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
# docker-compose -f local.yml run --rm django pytest sde_collections/tests/test_promote_collection.py
2-
32
import pytest
43

54
from sde_collections.models.delta_patterns import (
65
DeltaExcludePattern,
76
DeltaIncludePattern,
7+
DeltaTitlePattern,
88
)
99
from sde_collections.models.delta_url import CuratedUrl, DeltaUrl
1010
from sde_collections.tests.factories import CollectionFactory
@@ -116,3 +116,95 @@ def test_patterns_reapplied_after_promotion(collection):
116116

117117
# Verify exclusion status
118118
assert curated_urls.filter(url="https://exclude.com", excluded=True).exists()
119+
120+
121+
@pytest.mark.django_db
def test_promotion_with_overlapping_patterns_and_deletion():
    """Promote twice while several title patterns overlap on URLs marked for deletion.

    Four DeltaUrls are created, four multi-URL title patterns whose regexes
    overlap on those URLs are created and applied, and the collection is
    promoted. Two of the URLs are then re-submitted as ``to_delete`` DeltaUrls
    and the collection is promoted a second time.

    NOTE(review): this test contains no assertions; it passes as long as
    neither ``promote_to_curated()`` call raises. The ``print`` output is
    debugging aid only.
    """
    collection = CollectionFactory()

    # URLs chosen so that more than one regex below matches each of them.
    candidate_urls = [
        "https://example.com/docs/guide1",
        "https://example.com/docs/guide2",
        "https://example.com/api/v1/doc1",
        "https://example.com/api/v1/doc2",
    ]
    for candidate in candidate_urls:
        DeltaUrl.objects.create(
            collection=collection,
            url=candidate,
            scraped_title=f"Title for {candidate}",
        )

    # Regex / title-template pairs that deliberately overlap on the same URLs.
    pattern_specs = [
        {"pattern": ".*docs.*", "title": "Documentation: {title}"},
        {"pattern": ".*guide.*", "title": "Guide: {title}"},
        {"pattern": ".*api.*", "title": "API: {title}"},
        {"pattern": ".*doc[0-9]", "title": "Doc Number: {title}"},
    ]

    def _create_and_apply(spec):
        # Each pattern is applied right after it is created, before the next one exists.
        created = DeltaTitlePattern.objects.create(
            collection=collection,
            match_pattern=spec["pattern"],
            match_pattern_type=2,  # Multi-URL Pattern
            title_pattern=spec["title"],
        )
        created.apply()
        return created

    title_patterns = [_create_and_apply(spec) for spec in pattern_specs]

    # First promotion.
    collection.promote_to_curated()

    # Dump which curated URLs each pattern is related to after the first promotion.
    for title_pattern in title_patterns:
        matched = title_pattern.curated_urls.all()
        print(f"\nPattern '{title_pattern.match_pattern}' matches {matched.count()} URLs:")
        for curated in matched:
            print(f"- {curated.url}")

    # Re-submit two of the URLs as deletions; both are matched by overlapping patterns.
    for doomed in ("https://example.com/docs/guide1", "https://example.com/api/v1/doc1"):
        DeltaUrl.objects.create(collection=collection, url=doomed, to_delete=True)

    # Second promotion - intended to reproduce conditions similar to production.
    collection.promote_to_curated()

    # Dump the final pattern -> curated URL relationships for debugging.
    print("\nFinal state:")
    for title_pattern in title_patterns:
        print(f"\nPattern '{title_pattern.match_pattern}':")
        for curated in title_pattern.curated_urls.all():
            print(f"- {curated.url}")
182+
183+
184+
@pytest.mark.django_db
def test_promotion_with_title_change():
    """Re-promote a URL whose scraped title changed while a title pattern is related to it.

    A single DeltaUrl is created, a multi-URL title pattern matching it is
    created and applied, and the collection is promoted. After checking that
    the pattern is related to the resulting CuratedUrl, a new DeltaUrl for the
    same URL with a different scraped title is created and the collection is
    promoted again.

    NOTE(review): nothing is asserted after the second promotion; the test
    passes as long as that call does not raise.
    """
    collection = CollectionFactory()
    target_url = "https://example.com/doc1"

    # Seed the delta that the first promotion will turn into a CuratedUrl.
    DeltaUrl.objects.create(
        collection=collection,
        url=target_url,
        scraped_title="Original Title",
    )

    # Create a title pattern matching the URL and apply it before promoting.
    title_pattern = DeltaTitlePattern.objects.create(
        collection=collection,
        match_pattern=".*doc1",
        match_pattern_type=2,
        title_pattern="Pattern: {title}",
    )
    title_pattern.apply()

    # First promotion.
    collection.promote_to_curated()

    # The pattern must now be related to the promoted CuratedUrl.
    promoted = CuratedUrl.objects.get(url=target_url)
    assert title_pattern.curated_urls.filter(id=promoted.id).exists()

    # Same URL, different scraped title.
    DeltaUrl.objects.create(
        collection=collection,
        url=target_url,
        scraped_title="New Title",
    )

    # Second promotion - expected to reproduce the error observed in production.
    collection.promote_to_curated()

0 commit comments

Comments
 (0)