|
1 | 1 | # docker-compose -f local.yml run --rm django pytest sde_collections/tests/test_promote_collection.py
|
2 |
| - |
3 | 2 | import pytest
|
4 | 3 |
|
5 | 4 | from sde_collections.models.delta_patterns import (
|
6 | 5 | DeltaExcludePattern,
|
7 | 6 | DeltaIncludePattern,
|
| 7 | + DeltaTitlePattern, |
8 | 8 | )
|
9 | 9 | from sde_collections.models.delta_url import CuratedUrl, DeltaUrl
|
10 | 10 | from sde_collections.tests.factories import CollectionFactory
|
@@ -116,3 +116,95 @@ def test_patterns_reapplied_after_promotion(collection):
|
116 | 116 |
|
117 | 117 | # Verify exclusion status
|
118 | 118 | assert curated_urls.filter(url="https://exclude.com", excluded=True).exists()
|
| 119 | + |
| 120 | + |
@pytest.mark.django_db
def test_promotion_with_overlapping_patterns_and_deletion():
    """Promote URLs matched by several overlapping title patterns, then delete some.

    Scenario:
      1. Four DeltaUrls are created and four multi-URL title patterns are
         applied; every URL is matched by more than one pattern.
      2. The collection is promoted.
      3. Two of the URLs are re-submitted as DeltaUrls flagged ``to_delete``
         and the collection is promoted again.

    The second promotion must complete without raising, the URLs flagged for
    deletion must no longer exist as CuratedUrls (nor be related to any title
    pattern), and the untouched URLs must still be curated.
    """
    collection = CollectionFactory()

    # Create a more complex set of URLs that might trigger overlapping patterns
    urls = [
        "https://example.com/docs/guide1",
        "https://example.com/docs/guide2",
        "https://example.com/api/v1/doc1",
        "https://example.com/api/v1/doc2",
    ]

    # Create initial DeltaUrls
    for url in urls:
        DeltaUrl.objects.create(collection=collection, url=url, scraped_title=f"Title for {url}")

    # Create overlapping patterns that will affect the same URLs
    patterns = [
        {"pattern": ".*docs.*", "title": "Documentation: {title}"},
        {"pattern": ".*guide.*", "title": "Guide: {title}"},
        {"pattern": ".*api.*", "title": "API: {title}"},
        {"pattern": ".*doc[0-9]", "title": "Doc Number: {title}"},
    ]

    # Create and apply multiple patterns
    title_patterns = []
    for p in patterns:
        pattern = DeltaTitlePattern.objects.create(
            collection=collection,
            match_pattern=p["pattern"],
            match_pattern_type=2,  # Multi-URL Pattern
            title_pattern=p["title"],
        )
        pattern.apply()
        title_patterns.append(pattern)

    # Initial promotion
    collection.promote_to_curated()

    # Dump the pattern -> CuratedUrl relations (visible with `pytest -s`)
    for pattern in title_patterns:
        matching_urls = pattern.curated_urls.all()
        print(f"\nPattern '{pattern.match_pattern}' matches {matching_urls.count()} URLs:")
        for curated_url in matching_urls:
            print(f"- {curated_url.url}")

    # Now create deletion DeltaUrls but with overlapping pattern matches
    urls_to_delete = ["https://example.com/docs/guide1", "https://example.com/api/v1/doc1"]
    for url in urls_to_delete:
        DeltaUrl.objects.create(collection=collection, url=url, to_delete=True)

    # Second promotion: the step under test. It must not raise even though the
    # URLs being deleted are related to several title patterns at once.
    collection.promote_to_curated()

    # Print final state for debugging
    print("\nFinal state:")
    for pattern in title_patterns:
        print(f"\nPattern '{pattern.match_pattern}':")
        for curated_url in pattern.curated_urls.all():
            print(f"- {curated_url.url}")

    # Verify the outcome instead of relying on printed output alone.
    curated_urls = CuratedUrl.objects.filter(collection=collection)
    for url in urls_to_delete:
        assert not curated_urls.filter(url=url).exists()
    for url in urls:
        if url not in urls_to_delete:
            assert curated_urls.filter(url=url).exists()

    # No pattern may still reference a URL that was deleted.
    for pattern in title_patterns:
        assert not pattern.curated_urls.filter(url__in=urls_to_delete).exists()
| 182 | + |
| 183 | + |
@pytest.mark.django_db
def test_promotion_with_title_change():
    """Re-promote a URL with a new scraped title while a title pattern is related to it.

    A DeltaUrl is promoted together with a multi-URL title pattern that
    matches it. A second DeltaUrl for the same URL, carrying a different
    ``scraped_title``, is then promoted. The second promotion must complete
    without raising, leave exactly one CuratedUrl for the URL, and that
    CuratedUrl must carry the new scraped title.
    """
    collection = CollectionFactory()

    # Create the initial DeltaUrl
    url = "https://example.com/doc1"
    DeltaUrl.objects.create(collection=collection, url=url, scraped_title="Original Title")

    # Create and apply a title pattern
    pattern = DeltaTitlePattern.objects.create(
        collection=collection, match_pattern=".*doc1", match_pattern_type=2, title_pattern="Pattern: {title}"
    )
    pattern.apply()

    # Initial promotion
    collection.promote_to_curated()

    # Verify pattern relationship exists
    curated = CuratedUrl.objects.get(url=url)
    assert pattern.curated_urls.filter(id=curated.id).exists()

    # Now create new DeltaUrl with updated title
    DeltaUrl.objects.create(collection=collection, url=url, scraped_title="New Title")  # Changed title

    # Regression scenario: updating a CuratedUrl that already has title-pattern
    # relations reportedly raised in production; it must now succeed.
    collection.promote_to_curated()

    # The update must modify the existing row rather than duplicate or drop it.
    curated_after = CuratedUrl.objects.filter(collection=collection, url=url)
    assert curated_after.count() == 1
    assert curated_after.get().scraped_title == "New Title"
0 commit comments