Skip to content

Commit a88f3f6

Browse files
committed
updated patterns to refresh url lists
1 parent 3c7fab9 commit a88f3f6

File tree

4 files changed

+162
-11
lines changed

4 files changed

+162
-11
lines changed

sde_collections/models/collection.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import requests
55
from django.contrib.auth import get_user_model
6+
from django.contrib.contenttypes.models import ContentType
67
from django.db import models
78
from django.db.models.signals import post_save
89
from django.dispatch import receiver
@@ -93,6 +94,28 @@ def clear_dump_urls(self):
9394
"""Clears all DumpUrls for this collection."""
9495
DumpUrl.objects.filter(collection=self).delete()
9596

97+
def refresh_url_lists_for_all_patterns(self):
98+
"""
99+
Updates pattern relations for all patterns associated with this collection.
100+
"""
101+
# List of pattern models to update
102+
pattern_models = [
103+
"DeltaExcludePattern",
104+
"DeltaIncludePattern",
105+
"DeltaTitlePattern",
106+
"DeltaDocumentTypePattern",
107+
"DeltaDivisionPattern",
108+
]
109+
110+
# Loop through each model and update its relations
111+
for model_name in pattern_models:
112+
# Get the model dynamically
113+
model = ContentType.objects.get(app_label="sde_collections", model=model_name.lower()).model_class()
114+
115+
# Filter patterns for the current collection and update relations
116+
for pattern in model.objects.filter(collection=self):
117+
pattern.refresh_url_lists()
118+
96119
def migrate_dump_to_delta(self):
97120
"""Main function to handle migration from DumpUrls to DeltaUrls with specific rules."""
98121
# Step 1: Clear existing DeltaUrls for this collection
@@ -122,6 +145,9 @@ def migrate_dump_to_delta(self):
122145
# Step 5: Clear DumpUrls after migration is complete
123146
self.clear_dump_urls()
124147

148+
# Step 6: Reapply patterns to DeltaUrls
149+
self.refresh_url_lists_for_all_patterns()
150+
125151
def create_or_update_delta_url(self, url_instance, to_delete=False):
126152
"""
127153
Creates or updates a DeltaUrl entry based on the given DumpUrl or CuratedUrl object.
@@ -172,7 +198,6 @@ def promote_to_curated(self):
172198
updated_fields[field_name] = delta_value
173199

174200
if updated_fields:
175-
# Use update to modify fields directly in the database
176201
CuratedUrl.objects.filter(pk=curated.pk).update(**updated_fields)
177202
else:
178203
# If no matching CuratedUrl, create a new one using all non-null and non-empty fields
@@ -186,6 +211,9 @@ def promote_to_curated(self):
186211
# Step 3: Clear all DeltaUrls for this collection since they've been promoted
187212
DeltaUrl.objects.filter(collection=self).delete()
188213

214+
# Step 4: Refresh pattern relations now that DeltaUrls are cleared and CuratedUrls hold the promoted data
215+
self.refresh_url_lists_for_all_patterns()
216+
189217
def add_to_public_query(self):
190218
"""Add the collection to the public query."""
191219
if self.workflow_status not in [

sde_collections/models/delta_patterns.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ class MatchPatternTypeChoices(models.IntegerChoices):
4141

4242
def matched_urls(self):
4343
"""Find all URLs matching the pattern."""
44-
# Dynamically get the DeltaUrl model to avoid circular imports
4544
DeltaUrl = apps.get_model("sde_collections", "DeltaUrl")
4645
CuratedUrl = apps.get_model("sde_collections", "CuratedUrl")
4746

@@ -62,6 +61,12 @@ def matched_urls(self):
6261
"matching_curated_urls": matching_curated_urls,
6362
}
6463

64+
def refresh_url_lists(self):
65+
"""Update the delta_urls and curated_urls ManyToMany relationships."""
66+
matched_urls = self.matched_urls()
67+
self.delta_urls.set(matched_urls["matching_delta_urls"])
68+
self.curated_urls.set(matched_urls["matching_curated_urls"])
69+
6570
def generate_delta_url(self, curated_url, fields_to_copy=None):
6671
"""
6772
Generates or updates a DeltaUrl based on a CuratedUrl.
@@ -93,9 +98,8 @@ def apply(self, fields_to_copy=None, update_fields=None):
9398
if update_fields:
9499
matched_urls["matching_delta_urls"].update(**update_fields)
95100

96-
# Step 3: Populate ManyToMany relationships for DeltaUrls and CuratedUrls
97-
self.delta_urls.add(*matched_urls["matching_delta_urls"])
98-
self.curated_urls.add(*matched_urls["matching_curated_urls"])
101+
# Update ManyToMany relationships
102+
self.refresh_url_lists()
99103

100104
def unapply(self):
101105
"""Default unapply behavior."""
@@ -182,8 +186,7 @@ def apply(self) -> None:
182186
self.create_delta_if_title_differs(curated_url, DeltaResolvedTitle, DeltaResolvedTitleError)
183187

184188
# Step 3: Update ManyToMany relationships for DeltaUrls and CuratedUrls
185-
self.delta_urls.add(*matched_urls["matching_delta_urls"])
186-
self.curated_urls.add(*matched_urls["matching_curated_urls"])
189+
self.refresh_url_lists()
187190

188191
def create_delta_if_title_differs(self, curated_url, DeltaResolvedTitle, DeltaResolvedTitleError):
189192
"""

sde_collections/tests/test_migrate_dump.py

Lines changed: 85 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33

44
import pytest
55

6+
from sde_collections.models.delta_patterns import (
7+
DeltaExcludePattern,
8+
DeltaIncludePattern,
9+
)
610
from sde_collections.models.delta_url import DeltaUrl, DumpUrl
711
from sde_collections.tests.factories import (
812
CollectionFactory,
@@ -60,7 +64,7 @@ def test_new_url_in_dump_only(self):
6064
def test_url_in_both_with_different_field(self):
6165
collection = CollectionFactory()
6266
dump_url = DumpUrlFactory(collection=collection, scraped_title="New Title")
63-
curated_url = CuratedUrlFactory(collection=collection, url=dump_url.url, scraped_title="Old Title") # noqa
67+
CuratedUrlFactory(collection=collection, url=dump_url.url, scraped_title="Old Title")
6468
collection.migrate_dump_to_delta()
6569
delta = DeltaUrl.objects.get(url=dump_url.url)
6670
assert delta.to_delete is False
@@ -77,7 +81,7 @@ def test_url_in_curated_only(self):
7781
def test_identical_url_in_both(self):
7882
collection = CollectionFactory()
7983
dump_url = DumpUrlFactory(collection=collection, scraped_title="Same Title")
80-
curated_url = CuratedUrlFactory(collection=collection, url=dump_url.url, scraped_title="Same Title") # noqa
84+
CuratedUrlFactory(collection=collection, url=dump_url.url, scraped_title="Same Title")
8185
collection.migrate_dump_to_delta()
8286
assert not DeltaUrl.objects.filter(url=dump_url.url).exists()
8387

@@ -200,7 +204,7 @@ def test_full_migration_deleted_url(self):
200204
def test_empty_delta_comparison_fields():
201205
collection = CollectionFactory()
202206
dump_url = DumpUrlFactory(collection=collection, scraped_title="Same Title")
203-
curated_url = CuratedUrlFactory(collection=collection, url=dump_url.url, scraped_title="Same Title") # noqa
207+
CuratedUrlFactory(collection=collection, url=dump_url.url, scraped_title="Same Title") # noqa
204208

205209
global DELTA_COMPARISON_FIELDS
206210
original_fields = DELTA_COMPARISON_FIELDS
@@ -218,11 +222,88 @@ def test_empty_delta_comparison_fields():
218222
def test_partial_data_in_curated_urls():
219223
collection = CollectionFactory()
220224
dump_url = DumpUrlFactory(collection=collection, scraped_title="Title Exists")
221-
curated_url = CuratedUrlFactory(collection=collection, url=dump_url.url, scraped_title="") # noqa
225+
CuratedUrlFactory(collection=collection, url=dump_url.url, scraped_title="") # noqa
222226

223227
collection.migrate_dump_to_delta()
224228

225229
# Since `scraped_title` differs ("" vs "Title Exists"), it should create a DeltaUrl
226230
delta = DeltaUrl.objects.get(url=dump_url.url)
227231
assert delta.scraped_title == "Title Exists"
228232
assert delta.to_delete is False
233+
234+
235+
@pytest.mark.django_db
236+
def test_patterns_applied_after_migration():
237+
collection = CollectionFactory()
238+
239+
# Add DumpUrls to migrate
240+
DumpUrlFactory(collection=collection, url="https://exclude.com")
241+
DumpUrlFactory(collection=collection, url="https://include.com")
242+
DumpUrlFactory(collection=collection, url="https://neutral.com")
243+
244+
# Create exclude and include patterns
245+
exclude_pattern = DeltaExcludePattern.objects.create(
246+
collection=collection, match_pattern_type=2, match_pattern="exclude.*"
247+
)
248+
include_pattern = DeltaIncludePattern.objects.create(
249+
collection=collection, match_pattern_type=2, match_pattern="include.*"
250+
)
251+
252+
# Perform the migration
253+
collection.migrate_dump_to_delta()
254+
255+
# Check that the patterns were applied
256+
exclude_pattern.refresh_from_db()
257+
include_pattern.refresh_from_db()
258+
259+
# Verify exclude pattern relationship
260+
assert exclude_pattern.delta_urls.filter(
261+
url="https://exclude.com"
262+
).exists(), "Exclude pattern not applied to DeltaUrls."
263+
264+
# Verify include pattern relationship
265+
assert include_pattern.delta_urls.filter(
266+
url="https://include.com"
267+
).exists(), "Include pattern not applied to DeltaUrls."
268+
269+
# Ensure neutral URL is unaffected
270+
assert not exclude_pattern.delta_urls.filter(
271+
url="https://neutral.com"
272+
).exists(), "Exclude pattern incorrectly applied."
273+
assert not include_pattern.delta_urls.filter(
274+
url="https://neutral.com"
275+
).exists(), "Include pattern incorrectly applied."
276+
277+
278+
@pytest.mark.django_db
279+
def test_full_migration_with_patterns():
280+
collection = CollectionFactory()
281+
282+
# Set up DumpUrls and CuratedUrls
283+
DumpUrlFactory(collection=collection, url="https://new.com")
284+
DumpUrlFactory(collection=collection, url="https://update.com", scraped_title="Updated Title")
285+
CuratedUrlFactory(collection=collection, url="https://update.com", scraped_title="Old Title")
286+
CuratedUrlFactory(collection=collection, url="https://delete.com")
287+
288+
# Create patterns
289+
exclude_pattern = DeltaExcludePattern.objects.create(
290+
collection=collection, match_pattern_type=2, match_pattern="delete.*"
291+
)
292+
include_pattern = DeltaIncludePattern.objects.create(
293+
collection=collection, match_pattern_type=2, match_pattern="update.*"
294+
)
295+
296+
# Perform migration
297+
collection.migrate_dump_to_delta()
298+
299+
# Check DeltaUrls
300+
assert DeltaUrl.objects.filter(url="https://new.com", to_delete=False).exists()
301+
assert DeltaUrl.objects.filter(url="https://update.com", to_delete=False, scraped_title="Updated Title").exists()
302+
assert DeltaUrl.objects.filter(url="https://delete.com", to_delete=True).exists()
303+
304+
# Check patterns
305+
exclude_pattern.refresh_from_db()
306+
include_pattern.refresh_from_db()
307+
308+
assert exclude_pattern.delta_urls.filter(url="https://delete.com").exists(), "Exclude pattern not applied."
309+
assert include_pattern.delta_urls.filter(url="https://update.com").exists(), "Include pattern not applied."

sde_collections/tests/test_promote_collection.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
import pytest
44

5+
from sde_collections.models.delta_patterns import (
6+
DeltaExcludePattern,
7+
DeltaIncludePattern,
8+
)
59
from sde_collections.models.delta_url import CuratedUrl, DeltaUrl
610
from sde_collections.tests.factories import CollectionFactory
711

@@ -77,3 +81,38 @@ def test_promotion_deletes_curated_urls(collection):
7781
assert not CuratedUrl.objects.filter(url="https://example1.com").exists()
7882
# Ensure the other CuratedUrl is still present
7983
assert CuratedUrl.objects.filter(url="https://example2.com").exists()
84+
85+
86+
@pytest.mark.django_db
87+
def test_patterns_reapplied_after_promotion(collection):
88+
# Add DeltaUrls matching the patterns
89+
DeltaUrl.objects.create(collection=collection, url="https://exclude.com", scraped_title="Exclude This")
90+
DeltaUrl.objects.create(collection=collection, url="https://include.com", scraped_title="Include This")
91+
92+
# Create exclude and include patterns
93+
exclude_pattern = DeltaExcludePattern.objects.create(
94+
collection=collection, match_pattern_type=2, match_pattern="exclude.*"
95+
)
96+
include_pattern = DeltaIncludePattern.objects.create(
97+
collection=collection, match_pattern_type=2, match_pattern="include.*"
98+
)
99+
100+
# Promote DeltaUrls to CuratedUrls
101+
collection.promote_to_curated()
102+
103+
# Refresh the patterns and check relationships
104+
exclude_pattern.refresh_from_db()
105+
include_pattern.refresh_from_db()
106+
107+
# Verify that patterns are reapplied
108+
curated_urls = CuratedUrl.objects.filter(collection=collection)
109+
110+
assert curated_urls.filter(url="https://exclude.com").exists()
111+
assert curated_urls.filter(url="https://include.com").exists()
112+
113+
# Ensure exclude_pattern and include_pattern relationships are populated
114+
assert exclude_pattern.curated_urls.filter(url="https://exclude.com").exists()
115+
assert include_pattern.curated_urls.filter(url="https://include.com").exists()
116+
117+
# Verify exclusion status
118+
assert curated_urls.filter(url="https://exclude.com", excluded=True).exists()

0 commit comments

Comments
 (0)