|
18 | 18 | CollectionFactory,
|
19 | 19 | CuratedUrlFactory,
|
20 | 20 | DeltaUrlFactory,
|
| 21 | + DumpUrlFactory, |
21 | 22 | )
|
22 | 23 | from sde_collections.utils.title_resolver import resolve_title
|
23 | 24 |
|
@@ -84,6 +85,72 @@ def test_generate_delta_url_creation_and_update(self):
|
84 | 85 | delta_url.refresh_from_db()
|
85 | 86 | assert delta_url.scraped_title == original_delta_title
|
86 | 87 |
|
| 88 | + def test_apply_creates_delta_url_if_curated_url_does_not_exist(self): |
| 89 | + """ |
| 90 | + Ensures that the `apply` logic creates a new `DeltaUrl` if a matching `CuratedUrl` does not exist. |
| 91 | + """ |
| 92 | + collection = CollectionFactory() |
| 93 | + delta_url = DeltaUrlFactory( |
| 94 | + collection=collection, url="https://example.com/page", scraped_title="Original Title" |
| 95 | + ) |
| 96 | + |
| 97 | + # Create a pattern matching the URL |
| 98 | + pattern = DeltaIncludePattern.objects.create( |
| 99 | + collection=collection, match_pattern="https://example.com/*", match_pattern_type=2 |
| 100 | + ) |
| 101 | + |
| 102 | + # Apply the pattern |
| 103 | + pattern.apply() |
| 104 | + |
| 105 | + # Verify that a DeltaUrl exists for the URL |
| 106 | + assert DeltaUrl.objects.filter(url=delta_url.url).exists() |
| 107 | + |
| 108 | + def test_apply_skips_delta_url_creation_if_curated_url_exists(self): |
| 109 | + """ |
| 110 | + Ensures that the `apply` logic does not create a new `DeltaUrl` if a matching `CuratedUrl` already exists. |
| 111 | + """ |
| 112 | + collection = CollectionFactory() |
| 113 | + delta_url = DeltaUrlFactory( |
| 114 | + collection=collection, url="https://example.com/page", scraped_title="Original Title" |
| 115 | + ) |
| 116 | + |
| 117 | + # Create a pattern matching the URL |
| 118 | + pattern = DeltaIncludePattern.objects.create( |
| 119 | + collection=collection, match_pattern="https://example.com/*", match_pattern_type=2 |
| 120 | + ) |
| 121 | + |
| 122 | + # Promote the DeltaUrl to a CuratedUrl |
| 123 | + collection.promote_to_curated() |
| 124 | + curated_url = CuratedUrl.objects.get(url=delta_url.url) |
| 125 | + |
| 126 | + # Reapply the pattern |
| 127 | + pattern.apply() |
| 128 | + |
| 129 | + # Verify that no DeltaUrl exists for this URL now that a CuratedUrl exists |
| 130 | + assert not DeltaUrl.objects.filter(url=curated_url.url).exists() |
| 131 | + |
| 132 | + def test_apply_creates_delta_url_if_no_curated_url_exists(self): |
| 133 | + """ |
| 134 | + Ensures that if no `CuratedUrl` exists for a given pattern, a new `DeltaUrl` is created. |
| 135 | + """ |
| 136 | + collection = CollectionFactory() |
| 137 | + dump_url = DumpUrlFactory(collection=collection, url="https://example.com/page", scraped_title="New Title") |
| 138 | + |
| 139 | + # Migrate DumpUrl to DeltaUrl |
| 140 | + collection.migrate_dump_to_delta() |
| 141 | + |
| 142 | + # Create a pattern matching the URL |
| 143 | + pattern = DeltaIncludePattern.objects.create( |
| 144 | + collection=collection, match_pattern="https://example.com/*", match_pattern_type=2 |
| 145 | + ) |
| 146 | + |
| 147 | + # Apply the pattern |
| 148 | + pattern.apply() |
| 149 | + |
| 150 | + # A `DeltaUrl` should now exist |
| 151 | + delta_url = DeltaUrl.objects.get(url=dump_url.url) |
| 152 | + assert delta_url.scraped_title == dump_url.scraped_title |
| 153 | + |
87 | 154 | def test_apply_and_unapply_pattern(self):
|
88 | 155 | # if we make a new exclude pattern and it affects an old url
|
89 | 156 | # that wasn't previously affected, what should happen?
|
@@ -258,6 +325,30 @@ def test_unapply_removes_pattern_relationships(self):
|
258 | 325 | assert not pattern.delta_urls.filter(pk=delta_url.pk).exists()
|
259 | 326 | assert not pattern.curated_urls.filter(pk=curated_url.pk).exists()
|
260 | 327 |
|
| 328 | + # TODO: fix the logic of the commented-out test below and re-enable it |
| 329 | + # def test_pattern_reapplication_does_not_duplicate_delta_urls(self): |
| 330 | + # """ |
| 331 | + # Ensures that reapplying a pattern does not create duplicate `DeltaUrls` or affect existing `CuratedUrls`. |
| 332 | + # """ |
| 333 | + # collection = CollectionFactory() |
| 334 | + # delta_url = DeltaUrlFactory(collection=collection, |
| 335 | + # url="https://example.com/page", |
| 336 | + # scraped_title="Title Before") |
| 337 | + |
| 338 | + # # Promote to CuratedUrl |
| 339 | + # collection.promote_to_curated() |
| 340 | + # curated_url = CuratedUrl.objects.get(url=delta_url.url) |
| 341 | + |
| 342 | + # # Apply a pattern |
| 343 | + # pattern = DeltaTitlePattern.objects.create( |
| 344 | + # collection=collection, match_pattern="https://example.com/*", match_pattern_type=2, title_patte...... |
| 345 | + # ) |
| 346 | + # pattern.apply() |
| 347 | + |
| 348 | + # # Ensure no new `DeltaUrl` is created after reapplying the pattern |
| 349 | + # pattern.apply() |
| 350 | + # assert DeltaUrl.objects.filter(url=curated_url.url).count() == 0 |
| 351 | + |
261 | 352 |
|
262 | 353 | @pytest.mark.django_db
|
263 | 354 | class TestDeltaDocumentTypePattern:
|
|
0 commit comments