3
3
4
4
import pytest
5
5
6
+ from sde_collections .models .delta_patterns import (
7
+ DeltaExcludePattern ,
8
+ DeltaIncludePattern ,
9
+ )
6
10
from sde_collections .models .delta_url import DeltaUrl , DumpUrl
7
11
from sde_collections .tests .factories import (
8
12
CollectionFactory ,
@@ -60,7 +64,7 @@ def test_new_url_in_dump_only(self):
60
64
def test_url_in_both_with_different_field (self ):
61
65
collection = CollectionFactory ()
62
66
dump_url = DumpUrlFactory (collection = collection , scraped_title = "New Title" )
63
- curated_url = CuratedUrlFactory (collection = collection , url = dump_url .url , scraped_title = "Old Title" ) # noqa
67
+ CuratedUrlFactory (collection = collection , url = dump_url .url , scraped_title = "Old Title" )
64
68
collection .migrate_dump_to_delta ()
65
69
delta = DeltaUrl .objects .get (url = dump_url .url )
66
70
assert delta .to_delete is False
@@ -77,7 +81,7 @@ def test_url_in_curated_only(self):
77
81
def test_identical_url_in_both(self):
    """Check that no DeltaUrl exists when dump and curated rows share URL and title.

    A dump URL and a curated URL are created for the same collection with
    the same ``url`` and the same ``scraped_title``; after migration there
    must be no DeltaUrl for that URL.
    """
    collection = CollectionFactory()
    dumped = DumpUrlFactory(collection=collection, scraped_title="Same Title")
    # The curated row is only needed in the database, so the instance is not kept.
    CuratedUrlFactory(collection=collection, url=dumped.url, scraped_title="Same Title")

    collection.migrate_dump_to_delta()

    matching_deltas = DeltaUrl.objects.filter(url=dumped.url)
    assert not matching_deltas.exists()
83
87
@@ -200,7 +204,7 @@ def test_full_migration_deleted_url(self):
200
204
def test_empty_delta_comparison_fields ():
201
205
collection = CollectionFactory ()
202
206
dump_url = DumpUrlFactory (collection = collection , scraped_title = "Same Title" )
203
- curated_url = CuratedUrlFactory (collection = collection , url = dump_url .url , scraped_title = "Same Title" ) # noqa
207
+ CuratedUrlFactory (collection = collection , url = dump_url .url , scraped_title = "Same Title" ) # noqa
204
208
205
209
global DELTA_COMPARISON_FIELDS
206
210
original_fields = DELTA_COMPARISON_FIELDS
@@ -218,11 +222,88 @@ def test_empty_delta_comparison_fields():
218
222
def test_partial_data_in_curated_urls():
    """Check that an empty curated title still yields a delta with the dump title.

    The curated row shares the dump row's URL but has an empty
    ``scraped_title``; the migration is expected to produce a DeltaUrl that
    carries the dump title and is not flagged for deletion.
    """
    collection = CollectionFactory()
    dumped = DumpUrlFactory(collection=collection, scraped_title="Title Exists")
    # The curated row is only needed in the database, so the instance is not kept.
    CuratedUrlFactory(collection=collection, url=dumped.url, scraped_title="")

    collection.migrate_dump_to_delta()

    # The titles differ ("" vs "Title Exists"), so a DeltaUrl is expected.
    resulting_delta = DeltaUrl.objects.get(url=dumped.url)
    assert resulting_delta.scraped_title == "Title Exists"
    assert resulting_delta.to_delete is False
233
+
234
+
235
@pytest.mark.django_db
def test_patterns_applied_after_migration():
    """Check that each pattern is linked to its matching DeltaUrl after migration.

    Three dump URLs are migrated for a collection that owns one exclude
    pattern and one include pattern. Each pattern's ``delta_urls`` relation
    must contain the URL it targets and must not contain the neutral URL.
    """
    excluded_url = "https://exclude.com"
    included_url = "https://include.com"
    neutral_url = "https://neutral.com"

    collection = CollectionFactory()

    # Seed the dump rows that the migration will turn into delta rows.
    for url in (excluded_url, included_url, neutral_url):
        DumpUrlFactory(collection=collection, url=url)

    # NOTE(review): match_pattern_type=2 is presumably the multi-URL pattern
    # type; confirm against the pattern model's choices.
    exclude_pattern = DeltaExcludePattern.objects.create(
        collection=collection, match_pattern_type=2, match_pattern="exclude.*"
    )
    include_pattern = DeltaIncludePattern.objects.create(
        collection=collection, match_pattern_type=2, match_pattern="include.*"
    )

    collection.migrate_dump_to_delta()

    # Reload both patterns before inspecting their relations.
    exclude_pattern.refresh_from_db()
    include_pattern.refresh_from_db()

    # Each pattern must be attached to the delta URL it targets.
    excluded_links = exclude_pattern.delta_urls.filter(url=excluded_url)
    assert excluded_links.exists(), "Exclude pattern not applied to DeltaUrls."

    included_links = include_pattern.delta_urls.filter(url=included_url)
    assert included_links.exists(), "Include pattern not applied to DeltaUrls."

    # The neutral URL must not be attached to either pattern.
    neutral_excluded = exclude_pattern.delta_urls.filter(url=neutral_url)
    assert not neutral_excluded.exists(), "Exclude pattern incorrectly applied."

    neutral_included = include_pattern.delta_urls.filter(url=neutral_url)
    assert not neutral_included.exists(), "Include pattern incorrectly applied."
276
+
277
+
278
@pytest.mark.django_db
def test_full_migration_with_patterns():
    """Check the new, updated and deleted cases in one migration with patterns.

    Fixture layout:
      * ``https://new.com``    - dump only
      * ``https://update.com`` - dump and curated, with different titles
      * ``https://delete.com`` - curated only

    After migration the test expects one DeltaUrl per URL with the matching
    ``to_delete`` flag, the exclude pattern linked to the deleted URL and the
    include pattern linked to the updated URL.
    """
    new_url = "https://new.com"
    update_url = "https://update.com"
    delete_url = "https://delete.com"

    collection = CollectionFactory()

    # Dump rows first, then curated rows.
    DumpUrlFactory(collection=collection, url=new_url)
    DumpUrlFactory(collection=collection, url=update_url, scraped_title="Updated Title")
    CuratedUrlFactory(collection=collection, url=update_url, scraped_title="Old Title")
    CuratedUrlFactory(collection=collection, url=delete_url)

    # NOTE(review): match_pattern_type=2 is presumably the multi-URL pattern
    # type; confirm against the pattern model's choices.
    exclude_pattern = DeltaExcludePattern.objects.create(
        collection=collection, match_pattern_type=2, match_pattern="delete.*"
    )
    include_pattern = DeltaIncludePattern.objects.create(
        collection=collection, match_pattern_type=2, match_pattern="update.*"
    )

    collection.migrate_dump_to_delta()

    # One delta per URL, each with the expected deletion flag and title.
    deltas = DeltaUrl.objects
    assert deltas.filter(url=new_url, to_delete=False).exists()
    assert deltas.filter(url=update_url, to_delete=False, scraped_title="Updated Title").exists()
    assert deltas.filter(url=delete_url, to_delete=True).exists()

    # Reload both patterns before inspecting their relations.
    exclude_pattern.refresh_from_db()
    include_pattern.refresh_from_db()

    excluded_links = exclude_pattern.delta_urls.filter(url=delete_url)
    assert excluded_links.exists(), "Exclude pattern not applied."

    included_links = include_pattern.delta_urls.filter(url=update_url)
    assert included_links.exists(), "Include pattern not applied."
0 commit comments