Skip to content

Commit 430a1c1

Browse files
committed
add additional promotion tests
1 parent 0b83d17 commit 430a1c1

File tree

1 file changed

+183
-1
lines changed

1 file changed

+183
-1
lines changed

sde_collections/tests/test_promote_collection.py

Lines changed: 183 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
# docker-compose -f local.yml run --rm django pytest sde_collections/tests/test_promote_collection.py
22
import pytest
33

4+
from sde_collections.models.collection_choice_fields import Divisions, DocumentTypes
45
from sde_collections.models.delta_patterns import (
6+
DeltaDivisionPattern,
7+
DeltaDocumentTypePattern,
58
DeltaExcludePattern,
69
DeltaIncludePattern,
710
DeltaTitlePattern,
811
)
9-
from sde_collections.models.delta_url import CuratedUrl, DeltaUrl
12+
from sde_collections.models.delta_url import CuratedUrl, DeltaUrl, DumpUrl
1013
from sde_collections.tests.factories import CollectionFactory
1114

1215

@@ -208,3 +211,182 @@ def test_promotion_with_title_change():
208211

209212
# This should trigger the same error we're seeing in production
210213
collection.promote_to_curated()
214+
215+
216+
@pytest.mark.django_db
def test_promotion_maintains_pattern_relationships_through_updates(collection):
    """Check that re-promoting an updated URL keeps its curated row and pattern links.

    A single URL is promoted, then a second delta for the same URL (with a new
    scraped title) is promoted. The curated row must keep its primary key and
    the title pattern's ``curated_urls`` relation must be unchanged.
    """
    target_url = "https://example.com"

    # Seed one delta and one title pattern, then run the first promotion.
    DeltaUrl.objects.create(collection=collection, url=target_url, scraped_title="Title")
    title_pattern = DeltaTitlePattern.objects.create(
        collection=collection,
        match_pattern="example.com",
        match_pattern_type=1,
        title_pattern="Pattern: {title}",
    )
    collection.promote_to_curated()

    # Snapshot the curated row's id and the pattern's related curated URLs.
    curated_row = CuratedUrl.objects.get(url=target_url)
    id_before = curated_row.id
    related_before = list(title_pattern.curated_urls.all())

    # Same URL arrives again with a different scraped title; promote once more.
    DeltaUrl.objects.create(collection=collection, url=target_url, scraped_title="New Title")
    collection.promote_to_curated()

    # Reload the previously fetched row and compare against the snapshot.
    curated_row.refresh_from_db()
    assert curated_row.id == id_before  # primary key must be stable across promotions
    related_after = list(title_pattern.curated_urls.all())
    assert related_after == related_before
240+
241+
242+
@pytest.mark.django_db
def test_sequential_promotions_with_multiple_patterns(collection):
    """Run three promotions in a row while title patterns are added in between.

    Verifies that curated ids for both URLs are unchanged after the second
    promotion, that a pattern created after a promotion has no matching curated
    URLs until the next promotion runs, and that both patterns have matching
    curated URLs after the final promotion.
    """
    doc_url = "https://example.com/doc"
    guide_url = "https://example.com/guide"
    urls = [doc_url, guide_url]

    # Two deltas to start with, one per URL.
    for url in urls:
        DeltaUrl.objects.create(collection=collection, url=url, scraped_title=f"Title for {url}")

    # Pattern for the "doc" URL exists before the first promotion.
    doc_pattern = DeltaTitlePattern.objects.create(
        collection=collection,
        match_pattern="doc",
        match_pattern_type=2,
        title_pattern="Doc: {title}",
    )
    collection.promote_to_curated()

    # Remember which curated id each URL received on the first promotion.
    ids_after_first_promotion = {}
    for url in urls:
        ids_after_first_promotion[url] = CuratedUrl.objects.get(url=url).id

    # Second round: the guide URL is re-scraped with a new title.
    DeltaUrl.objects.create(collection=collection, url=guide_url, scraped_title="Updated guide")
    collection.promote_to_curated()

    # Pattern for the "guide" URL is only added now, after two promotions.
    guide_pattern = DeltaTitlePattern.objects.create(
        collection=collection,
        match_pattern="guide",
        match_pattern_type=2,
        title_pattern="Guide: {title}",
    )
    # Before the next promotion it must not be linked to any curated guide URL.
    assert not guide_pattern.curated_urls.filter(url__contains="guide").exists()

    # Curated ids must be the ones assigned by the first promotion.
    for url in urls:
        curated_row = CuratedUrl.objects.get(url=url)
        assert curated_row.id == ids_after_first_promotion[url]

    collection.promote_to_curated()

    # After the third promotion both patterns are linked to their curated URLs.
    assert doc_pattern.curated_urls.filter(url__contains="doc").exists()
    assert guide_pattern.curated_urls.filter(url__contains="guide").exists()
278+
279+
280+
@pytest.mark.django_db
def test_promotion_with_division_changes(collection):
    """Check that division patterns determine the division of promoted URLs.

    Two deltas start in ``Divisions.GENERAL``; each is matched by its own
    division pattern, and after promotion the curated URLs must carry the
    division named by the matching pattern.
    """
    # (match fragment, delta URL, division expected after promotion)
    division_cases = (
        ("astrophysics", "https://example.com/astrophysics", Divisions.ASTROPHYSICS),
        ("helio", "https://example.com/helio", Divisions.HELIOPHYSICS),
    )

    # Every delta starts out in the GENERAL division.
    for _fragment, delta_url, _expected in division_cases:
        DeltaUrl.objects.create(collection=collection, url=delta_url, division=Divisions.GENERAL)

    # One division pattern per fragment, created after all deltas exist.
    for fragment, _delta_url, expected_division in division_cases:
        DeltaDivisionPattern.objects.create(
            collection=collection,
            match_pattern=fragment,
            match_pattern_type=2,  # Multi-URL pattern
            division=expected_division,
        )

    collection.promote_to_curated()

    # Each curated URL must now have the division from its pattern.
    for fragment, _delta_url, expected_division in division_cases:
        assert CuratedUrl.objects.get(url__contains=fragment).division == expected_division
304+
305+
306+
@pytest.mark.django_db
def test_promotion_with_document_type_changes(collection):
    """Check that document type patterns determine the type of promoted URLs.

    Two deltas start as ``DocumentTypes.DOCUMENTATION``. A document type
    pattern is created and explicitly applied for each, and after promotion the
    curated URLs must carry the pattern's document type.
    """
    # (delta URL, pattern text, lookup fragment, document type expected after promotion)
    doc_type_cases = (
        ("https://example.com/data/set1", "data/*", "/data/", DocumentTypes.DATA),
        ("https://example.com/tools/tool1", "tools/*", "/tools/", DocumentTypes.SOFTWARETOOLS),
    )

    # Every delta starts out typed as DOCUMENTATION.
    for delta_url, _pattern_text, _fragment, _expected in doc_type_cases:
        DeltaUrl.objects.create(
            collection=collection,
            url=delta_url,
            document_type=DocumentTypes.DOCUMENTATION,
        )

    # Create each pattern and call apply() on it straight away.
    for _delta_url, pattern_text, _fragment, expected_type in doc_type_cases:
        doc_type_pattern = DeltaDocumentTypePattern.objects.create(
            collection=collection,
            match_pattern=pattern_text,
            match_pattern_type=2,
            document_type=expected_type,
        )
        doc_type_pattern.apply()

    collection.promote_to_curated()

    # Each curated URL must now have the document type from its pattern.
    for _delta_url, _pattern_text, fragment, expected_type in doc_type_cases:
        assert CuratedUrl.objects.get(url__contains=fragment).document_type == expected_type
331+
332+
333+
@pytest.mark.django_db
def test_promotion_with_multiple_metadata_changes_dump(collection):
    """Follow one URL through two dump -> delta -> curated cycles with patterns active.

    Division, document type and title patterns are created after the first
    dump has been migrated. Both promotions must yield a curated URL whose
    division and document type come from the patterns, and whose generated
    title is the title pattern applied to the most recent scraped title.
    """
    url = "https://example.com/helio/data"

    def ingest_crawl(scraped_title):
        """Create a DumpUrl for ``url`` with default metadata and migrate dumps to deltas."""
        DumpUrl.objects.create(
            collection=collection,
            url=url,
            division=Divisions.GENERAL,  # expected to be overridden by the division pattern
            document_type=DocumentTypes.DOCUMENTATION,  # expected to be overridden by the doc type pattern
            scraped_title=scraped_title,
        )
        collection.migrate_dump_to_delta()

    def assert_curated_state(expected_generated_title):
        """Fetch the curated row for ``url`` and check its pattern-driven metadata."""
        curated_row = CuratedUrl.objects.get(url=url)
        assert curated_row.division == Divisions.HELIOPHYSICS
        assert curated_row.document_type == DocumentTypes.DATA
        assert curated_row.generated_title == expected_generated_title

    # First crawl arrives.
    ingest_crawl("Raw Data Title")

    # Three patterns that all match the URL under test.
    DeltaDivisionPattern.objects.create(
        collection=collection,
        match_pattern="*helio*",
        match_pattern_type=2,
        division=Divisions.HELIOPHYSICS,
    )
    DeltaDocumentTypePattern.objects.create(
        collection=collection,
        match_pattern="*data*",
        match_pattern_type=2,
        document_type=DocumentTypes.DATA,
    )
    DeltaTitlePattern.objects.create(
        collection=collection,
        match_pattern="*data*",
        match_pattern_type=2,
        title_pattern="Heliophysics Data: {title}",
    )

    # First promotion: pattern metadata plus the original scraped title.
    collection.promote_to_curated()
    assert_curated_state("Heliophysics Data: Raw Data Title")

    # Second crawl of the same URL with a changed scraped title.
    ingest_crawl("Updated Data Title")

    # Second promotion: pattern metadata is kept, generated title follows the new scrape.
    collection.promote_to_curated()
    assert_curated_state("Heliophysics Data: Updated Data Title")

0 commit comments

Comments
 (0)