|
| 1 | +# docker-compose -f local.yml run --rm django pytest sde_collections/tests/test_title_patterns.py |
| 2 | + |
| 3 | +import pytest |
| 4 | + |
| 5 | +from sde_collections.models.delta_patterns import DeltaResolvedTitle, DeltaTitlePattern |
| 6 | +from sde_collections.models.delta_url import DeltaUrl |
| 7 | +from sde_collections.tests.factories import CollectionFactory, DeltaUrlFactory |
| 8 | + |
| 9 | + |
@pytest.mark.django_db
def test_title_pattern_multiple_resolved_titles_extended():
    """Check title resolution when a broad and a narrow pattern overlap.

    Three URLs sharing one scraped title are created in a single collection.
    A ``*docs*`` pattern is created first and is expected to title all three.
    A ``*docs/pdfs*`` pattern is created afterwards and is expected to take
    over only the one URL it matches, while each URL keeps exactly one
    DeltaResolvedTitle row and the broad pattern stays related to all URLs.
    """
    collection = CollectionFactory()

    def stored_title(delta_url):
        # Re-fetch by primary key so the check reads the persisted title
        # rather than whatever is held on the factory-built instance.
        return DeltaUrl.objects.get(pk=delta_url.pk).generated_title

    def resolution_for(delta_url):
        # Exactly one resolved-title row is expected per URL; .get() raises otherwise.
        return DeltaResolvedTitle.objects.get(delta_url=delta_url)

    # Two URLs directly under /docs/ and one nested under /docs/pdfs/.
    url1, url2, url3 = (
        DeltaUrlFactory(collection=collection, url=address, scraped_title="Original Title")
        for address in (
            "https://example.com/docs/item.html",
            "https://example.com/docs/item2.html",
            "https://example.com/docs/pdfs/item1.html",
        )
    )
    every_url = (url1, url2, url3)
    docs_only_urls = (url1, url2)

    # Broad pattern: its wildcard covers all three URLs.
    # NOTE(review): match_pattern_type=2 is presumably the multi-URL/wildcard
    # choice -- confirm against the DeltaTitlePattern model.
    general_pattern = DeltaTitlePattern.objects.create(
        collection=collection,
        match_pattern="*docs*",
        title_pattern="{title} - Docs",
        match_pattern_type=2,
    )

    # --- State after only the broad pattern exists ---
    assert general_pattern.get_url_match_count() == 3
    for delta_url in every_url:
        assert stored_title(delta_url) == "Original Title - Docs"

    assert DeltaResolvedTitle.objects.count() == 3
    for delta_url in every_url:
        resolution = resolution_for(delta_url)
        assert resolution.title_pattern == general_pattern
        assert resolution.resolved_title == "Original Title - Docs"

    # Narrow pattern: only the URL under /docs/pdfs/ matches it.
    specific_pattern = DeltaTitlePattern.objects.create(
        collection=collection,
        match_pattern="*docs/pdfs*",
        title_pattern="{title} - HTML",
        match_pattern_type=2,
    )

    # --- State after the narrow pattern is added ---
    assert specific_pattern.get_url_match_count() == 1  # just the pdfs URL
    assert general_pattern.get_url_match_count() == 3  # still every URL

    # Only the pdfs URL should have picked up the new title.
    for delta_url in docs_only_urls:
        assert stored_title(delta_url) == "Original Title - Docs"
    assert stored_title(url3) == "Original Title - HTML"

    # Adding a second pattern must not create extra resolved-title rows.
    assert DeltaResolvedTitle.objects.count() == 3

    # The /docs/ URLs remain resolved by the broad pattern.
    for delta_url in docs_only_urls:
        resolution = resolution_for(delta_url)
        assert resolution.title_pattern == general_pattern
        assert resolution.resolved_title == "Original Title - Docs"

    # The pdfs URL is now resolved by the narrow pattern.
    pdf_resolution = resolution_for(url3)
    assert pdf_resolution.title_pattern == specific_pattern
    assert pdf_resolution.resolved_title == "Original Title - HTML"

    # Pattern-to-URL relations: the broad pattern keeps all three URLs,
    # and the narrow pattern is related to the pdfs URL.
    for delta_url in every_url:
        assert delta_url in general_pattern.delta_urls.all()
    assert url3 in specific_pattern.delta_urls.all()
0 commit comments