Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions config/settings/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,7 @@
TEMPLATES[0]["OPTIONS"]["debug"] = True # type: ignore # noqa F405
# Your stuff...
# ------------------------------------------------------------------------------


# Celery overrides for the test settings module.
# NOTE(review): the CELERY_ prefix presumably relies on the Celery app reading
# Django settings with namespace="CELERY" — confirm against the Celery app config.
CELERY_TASK_ALWAYS_EAGER = True # Run tasks synchronously in the calling process instead of sending them to the queue
CELERY_TASK_EAGER_PROPAGATES = True # Let exceptions raised by eagerly executed tasks propagate to the caller
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# Generated by Django 4.2.9 on 2025-02-24 22:40

from django.db import migrations, models


# Schema migration for the resolved-title models: adds a `status` field and an
# `updated_at` timestamp, indexes (status, created_at), then alters `status`
# several times until it ends up nullable with no default.
class Migration(migrations.Migration):

dependencies = [
("sde_collections", "0075_alter_collection_reindexing_status_and_more"),
]

operations = [
# Add `status` to deltaresolvedtitle with a default of "pending".
migrations.AddField(
model_name="deltaresolvedtitle",
name="status",
field=models.CharField(
choices=[
("pending", "Pending"),
("processing", "Processing"),
("resolved", "Resolved"),
("failed", "Failed"),
],
default="pending",
max_length=20,
),
),
# Add an auto-updating `updated_at` timestamp to both models.
migrations.AddField(
model_name="deltaresolvedtitle",
name="updated_at",
field=models.DateTimeField(auto_now=True),
),
migrations.AddField(
model_name="deltaresolvedtitleerror",
name="updated_at",
field=models.DateTimeField(auto_now=True),
),
# Composite index on (status, created_at).
migrations.AddIndex(
model_name="deltaresolvedtitle",
index=models.Index(fields=["status", "created_at"], name="sde_collect_status_42dc80_idx"),
),
# NOTE(review): the six AlterField operations below all target the same
# `status` column and only toggle `null`, `blank` and `default`; only the
# last one determines the final schema. They look like leftovers from
# iterating on the model — consider collapsing them before release, after
# confirming the effect on rows that already exist.
# Alter 1: nullable, blank allowed, default removed.
migrations.AlterField(
model_name="deltaresolvedtitle",
name="status",
field=models.CharField(
blank=True,
choices=[
("pending", "Pending"),
("processing", "Processing"),
("resolved", "Resolved"),
("failed", "Failed"),
],
max_length=20,
null=True,
),
),
# Alter 2: nullable, default "".
migrations.AlterField(
model_name="deltaresolvedtitle",
name="status",
field=models.CharField(
choices=[
("pending", "Pending"),
("processing", "Processing"),
("resolved", "Resolved"),
("failed", "Failed"),
],
default="",
max_length=20,
null=True,
),
),
# Alter 3: not nullable, default "".
migrations.AlterField(
model_name="deltaresolvedtitle",
name="status",
field=models.CharField(
choices=[
("pending", "Pending"),
("processing", "Processing"),
("resolved", "Resolved"),
("failed", "Failed"),
],
default="",
max_length=20,
),
),
# Alter 4: same definition as alter 2 (nullable, default "").
migrations.AlterField(
model_name="deltaresolvedtitle",
name="status",
field=models.CharField(
choices=[
("pending", "Pending"),
("processing", "Processing"),
("resolved", "Resolved"),
("failed", "Failed"),
],
default="",
max_length=20,
null=True,
),
),
# Alter 5: same definition as alter 3 (not nullable, default "").
migrations.AlterField(
model_name="deltaresolvedtitle",
name="status",
field=models.CharField(
choices=[
("pending", "Pending"),
("processing", "Processing"),
("resolved", "Resolved"),
("failed", "Failed"),
],
default="",
max_length=20,
),
),
# Alter 6 (final state): nullable, no default.
migrations.AlterField(
model_name="deltaresolvedtitle",
name="status",
field=models.CharField(
choices=[
("pending", "Pending"),
("processing", "Processing"),
("resolved", "Resolved"),
("failed", "Failed"),
],
max_length=20,
null=True,
),
),
]
103 changes: 32 additions & 71 deletions sde_collections/models/delta_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from django.apps import apps
from django.core.exceptions import ValidationError
from django.db import models
from django.db import models, transaction

from ..utils.title_resolver import (
is_valid_xpath,
Expand Down Expand Up @@ -476,81 +476,21 @@ def generate_title_for_url(self, url_obj) -> tuple[str, str | None]:

def apply(self) -> None:
"""
Apply the title pattern to matching URLs:
Queue title pattern resolution for matching URLs:
1. Find new Curated URLs that match but weren't previously affected
2. Create Delta URLs only where the generated title differs
3. Update all matching Delta URLs with new titles
3. Queue background tasks for title resolution
4. Track title resolution status and errors
"""
# NOTE(review): this span appears to interleave the previous synchronous
# implementation with the new queue-based one (diff residue: duplicated
# docstring lines, a nested function defined in the middle of a loop).
# Confirm which lines belong to the current method before relying on it.
DeltaUrl = apps.get_model("sde_collections", "DeltaUrl")
DeltaResolvedTitle = apps.get_model("sde_collections", "DeltaResolvedTitle")
DeltaResolvedTitleError = apps.get_model("sde_collections", "DeltaResolvedTitleError")

# Get newly matching Curated URLs
matching_curated_urls = self.get_matching_curated_urls()
previously_unaffected_curated = matching_curated_urls.exclude(
id__in=self.curated_urls.values_list("id", flat=True)
)

# Process each previously unaffected curated URL
for curated_url in previously_unaffected_curated:
if not self.is_most_distinctive_pattern(curated_url):
continue

# generate_title_for_url returns a (title, error) pair; error is None on success
new_title, error = self.generate_title_for_url(curated_url)

if error:
DeltaResolvedTitleError.objects.update_or_create(
delta_url=curated_url, defaults={"title_pattern": self, "error_string": error} # lookup field
)
continue

# Skip if the generated title matches existing or if Delta already exists
if (
curated_url.generated_title == new_title
or DeltaUrl.objects.filter(url=curated_url.url, collection=self.collection).exists()
):
continue

# Create new Delta URL with the new title
# Copy every concrete field except the primary key and collection from the curated URL
fields = {
field.name: getattr(curated_url, field.name)
for field in curated_url._meta.fields
if field.name not in ["id", "collection"]
}
fields["generated_title"] = new_title
fields["to_delete"] = False
fields["collection"] = self.collection

delta_url = DeltaUrl.objects.create(**fields)

# Record successful title resolution
DeltaResolvedTitle.objects.create(title_pattern=self, delta_url=delta_url, resolved_title=new_title)
# Inserting here to avoid circular import issue
from ..tasks import process_title_resolutions

# Update titles for all matching Delta URLs
for delta_url in self.get_matching_delta_urls():
if not self.is_most_distinctive_pattern(delta_url):
continue
# Closure that enqueues the Celery task with this pattern's primary key
def queue_task():
process_title_resolutions.delay(self.id)

new_title, error = self.generate_title_for_url(delta_url)

if error:
DeltaResolvedTitleError.objects.update_or_create(
delta_url=delta_url, defaults={"title_pattern": self, "error_string": error} # lookup field
)
continue

# Update title and record resolution - key change here
DeltaResolvedTitle.objects.update_or_create(
delta_url=delta_url, # Only use delta_url for lookup
defaults={"title_pattern": self, "resolved_title": new_title},
)

delta_url.generated_title = new_title
delta_url.save()

# Update pattern relationships
self.update_affected_delta_urls_list()
# Queue the background task only after the transaction commits (i.e., once apply() has finished)
transaction.on_commit(queue_task)

def unapply(self) -> None:
"""
Expand Down Expand Up @@ -670,24 +610,45 @@ class DeltaResolvedTitleBase(models.Model):
title_pattern = models.ForeignKey(DeltaTitlePattern, on_delete=models.CASCADE)
delta_url = models.OneToOneField("sde_collections.DeltaUrl", on_delete=models.CASCADE)
created_at = models.DateTimeField(auto_now_add=True)
updated_at = models.DateTimeField(auto_now=True)

class Meta:
abstract = True


class DeltaResolvedTitle(DeltaResolvedTitleBase):
    """Title-resolution record for a single Delta URL, including its processing status."""

    class Status(models.TextChoices):
        """Lifecycle states of a title resolution."""

        PENDING = "pending", "Pending"
        PROCESSING = "processing", "Processing"
        RESOLVED = "resolved", "Resolved"
        FAILED = "failed", "Failed"

    resolved_title = models.CharField(blank=True, default="")
    # Nullable with no default: a row may carry no status at all.
    status = models.CharField(max_length=20, choices=Status.choices, null=True)

    class Meta:
        verbose_name = "Resolved Title"
        verbose_name_plural = "Resolved Titles"
        indexes = [
            # `created_at` is declared on the abstract base class.
            models.Index(fields=["status", "created_at"]),
        ]

    def save(self, *args, **kwargs):
        """Persist the record, clearing stale errors once the title is resolved.

        Error rows linked to the same Delta URL are deleted only when this
        record is saved as RESOLVED. Saving in any other state (pending,
        processing, failed, or no status) leaves existing error rows intact,
        so a FAILED record never erases the error that explains the failure.
        """
        if self.status == self.Status.RESOLVED:
            # A successful resolution supersedes any earlier error for this URL.
            DeltaResolvedTitleError.objects.filter(delta_url=self.delta_url).delete()
        super().save(*args, **kwargs)


class DeltaResolvedTitleError(DeltaResolvedTitleBase):
    """Record of a failed title resolution for a single Delta URL.

    Saving an error also marks the Delta URL's ``DeltaResolvedTitle`` row as
    FAILED and blanks its resolved title.
    """

    error_string = models.TextField(null=False, blank=False)
    http_status_code = models.IntegerField(null=True, blank=True)

    def save(self, *args, **kwargs):
        """Flag the matching ``DeltaResolvedTitle`` as failed, then persist the error."""
        # `delta_url` is a OneToOneField on the shared base, so at most one
        # DeltaResolvedTitle row can exist per Delta URL. Look the row up by
        # `delta_url` alone: also filtering on `title_pattern` would miss a row
        # created under a different pattern and then attempt an INSERT that
        # violates the one-to-one uniqueness constraint.
        DeltaResolvedTitle.objects.update_or_create(
            delta_url=self.delta_url,
            defaults={
                "title_pattern": self.title_pattern,
                "status": DeltaResolvedTitle.Status.FAILED,
                "resolved_title": "",
            },
        )
        super().save(*args, **kwargs)
Loading