Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions peachjam/adapters/indigo.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,14 +604,14 @@ def download_source_file(self, url, doc, title):
filename = f"{slugify(title)}.pdf"
f.write(r.content)

SourceFile.objects.update_or_create(
document=doc,
defaults={
"file": File(f, name=filename),
"mimetype": magic.from_file(f.name, mime=True),
"size": len(r.content),
},
)
# there is a small race condition here if SourceFile is created in the db while we do this. In that case
# the task will fail and be re-tried.
source_file = getattr(doc, "source_file", None) or SourceFile(document=doc)
source_file.track_changes()
Copy link
Contributor

@mugiwaramunyi mugiwaramunyi Mar 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay I understand it now....I think. A bad interaction between SourceFile's base class and the Lifecycle library, led to the weird behaviour.

Since the fix is the AttributeHooksMixin; Does this mean for classes that inherit it, they have to call track_changes() every time they want the hooks to fire, for all cases?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Classes that use AttributeHooksMixin must call track_changes() to setup change tracking, yes. So it's opt-in.

source_file.file = File(f, name=filename)
source_file.mimetype = magic.from_file(f.name, mime=True)
source_file.size = len(r.content)
source_file.save()

def get_size_from_url(self, url):
logger.info(" Getting the file size ...")
Expand Down
13 changes: 7 additions & 6 deletions peachjam/adapters/judgments.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,13 +247,14 @@ def attach_source_file(self, doc, created_doc):
ext = guess_extension(mimetype) or ""
filename = f"{slugify(doc['title'])[:200]}{ext}"

sf, _ = SourceFile.objects.update_or_create(
document=created_doc,
defaults={
"file": File(f, filename),
"mimetype": mimetype,
},
source_file = getattr(created_doc, "source_file", None) or SourceFile(
document=created_doc
)
source_file.track_changes()
source_file.file = File(f, filename)
source_file.mimetype = mimetype
source_file.save()
sf = source_file
sf.ensure_file_as_pdf()

def get_content_html(self, doc):
Expand Down
182 changes: 173 additions & 9 deletions peachjam/admin.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import copy
import json
import logging
from datetime import date

from background_task.models import Task
Expand All @@ -9,6 +10,7 @@
from django import forms
from django.conf import settings
from django.contrib import admin, messages
from django.contrib.admin import helpers
from django.contrib.admin.utils import flatten_fieldsets, quote
from django.contrib.auth import get_user_model
from django.contrib.auth.admin import UserAdmin
Expand All @@ -19,12 +21,13 @@
from django.db import transaction
from django.http.response import FileResponse, HttpResponse, HttpResponseRedirect
from django.shortcuts import get_object_or_404, redirect, render
from django.template.defaultfilters import filesizeformat
from django.template.response import TemplateResponse
from django.urls import path, reverse
from django.utils import timezone
from django.utils.dateparse import parse_date
from django.utils.dates import MONTHS
from django.utils.html import format_html
from django.utils.html import format_html, format_html_join
from django.utils.text import slugify
from django.utils.translation import gettext as _
from django.utils.translation import gettext_lazy
Expand Down Expand Up @@ -81,7 +84,6 @@
ExternalDocument,
Gazette,
GenericDocument,
Image,
Ingestor,
IngestorSetting,
Journal,
Expand Down Expand Up @@ -144,6 +146,7 @@
from peachjam_search.tasks import search_model_saved

User = get_user_model()
log = logging.getLogger(__name__)


class BaseAdmin(admin.ModelAdmin):
Expand Down Expand Up @@ -490,10 +493,6 @@ class AttachedFilesInline(BaseAttachmentFileInline):
form = AttachedFilesForm


class ImageInline(BaseAttachmentFileInline):
model = Image


class BackgroundTaskInline(GenericTabularInline):
model = Task
ct_field = "creator_content_type"
Expand Down Expand Up @@ -615,9 +614,7 @@ class DocumentAdmin(AccessGroupMixin, BaseAdmin):
PublicationFileInline,
AlternativeNameInline,
AttachedFilesInline,
ImageInline,
CustomPropertyInline,
BackgroundTaskInline,
]
list_display = (
"title",
Expand All @@ -640,6 +637,8 @@ class DocumentAdmin(AccessGroupMixin, BaseAdmin):
"metadata_json",
"work_link",
"document_access_link",
"background_tasks",
"images",
)
exclude = ("doc_type",)
date_hierarchy = "date"
Expand Down Expand Up @@ -700,6 +699,7 @@ class DocumentAdmin(AccessGroupMixin, BaseAdmin):
{
"fields": [
"source_html",
"images",
]
},
),
Expand All @@ -712,6 +712,7 @@ class DocumentAdmin(AccessGroupMixin, BaseAdmin):
"allow_robots",
"restricted",
"document_access_link",
"background_tasks",
"document_content_toc_json",
"metadata_json",
],
Expand Down Expand Up @@ -763,6 +764,66 @@ def document_content_html_is_akn(self, obj):
except ObjectDoesNotExist:
return False

@admin.display(description=gettext_lazy("Background tasks"))
def background_tasks(self, obj):
if not obj or not obj.pk:
return "-"

tasks = Task.objects.filter(
creator_content_type=ContentType.objects.get_for_model(
obj, for_concrete_model=False
),
creator_object_id=obj.pk,
).order_by("run_at", "pk")
if not tasks.exists():
return "-"

return format_html(
"<ul>{}</ul>",
format_html_join(
"",
'<li><a href="{}">{}</a> ({}, attempts: {})</li>',
(
(
reverse(
"admin:background_task_task_change",
kwargs={"object_id": task.pk},
),
task.task_name,
task.run_at,
task.attempts,
)
for task in tasks
),
),
)

@admin.display(description=gettext_lazy("Images"))
def images(self, obj):
if not obj or not obj.pk:
return "-"

images = obj.images.all().order_by("pk")
if not images.exists():
return "-"

return format_html(
"<ul>{}</ul>",
format_html_join(
"",
'<li><a href="{}" target="_blank">{}</a> ({}, {})</li>',
(
(
image.file.url,
image.filename or image.file.name,
image.mimetype,
filesizeformat(image.size),
)
for image in images
),
),
)

def get_form(self, request, obj=None, **kwargs):
if obj is None:
kwargs["fields"] = self.new_document_form_mixin.adjust_fields(
Expand All @@ -778,13 +839,117 @@ class NewForm(self.new_document_form_mixin, form):
return super().get_form(request, obj, **kwargs)

def render_change_form(self, request, context, *args, **kwargs):
# The document admin form is complex. Sometimes we get validation errors that are about hidden fields.
# So here we check for validation errors in the form and any inlines and add a non-field error to the main
# form with a summary of the fields that have errors. We also log the validation errors with some context to
# help with debugging.
if request.method == "POST" and not getattr(
request, "_document_admin_validation_reported", False
):
adminform = context.get("adminform")
report = self._build_validation_error_report(context)
if adminform and report["summary_fields"]:
request._document_admin_validation_reported = True
model_label = (
f"{self.model._meta.app_label}.{self.model._meta.model_name}"
)
object_id = getattr(context.get("original"), "pk", None)
user = getattr(request, "user", None)
summary_message = _(
"Validation errors were found in: %(fields)s. Check inline sections and non-field errors as well."
) % {"fields": ", ".join(report["summary_fields"])}
log.warning(
"Admin validation errors for %s object_id=%s user=%s path=%s details=%s",
model_label,
object_id,
user,
request.path,
report["details"],
)
form = adminform.form
if summary_message not in form.non_field_errors():
form.add_error(None, summary_message)
context["errors"] = helpers.AdminErrorList(
form,
[
inline_admin_formset.formset
for inline_admin_formset in context.get(
"inline_admin_formsets", []
)
],
)
self.message_user(
request,
summary_message,
level=messages.ERROR,
)

# this is our only chance to inject a pre-filled field from the querystring for both add and change
if request.GET.get("stage"):
context["adminform"].form.fields["edit_activity_stage"].initial = (
request.GET["stage"]
)
return super().render_change_form(request, context, *args, **kwargs)

def _build_validation_error_report(self, context):
report = {"summary_fields": [], "details": {"form": {}, "formsets": []}}
adminform = context.get("adminform")
if not adminform:
return report

def add_summary(label):
if label not in report["summary_fields"]:
report["summary_fields"].append(label)

form = adminform.form
if form.errors:
report["details"]["form"]["fields"] = {
field: [str(error) for error in errors]
for field, errors in form.errors.items()
}
for field in form.errors:
add_summary(field)

non_field_errors = [str(error) for error in form.non_field_errors()]
if non_field_errors:
report["details"]["form"]["non_field_errors"] = non_field_errors
add_summary(_("main form (non-field)"))

for inline_admin_formset in context.get("inline_admin_formsets", []):
formset = inline_admin_formset.formset
formset_report = {
"prefix": formset.prefix,
"non_form_errors": [str(error) for error in formset.non_form_errors()],
"forms": [],
}
if formset_report["non_form_errors"]:
add_summary(f"{formset.prefix} (non-form)")

for index, inline_form in enumerate(formset.forms):
form_errors = {
field: [str(error) for error in errors]
for field, errors in inline_form.errors.items()
}
inline_non_field_errors = [
str(error) for error in inline_form.non_field_errors()
]
if form_errors or inline_non_field_errors:
entry = {
"index": index,
"fields": form_errors,
"non_field_errors": inline_non_field_errors,
}
formset_report["forms"].append(entry)
for field in form_errors:
add_summary(f"{formset.prefix}[{index}].{field}")
if inline_non_field_errors:
add_summary(f"{formset.prefix}[{index}] (non-field)")

if formset_report["non_form_errors"] or formset_report["forms"]:
report["details"]["formsets"].append(formset_report)

return report

def save_model(self, request, obj, form, change):
if not change:
obj.created_by = request.user
Expand Down Expand Up @@ -1807,7 +1972,6 @@ class GazetteAdmin(ImportExportMixin, DocumentAdmin):
resource_classes = [GazetteResource]
inlines = [
SourceFileInline,
BackgroundTaskInline,
]
prepopulated_fields = {}

Expand Down
12 changes: 9 additions & 3 deletions peachjam/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,14 @@ def _save_m2m(self):
def process_upload_file(self, upload_file):
# store the uploaded file
upload_file.seek(0)
SourceFile(
source_file = SourceFile(
document=self.instance,
file=File(upload_file, name=upload_file.name),
filename=upload_file.name,
mimetype=upload_file.content_type,
).save()
)
source_file.track_changes()
source_file.file = File(upload_file, name=upload_file.name)
source_file.save()

@classmethod
def adjust_fieldsets(cls, fieldsets):
Expand Down Expand Up @@ -453,6 +455,10 @@ class Meta:
fields = "__all__"
exclude = ("file_as_pdf",)

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.instance.track_changes()


class PublicationFileForm(AttachmentFormMixin, forms.ModelForm):
class Meta:
Expand Down
13 changes: 10 additions & 3 deletions peachjam/models/attachments.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@
from django.template.loader import render_to_string
from django.urls import reverse
from django.utils.translation import gettext_lazy as _
from django_lifecycle import AFTER_SAVE, BEFORE_SAVE, LifecycleModelMixin
from django_lifecycle import AFTER_SAVE, BEFORE_SAVE
from docpipe.soffice import soffice_convert

from peachjam.helpers import html_to_png
from peachjam.models.lifecycle import on_attribute_changed
from peachjam.models.lifecycle import AttributeHooksMixin, on_attribute_changed
from peachjam.storage import DynamicStorageFileField

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -93,7 +93,14 @@ def from_docpipe_attachment(cls, attachment):
)


class SourceFile(LifecycleModelMixin, AttachmentAbstractModel):
class SourceFile(AttributeHooksMixin, AttachmentAbstractModel):
"""A file that was uploaded as the source file for a document. This is typically the original Word or PDF file.

The file field is a DynamicStorageFileField and so its state during __init__ differs to just after __init__,
so we can't rely on LifecycleModelMixin to automatically track changes. Instead, we use AttributeHooksMixin and
must explicitly start change tracking with track_changes().
"""

SAVE_FOLDER = "source_file"

document = models.OneToOneField(
Expand Down
Loading
Loading