Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
03c5801
Added is_searchable and made published accept datetime cut-off parame…
tim-schilling Sep 1, 2025
12c377e
Used Entry.get_absolute_url to encapsulate www host.
tim-schilling Sep 1, 2025
837e808
Added support for searching ecosystem and blog entries.
tim-schilling Sep 1, 2025
5e22aee
Switched to assertQuerySetEqual for documents existence check
tim-schilling Sep 4, 2025
edea21c
Fixed is_searchable help_text. It's now English.
tim-schilling Sep 4, 2025
e9d3d3b
Extracted get_search_config helper function.
tim-schilling Sep 4, 2025
f3c5a83
Fixed docstring of _sync_views_to_db
tim-schilling Sep 4, 2025
4fa01d6
Switched the _sync helpers to use short-circuit if statements.
tim-schilling Sep 4, 2025
021a57d
Replaced WEBSITE.value usages with WEBSITE
tim-schilling Sep 4, 2025
e5a9452
Switched to assertQuerySetEqual away from set comparisons.
tim-schilling Sep 4, 2025
bcffdee
Refactored DocumentUrlTests into two separate tests
tim-schilling Sep 4, 2025
a7b3bbe
Consolidated migrations to add is_searchable to one migration.
tim-schilling Sep 4, 2025
c1a61a9
Reworked _sync_views_to_db to almost be readable.
tim-schilling Sep 4, 2025
8e01cca
Switched published argument to as_of
tim-schilling Sep 5, 2025
629bc6d
Replaced DocumentRelease.support_end with Release.eol_date
tim-schilling Sep 17, 2025
c89240a
Moved more logic into SearchableView.
tim-schilling Sep 17, 2025
606d381
Renamed the key of DocumentationCategory.WEBSITE
tim-schilling Sep 18, 2025
396050c
Switched sync methods to use Document.objects.bulk_create()
tim-schilling Sep 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions blog/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,15 @@

@admin.register(Entry)
class EntryAdmin(admin.ModelAdmin):
list_display = ("headline", "pub_date", "is_active", "is_published", "author")
list_filter = ("is_active",)
list_display = (
"headline",
"pub_date",
"is_active",
"is_published",
"is_searchable",
"author",
)
list_filter = ("is_active", "is_searchable")
exclude = ("summary_html", "body_html")
prepopulated_fields = {"slug": ("headline",)}
raw_id_fields = ["social_media_card"]
Expand Down
18 changes: 18 additions & 0 deletions blog/migrations/0006_entry_is_searchable.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 5.2 on 2025-07-24 07:21

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('blog', '0005_entry_social_media_card'),
]

operations = [
migrations.AddField(
model_name='entry',
name='is_searchable',
field=models.BooleanField(help_text='Tick to make this entry allow this entry to appear in the Django documentation search.', null=True),
),
]
19 changes: 19 additions & 0 deletions blog/migrations/0007_set_is_searchable.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 5.2 on 2025-07-24 07:22

from django.db import migrations


def set_is_searchable(apps, schema_editor):
Entry = apps.get_model("blog", "Entry")
# If this is large, this should be split into batched updates
Entry.objects.filter(is_searchable__isnull=True).update(is_searchable=False)

class Migration(migrations.Migration):

dependencies = [
('blog', '0006_entry_is_searchable'),
]

operations = [
migrations.RunPython(set_is_searchable, reverse_code=migrations.RunPython.noop, elidable=True),
]
18 changes: 18 additions & 0 deletions blog/migrations/0008_alter_entry_is_searchable.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 5.2 on 2025-07-24 07:22

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('blog', '0007_set_is_searchable'),
]

operations = [
migrations.AlterField(
model_name='entry',
name='is_searchable',
field=models.BooleanField(default=False, help_text='Tick to make this entry allow this entry to appear in the Django documentation search.'),
),
]
18 changes: 15 additions & 3 deletions blog/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,17 @@ def _md_slugify(value, separator):


class EntryQuerySet(models.QuerySet):
def published(self):
return self.active().filter(pub_date__lte=timezone.now())
def published(self, pub_date=None):
if pub_date is None:
pub_date = timezone.now()
return self.active().filter(pub_date__lte=pub_date)

def active(self):
return self.filter(is_active=True)

def searchable(self):
return self.filter(is_searchable=True)


class ContentFormat(models.TextChoices):
REST = "reST", "reStructuredText"
Expand Down Expand Up @@ -126,6 +131,13 @@ class Entry(models.Model):
),
default=False,
)
is_searchable = models.BooleanField(
default=False,
help_text=_(
"Tick to make this entry allow this entry to appear in the "
"Django documentation search."
),
)
pub_date = models.DateTimeField(
verbose_name=_("Publication date"),
help_text=_(
Expand Down Expand Up @@ -168,7 +180,7 @@ def get_absolute_url(self):
"day": self.pub_date.strftime("%d").lower(),
"slug": self.slug,
}
return reverse("weblog:entry", kwargs=kwargs)
return reverse("weblog:entry", kwargs=kwargs, host="www")

def is_published(self):
"""
Expand Down
25 changes: 25 additions & 0 deletions blog/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,31 @@ def test_manager_published(self):
["past active"],
transform=lambda entry: entry.headline,
)
self.assertQuerySetEqual(
Entry.objects.published(self.tomorrow),
["future active", "past active"],
transform=lambda entry: entry.headline,
)

def test_manager_searchable(self):
"""
Make sure that the Entry manager's `searchable` method works
"""
Entry.objects.create(
pub_date=self.yesterday,
is_searchable=False,
headline="not searchable",
slug="a",
)
Entry.objects.create(
pub_date=self.yesterday, is_searchable=True, headline="searchable", slug="b"
)

self.assertQuerySetEqual(
Entry.objects.searchable(),
["searchable"],
transform=lambda entry: entry.headline,
)

def test_docutils_safe(self):
"""
Expand Down
18 changes: 18 additions & 0 deletions docs/migrations/0007_documentrelease_support_end.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 5.2 on 2025-07-23 16:31

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('docs', '0006_alter_document_metadata_noop'),
]

operations = [
migrations.AddField(
model_name='documentrelease',
name='support_end',
field=models.DateField(blank=True, help_text='The end of support for this release of Django.', null=True),
),
]
90 changes: 89 additions & 1 deletion docs/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,13 @@
from django.db import models, transaction
from django.db.models import Q
from django.db.models.fields.json import KeyTextTransform
from django.test import RequestFactory
from django.urls import resolve, reverse as reverse_path
from django.utils.functional import cached_property
from django.utils.html import strip_tags
from django_hosts.resolvers import reverse

from blog.models import Entry
from releases.models import Release

from . import utils
Expand All @@ -31,6 +34,7 @@
START_SEL,
STOP_SEL,
TSEARCH_CONFIG_LANGUAGES,
DocumentationCategory,
)


Expand Down Expand Up @@ -95,6 +99,11 @@ class DocumentRelease(models.Model):
on_delete=models.CASCADE,
)
is_default = models.BooleanField(default=False)
support_end = models.DateField(
null=True,
blank=True,
help_text="The end of support for this release of Django.",
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need a dedicated field for this, why couldn't we get the information from release.eol_date instead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is why we do code reviews. Let me get this switched over.


objects = DocumentReleaseQuerySet.as_manager()

Expand Down Expand Up @@ -212,6 +221,83 @@ def sync_to_db(self, decoded_documents):
)
document.save(update_fields=("metadata",))

self._sync_blog_to_db()
self._sync_views_to_db()

def _sync_blog_to_db(self):
"""
Sync the blog entries into search based on the release documents
support end date.
"""
if self.lang == "en" and self.support_end:
for entry in Entry.objects.published(self.support_end).searchable():
Document.objects.create(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use bulk_create() instead of creating individual Document objects?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't tell if this is a suggestion or a question. If it's a question, yes it's possible. I didn't because I honestly don't expect this to be more than 50 records.

If this is more of a suggestion, should I take the optimization to batch fetch the searchable entries as well? That way if we somehow have 2000 records, we don't fetch them all from the DB at once. I've also used .iterator(), collected until the batch size, then called bulk_create().

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bmispelon I think this is still an open conversation.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Converted over.

release=self,
path=entry.get_absolute_url(),
title=entry.headline,
metadata={
"body": entry.body_html,
"breadcrumbs": [
{
"path": DocumentationCategory.WEBSITE.value,
"title": "News",
},
],
"parents": DocumentationCategory.WEBSITE.value,
"slug": entry.slug,
"title": entry.headline,
"toc": "",
},
config=TSEARCH_CONFIG_LANGUAGES.get(
self.lang[:2], DEFAULT_TEXT_SEARCH_CONFIG
),
)

def _sync_views_to_db(self):
"""
Sync the blog entries into search based on the release documents
support end date.
"""
if self.lang == "en":
# The request needs to come through as a valid one, it's best if it
# matches the exact host we're looking for.
www_hosts = [
host for host in settings.ALLOWED_HOSTS if host.startswith("www.")
]
if not www_hosts or not (www_host := www_hosts[0]):
return
synced_views = [
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should live in a constant at the top level of a module (I'll let you decide which module makes more sense 😁 )

Copy link
Member Author

@tim-schilling tim-schilling Sep 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we're moving this from the code away from the usage logic, then we should use a dataclass so it's not relying on positional arguments. That gets a bit more verbose and likely could go in an entirely separate module.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

May be side-stepped due to #2136 (comment)

# Page title, url name, url kwargs
("Django's Ecosystem", "community-ecosystem", {}),
]
for title, url_name, kwargs in synced_views:
absolute_url = reverse(url_name, kwargs=kwargs, host="www")
path = reverse_path(url_name, kwargs=kwargs)
request = RequestFactory().get(path, HTTP_HOST=www_host)
body = resolve(path).func(request).render().text
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a lot of stuff going on in such a few lines: you make it look deceptively simple 😁

I'm curious: did you try other approaches before settling on this? I'm a bit worried that the whole www_host logic could be a bit brittle. If we're calling the view function directly anyway, can we skip setting the host on the request object?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is definitely for a reason, but let me try once more and better document the weirdness.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, so the simplest we can get it is to use get_template("aggregator/ecosystem.html").render(). However, that does sort of open us to the problem of the view (which is defined in the urls.py) of using a different template or implementation than expected. However, using this approach eliminates the need to get the host which eliminates having the search the ALLOWED_HOSTS setting. So I think it's worth it. Let me know what you think.

# Need to parse the body element.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Forgot this TODO yet.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bmispelon do you have any good ideas here?

Document.objects.create(
release=self,
path=absolute_url,
title=title,
metadata={
"body": body,
"breadcrumbs": [
{
"path": DocumentationCategory.WEBSITE.value,
"title": "Website",
},
],
"parents": DocumentationCategory.WEBSITE.value,
"slug": url_name,
"title": title,
"toc": "",
},
config=TSEARCH_CONFIG_LANGUAGES.get(
self.lang[:2], DEFAULT_TEXT_SEARCH_CONFIG
),
)


def _clean_document_path(path):
# We have to be a bit careful to reverse-engineer the correct
Expand All @@ -224,7 +310,9 @@ def _clean_document_path(path):


def document_url(doc):
if doc.path:
if doc.metadata.get("parents") == DocumentationCategory.WEBSITE.value:
return doc.path
elif doc.path:
kwargs = {
"lang": doc.release.lang,
"version": doc.release.version,
Expand Down
1 change: 1 addition & 0 deletions docs/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class DocumentationCategory(TextChoices):
TOPICS = "topics", _("Using Django")
HOWTO = "howto", _("How-to guides")
RELEASE_NOTES = "releases", _("Release notes")
WEBSITE = "weblog", _("Django Website")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd change the value to keep it consistent with the variable name (the goal is to eventually index all kinds of pages and not just the blog, right?):

Suggested change
WEBSITE = "weblog", _("Django Website")
WEBSITE = "website", _("Django Website")

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this is a bigger question. We need the value to be "weblog" to make the breadcrumbs work properly in the search area without adding another hack on the search results side to adjust things. I suppose we could just have "www.djangoproject.com/website/" redirect to "www.djangoproject.com/weblog/". However, I have no idea what to call that category of results on the actual page. I think right now I have it as "Django Website" which doesn't seem right.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do the breadcrumbs work for you in the first place? I don't think I've ever used that feature and it doesn't seem to work correctly for me locally: even for documents with two or more parents, the links for all the parents are all the same and go to the document's own page.

Not sure I'd want to start adding redirects, that seems like solving the problem at the wrong level to me.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have no idea what I ran into originally. Looks like we can rename this to be whatever we want.

The breadcrumbs links on the search just point directly to the search result. Not the actual parents. May be a bug to be honest 🫣


@classmethod
def parse(cls, value, default=None):
Expand Down
6 changes: 3 additions & 3 deletions docs/templates/docs/search_results.html
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ <h2>{% translate "No search query given" %}</h2>
{% for result in page.object_list %}
<dt>
<h2 class="result-title">
<a href="{% url 'document-detail' lang=result.release.lang version=result.release.version url=result.path host 'docs' %}{% if not start_sel in result.headline %}{{ result.highlight|fragment }}{% endif %}">{{ result.headline|safe }}</a>
<a href="{{ result.get_absolute_url }}{% if not start_sel in result.headline %}{{ result.highlight|fragment }}{% endif %}">{{ result.headline|safe }}</a>
</h2>
<span class="meta breadcrumbs">
{% for breadcrumb in result.breadcrumbs %}
<a href="{% url 'document-detail' lang=result.release.lang version=result.release.version url=breadcrumb.path host 'docs' %}">{{ breadcrumb.title }}</a>{% if not forloop.last %} <span class="arrow">»</span>{% endif %}
<a href="{{ result.get_absolute_url }}">{{ breadcrumb.title }}</a>{% if not forloop.last %} <span class="arrow">»</span>{% endif %}
{% endfor %}
</span>
</dt>
Expand All @@ -60,7 +60,7 @@ <h2 class="result-title">
<ul class="code-links">
{% for name, value in result_code_links.items %}
<li>
<a href="{% url 'document-detail' lang=result.release.lang version=result.release.version url=result.path host 'docs' %}#{{ value.full_path }}">
<a href="{{ result.get_absolute_url }}#{{ value.full_path }}">
<div>
<code>{{ name }}</code>
{% if value.module_path %}<div class="meta">{{ value.module_path }}</div>{% endif %}
Expand Down
Loading