Skip to content
Open
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
03c5801
Added is_searchable and made published accept datetime cut-off parame…
tim-schilling Sep 1, 2025
12c377e
Used Entry.get_absolute_url to encapsulate www host.
tim-schilling Sep 1, 2025
837e808
Added support for searching ecosystem and blog entries.
tim-schilling Sep 1, 2025
5e22aee
Switched to assertQuerySetEqual for documents existence check
tim-schilling Sep 4, 2025
edea21c
Fixed is_searchable help_text. It's now English.
tim-schilling Sep 4, 2025
e9d3d3b
Extracted get_search_config helper function.
tim-schilling Sep 4, 2025
f3c5a83
Fixed docstring of _sync_views_to_db
tim-schilling Sep 4, 2025
4fa01d6
Switched the _sync helpers to use short-circuit if statements.
tim-schilling Sep 4, 2025
021a57d
Replaced WEBSITE.value usages with WEBSITE
tim-schilling Sep 4, 2025
e5a9452
Switched to assertQuerySetEqual away from set comparisons.
tim-schilling Sep 4, 2025
bcffdee
Refactored DocumentUrlTests into two separate tests
tim-schilling Sep 4, 2025
a7b3bbe
Consolidated migrations to add is_searchable to one migration.
tim-schilling Sep 4, 2025
c1a61a9
Reworked _sync_views_to_db to almost be readable.
tim-schilling Sep 4, 2025
8e01cca
Switched published argument to as_of
tim-schilling Sep 5, 2025
629bc6d
Replaced DocumentRelease.support_end with Release.eol_date
tim-schilling Sep 17, 2025
c89240a
Moved more logic into SearchableView.
tim-schilling Sep 17, 2025
606d381
Renamed the key of DocumentationCategory.WEBSITE
tim-schilling Sep 18, 2025
396050c
Switched sync methods to use Document.objects.bulk_create()
tim-schilling Sep 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions blog/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,15 @@

@admin.register(Entry)
class EntryAdmin(admin.ModelAdmin):
list_display = ("headline", "pub_date", "is_active", "is_published", "author")
list_filter = ("is_active",)
list_display = (
"headline",
"pub_date",
"is_active",
"is_published",
"is_searchable",
"author",
)
list_filter = ("is_active", "is_searchable")
exclude = ("summary_html", "body_html")
prepopulated_fields = {"slug": ("headline",)}
raw_id_fields = ["social_media_card"]
Expand Down
18 changes: 18 additions & 0 deletions blog/migrations/0006_entry_is_searchable.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 5.2 on 2025-09-03 20:02

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``is_searchable`` boolean field to the blog ``entry`` model."""

    dependencies = [
        ("blog", "0005_entry_social_media_card"),
    ]

    operations = [
        migrations.AddField(
            model_name="entry",
            name="is_searchable",
            field=models.BooleanField(
                default=False,
                help_text=(
                    "Tick to make this entry appear in the Django documentation search."
                ),
            ),
        ),
    ]
17 changes: 14 additions & 3 deletions blog/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,17 @@ def _md_slugify(value, separator):


class EntryQuerySet(models.QuerySet):
def published(self):
return self.active().filter(pub_date__lte=timezone.now())
def published(self, as_of=None):
    """
    Return the active entries whose ``pub_date`` is on or before ``as_of``.

    When ``as_of`` is not given (``None``), the current time is used as the
    cut-off.
    """
    cutoff = timezone.now() if as_of is None else as_of
    active_entries = self.active()
    return active_entries.filter(pub_date__lte=cutoff)

def active(self):
    """Return only the entries whose ``is_active`` flag is set."""
    active_only = self.filter(is_active=True)
    return active_only

def searchable(self):
    """Return only the entries whose ``is_searchable`` flag is set."""
    searchable_only = self.filter(is_searchable=True)
    return searchable_only


class ContentFormat(models.TextChoices):
REST = "reST", "reStructuredText"
Expand Down Expand Up @@ -126,6 +131,12 @@ class Entry(models.Model):
),
default=False,
)
is_searchable = models.BooleanField(
default=False,
help_text=_(
"Tick to make this entry appear in the Django documentation search."
),
)
pub_date = models.DateTimeField(
verbose_name=_("Publication date"),
help_text=_(
Expand Down Expand Up @@ -168,7 +179,7 @@ def get_absolute_url(self):
"day": self.pub_date.strftime("%d").lower(),
"slug": self.slug,
}
return reverse("weblog:entry", kwargs=kwargs)
return reverse("weblog:entry", kwargs=kwargs, host="www")

def is_published(self):
"""
Expand Down
25 changes: 25 additions & 0 deletions blog/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,31 @@ def test_manager_published(self):
["past active"],
transform=lambda entry: entry.headline,
)
self.assertQuerySetEqual(
Entry.objects.published(self.tomorrow),
["future active", "past active"],
transform=lambda entry: entry.headline,
)

def test_manager_searchable(self):
    """
    The Entry manager's `searchable` method only returns the entries
    that have `is_searchable` set.
    """
    # Created in this order: the non-searchable entry first, then the
    # searchable one.
    entry_fields = (
        {"is_searchable": False, "headline": "not searchable", "slug": "a"},
        {"is_searchable": True, "headline": "searchable", "slug": "b"},
    )
    for fields in entry_fields:
        Entry.objects.create(pub_date=self.yesterday, **fields)

    searchable_entries = Entry.objects.searchable()
    self.assertQuerySetEqual(
        searchable_entries,
        ["searchable"],
        transform=lambda entry: entry.headline,
    )

def test_docutils_safe(self):
"""
Expand Down
18 changes: 18 additions & 0 deletions docs/migrations/0007_documentrelease_support_end.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 5.2 on 2025-07-23 16:31

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the optional ``support_end`` date field to ``documentrelease``."""

    dependencies = [
        ("docs", "0006_alter_document_metadata_noop"),
    ]

    operations = [
        migrations.AddField(
            model_name="documentrelease",
            name="support_end",
            field=models.DateField(
                blank=True,
                help_text="The end of support for this release of Django.",
                null=True,
            ),
        ),
    ]
89 changes: 85 additions & 4 deletions docs/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,31 @@
from django.db import models, transaction
from django.db.models import Q
from django.db.models.fields.json import KeyTextTransform
from django.template.loader import get_template
from django.utils.functional import cached_property
from django.utils.html import strip_tags
from django_hosts.resolvers import reverse

from blog.models import Entry
from releases.models import Release

from . import utils
from .search import (
DEFAULT_TEXT_SEARCH_CONFIG,
DOCUMENT_SEARCH_VECTOR,
SEARCHABLE_VIEWS,
START_SEL,
STOP_SEL,
TSEARCH_CONFIG_LANGUAGES,
DocumentationCategory,
)


def get_search_config(lang):
    """
    Determine the PostgreSQL search language.

    The first two characters of ``lang`` are looked up in
    ``TSEARCH_CONFIG_LANGUAGES``; ``DEFAULT_TEXT_SEARCH_CONFIG`` is returned
    when there is no entry for them.
    """
    language_prefix = lang[:2]
    return TSEARCH_CONFIG_LANGUAGES.get(language_prefix, DEFAULT_TEXT_SEARCH_CONFIG)


class DocumentReleaseQuerySet(models.QuerySet):
def current(self, lang="en"):
current = self.get(is_default=True)
Expand Down Expand Up @@ -95,6 +104,11 @@ class DocumentRelease(models.Model):
on_delete=models.CASCADE,
)
is_default = models.BooleanField(default=False)
support_end = models.DateField(
null=True,
blank=True,
help_text="The end of support for this release of Django.",
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need a dedicated field for this? Why couldn't we get the information from release.eol_date instead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is why we do code reviews. Let me get this switched over.


objects = DocumentReleaseQuerySet.as_manager()

Expand Down Expand Up @@ -202,16 +216,81 @@ def sync_to_db(self, decoded_documents):
path=document_path,
title=html.unescape(strip_tags(document["title"])),
metadata=document,
config=TSEARCH_CONFIG_LANGUAGES.get(
self.lang[:2], DEFAULT_TEXT_SEARCH_CONFIG
),
config=get_search_config(self.lang),
)
for document in self.documents.all():
document.metadata["breadcrumbs"] = list(
Document.objects.breadcrumbs(document).values("title", "path")
)
document.save(update_fields=("metadata",))

self._sync_blog_to_db()
self._sync_views_to_db()

def _sync_blog_to_db(self):
    """
    Create a search document on this release for every searchable blog
    entry published on or before the release's support end date.

    Nothing is created for non-English releases or for releases without a
    support end date.
    """
    # The blog is only written in English, and we need to know the
    # release's support end to know when to stop considering blog posts
    # relevant.
    is_english = self.lang == "en"
    if not (is_english and self.support_end):
        return

    relevant_entries = Entry.objects.published(self.support_end).searchable()
    for entry in relevant_entries:
        # Blog entries get a single breadcrumb filed under the website
        # category.
        news_breadcrumb = {
            "path": DocumentationCategory.WEBSITE,
            "title": "News",
        }
        Document.objects.create(
            release=self,
            path=entry.get_absolute_url(),
            title=entry.headline,
            metadata={
                "body": entry.body_html,
                "breadcrumbs": [news_breadcrumb],
                "parents": DocumentationCategory.WEBSITE,
                "slug": entry.slug,
                "title": entry.headline,
                "toc": "",
            },
            config=get_search_config(self.lang),
        )

def _sync_views_to_db(self):
"""
Sync the specific views into search based on the release documents
support end date.
"""
if self.lang != "en":
return # The searchable views are only written in English currently

for searchable_view in SEARCHABLE_VIEWS:
absolute_url = reverse(searchable_view.url_name, host="www")
# This must match the template used for the url `community-ecosystem`
html = get_template("aggregator/ecosystem.html").render()
# Need to parse the body element.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we make absolute_url and html attributes (likely a property) of SearchableView? The name of the template should be an attribute of the SearchableView too and not be hardcoded here.

Document.objects.create(
release=self,
path=absolute_url,
title=searchable_view.page_title,
metadata={
"body": html,
"breadcrumbs": [
{
"path": DocumentationCategory.WEBSITE,
"title": "Website",
},
],
"parents": DocumentationCategory.WEBSITE,
"slug": searchable_view.url_name,
"title": searchable_view.page_title,
"toc": "",
},
config=get_search_config(self.lang),
)


def _clean_document_path(path):
# We have to be a bit careful to reverse-engineer the correct
Expand All @@ -224,7 +303,9 @@ def _clean_document_path(path):


def document_url(doc):
if doc.path:
if doc.metadata.get("parents") == DocumentationCategory.WEBSITE:
return doc.path
elif doc.path:
kwargs = {
"lang": doc.release.lang,
"version": doc.release.version,
Expand Down
14 changes: 14 additions & 0 deletions docs/search.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from dataclasses import dataclass

from django.contrib.postgres.search import SearchVector
from django.db.models import F, TextChoices
from django.db.models.fields.json import KeyTextTransform
Expand Down Expand Up @@ -65,10 +67,22 @@ class DocumentationCategory(TextChoices):
TOPICS = "topics", _("Using Django")
HOWTO = "howto", _("How-to guides")
RELEASE_NOTES = "releases", _("Release notes")
WEBSITE = "weblog", _("Django Website")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd change the value to keep it consistent with the variable name (the goal is to eventually index all kinds of pages and not just the blog, right?):

Suggested change
WEBSITE = "weblog", _("Django Website")
WEBSITE = "website", _("Django Website")

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this is a bigger question. We need the value to be "weblog" to make the breadcrumbs work properly in the search area without adding another hack on the search results side to adjust things. I suppose we could just have "www.djangoproject.com/website/" redirect to "www.djangoproject.com/weblog/". However, I have no idea what to call that category of results on the actual page. I think right now I have it as "Django Website" which doesn't seem right.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do the breadcrumbs work for you in the first place? I don't think I've ever used that feature and it doesn't seem to work correctly for me locally: even for documents with two or more parents, the links for all the parents are all the same and go to the document's own page.

Not sure I'd want to start adding redirects, that seems like solving the problem at the wrong level to me.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have no idea what I ran into originally. Looks like we can rename this to be whatever we want.

The breadcrumb links on the search page just point directly to the search result, not to the actual parents. That may be a bug, to be honest 🫣


@classmethod
def parse(cls, value, default=None):
    """
    Return the member of this class matching ``value``.

    ``default`` (``None`` unless given) is returned when ``value`` does not
    correspond to any member, i.e. when ``cls(value)`` raises ``ValueError``.
    """
    try:
        return cls(value)
    except ValueError:
        # Honour the caller-supplied fallback; previously the ``default``
        # argument was accepted but ignored and ``None`` was always returned.
        return default


@dataclass
class SearchableView:
    """A view that is listed in ``SEARCHABLE_VIEWS``."""

    # Title given to the view's page.
    page_title: str
    # Name of the URL pattern for the view.
    url_name: str


# The views listed as searchable.
SEARCHABLE_VIEWS = [
    SearchableView(
        page_title="Django's Ecosystem",
        url_name="community-ecosystem",
    ),
]
6 changes: 3 additions & 3 deletions docs/templates/docs/search_results.html
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ <h2>{% translate "No search query given" %}</h2>
{% for result in page.object_list %}
<dt>
<h2 class="result-title">
<a href="{% url 'document-detail' lang=result.release.lang version=result.release.version url=result.path host 'docs' %}{% if not start_sel in result.headline %}{{ result.highlight|fragment }}{% endif %}">{{ result.headline|safe }}</a>
<a href="{{ result.get_absolute_url }}{% if not start_sel in result.headline %}{{ result.highlight|fragment }}{% endif %}">{{ result.headline|safe }}</a>
</h2>
<span class="meta breadcrumbs">
{% for breadcrumb in result.breadcrumbs %}
<a href="{% url 'document-detail' lang=result.release.lang version=result.release.version url=breadcrumb.path host 'docs' %}">{{ breadcrumb.title }}</a>{% if not forloop.last %} <span class="arrow">»</span>{% endif %}
<a href="{{ result.get_absolute_url }}">{{ breadcrumb.title }}</a>{% if not forloop.last %} <span class="arrow">»</span>{% endif %}
{% endfor %}
</span>
</dt>
Expand All @@ -60,7 +60,7 @@ <h2 class="result-title">
<ul class="code-links">
{% for name, value in result_code_links.items %}
<li>
<a href="{% url 'document-detail' lang=result.release.lang version=result.release.version url=result.path host 'docs' %}#{{ value.full_path }}">
<a href="{{ result.get_absolute_url }}#{{ value.full_path }}">
<div>
<code>{{ name }}</code>
{% if value.module_path %}<div class="meta">{{ value.module_path }}</div>{% endif %}
Expand Down
Loading