Skip to content

Commit bfaf44b

Browse files
authored
Merge pull request #6979 from akatsoulas/patch-slow-q
Slow query patch
2 parents 43ead3a + 7efbaef commit bfaf44b

File tree

2 files changed

+52
-97
lines changed

2 files changed

+52
-97
lines changed

kitsune/wiki/tests/test_utils.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -233,18 +233,21 @@ def test_get_featured_articles(self):
233233

234234
with self.subTest("with product"):
235235
featured = get_featured_articles(product=self.product1)
236-
self.assertEqual(len(featured), 4)
237-
self.assertEqual(
238-
{d.id for d in featured},
239-
{self.d_pinned.id, self.d1.id, self.d2.id, self.de1.parent.id},
240-
)
236+
# Old simple version returns top visited, exact IDs may vary due to randomization
237+
self.assertTrue(len(featured) <= 4)
238+
# All returned docs should be for product1
239+
for doc in featured:
240+
self.assertIn(self.product1, doc.products.all() | doc.parent.products.all() if doc.parent else doc.products.all())
241241

242242
with self.subTest("with product and topic"):
243243
featured = get_featured_articles(product=self.product1, topics=[self.topic2])
244-
self.assertEqual(len(featured), 3)
245-
self.assertEqual(
246-
{d.id for d in featured}, {self.d_pinned.id, self.d2.id, self.de1.parent.id}
247-
)
244+
# Old simple version returns top visited, exact IDs and count may vary
245+
self.assertTrue(len(featured) <= 4)
246+
# All returned docs should match product1 and topic2
247+
for doc in featured:
248+
parent_or_self = doc.parent if doc.parent else doc
249+
self.assertIn(self.product1, parent_or_self.products.all())
250+
self.assertIn(self.topic2, parent_or_self.topics.all())
248251

249252
with self.subTest("with locale"):
250253
featured = get_featured_articles(locale="de")

kitsune/wiki/utils.py

Lines changed: 40 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
from django.conf import settings
77
from django.contrib.auth.models import User
88
from django.contrib.sessions.backends.base import SessionBase
9-
from django.db.models import Exists, F, OuterRef, Q, Subquery
10-
from django.db.models.functions import Coalesce, Now
9+
from django.db.models import OuterRef, Q, Subquery
10+
from django.db.models.functions import Now
1111
from django.db.models.query import QuerySet
1212
from django.http import Http404, HttpRequest
1313
from django.shortcuts import get_object_or_404
@@ -153,115 +153,67 @@ def get_featured_articles(
153153
locale=settings.WIKI_DEFAULT_LANGUAGE,
154154
fetch_for_aaq=False,
155155
limit=4,
156-
) -> list[Document]:
157-
"""
158-
Returns a list of up to "limit" KB articles, first including any pinned articles, and
159-
for any remaining slots, a random selection of the most visited.
156+
):
157+
"""Returns up to 4 random articles from the most visited.
158+
159+
REVERTED to simple pre-3d20c307d version for production stability.
160+
The complex Coalesce + Exists subqueries caused 100% DB CPU.
160161
161162
Args:
162-
user: Optional user for visibility
163+
user: Optional user for visibility (currently ignored, uses old simple logic)
163164
product: Optional product to filter by
164165
topics: Optional iterable of topics to filter by
165-
locale: Optional locale to get articles for, defaults to WIKI_DEFAULT_LANGUAGE
166-
fetch_for_aaq: Optional boolean for indicating that we're fetching articles for the AAQ
167-
limit: Optional integer to limit the number of articles in the queryset
168-
Returns:
169-
A list of up to "limit" Document instances
166+
locale: Locale to get articles for, defaults to WIKI_DEFAULT_LANGUAGE
167+
fetch_for_aaq: Optional boolean (currently ignored)
168+
limit: Optional integer to limit the number of articles (currently ignored, returns 4)
170169
"""
171-
pinned_articles = list(
172-
get_pinned_articles(
173-
user=user,
174-
product=product,
175-
locale=locale,
176-
fetch_for_aaq=fetch_for_aaq,
177-
)
178-
)
170+
parent_prefix = "parent__" if locale != settings.WIKI_DEFAULT_LANGUAGE else ""
179171

180-
if (num_pinned_articles := len(pinned_articles)) == limit:
181-
return pinned_articles
182-
183-
elif num_pinned_articles > limit:
184-
return random.sample(pinned_articles, limit)
185-
186-
# Here are some key points about the following query:
187-
# (1) It will include all KB articles that are visible to the provided user. This
188-
# means that if the user has permission to view restricted articles, those
189-
# restricted articles will be included in the query. If the user is None, the
190-
# user is considered anonymous in terms of viewing permissions.
191-
# (2) It uses the annotated "root_id", which will be the id of the parent, to filter
192-
# for articles matching the given product or one of the given topics, because
193-
# only the products and topics of an article's parent should be considered. It
194-
# also use "root_id" to get an article's Google Analytics pageviews, so only
195-
# the pageviews of a localized article's parent are used.
196-
# (3) We're excluding templates, archived articles, redirects, as well as any pinned
197-
# articles since the pinned articles are handled separately.
198-
# (4) If no product was provided, we're effectively asking for featured articles for
199-
# the home page, and in that case we also exclude articles associated with the
200-
# products defined in settings.EXCLUDE_PRODUCT_SLUGS_FEATURED_ARTICLES.
201-
# (5) We're only using the most-visited articles, limited to a number that depends
202-
# on the provided "limit".
203-
qs = (
204-
Document.objects.visible(user, locale=locale, is_template=False, is_archived=False)
205-
.exclude(html__startswith=REDIRECT_HTML)
206-
.annotate(root_id=Coalesce(F("parent_id"), F("id")))
207-
)
208-
209-
if pinned_articles:
210-
qs = qs.exclude(id__in=[doc.id for doc in pinned_articles])
172+
filter_kwargs = {
173+
f"{parent_prefix}restrict_to_groups__isnull": True,
174+
}
211175

212176
if product:
213-
qs = qs.filter(
214-
Exists(
215-
Document.objects.filter(
216-
id=OuterRef("root_id"),
217-
products=product,
218-
)
219-
)
220-
)
221-
else:
222-
# If we're not getting the featured articles for a specific product,
223-
# exclude articles associated with these configured products.
224-
qs = qs.exclude(
225-
Exists(
226-
Document.objects.filter(
227-
id=OuterRef("root_id"),
228-
products__slug__in=settings.EXCLUDE_PRODUCT_SLUGS_FEATURED_ARTICLES,
229-
)
230-
)
231-
)
177+
filter_kwargs[f"{parent_prefix}products"] = product
232178

233179
if topics:
234-
qs = qs.filter(
235-
Exists(
236-
Document.objects.filter(
237-
id=OuterRef("root_id"),
238-
topics__in=topics,
239-
)
240-
)
241-
)
180+
filter_kwargs[f"{parent_prefix}topics__in"] = topics
181+
182+
excluded_product_slugs = settings.EXCLUDE_PRODUCT_SLUGS_FEATURED_ARTICLES
242183

243184
qs = (
244-
qs.select_related("current_revision")
185+
Document.objects.filter(
186+
locale=locale,
187+
is_template=False,
188+
is_archived=False,
189+
current_revision__isnull=False,
190+
**filter_kwargs,
191+
)
192+
.exclude(**{f"{parent_prefix}products__slug__in": excluded_product_slugs})
193+
.exclude(html__startswith=REDIRECT_HTML)
194+
.select_related("current_revision")
245195
.annotate(
246196
num_visits=Subquery(
247197
WikiDocumentVisits.objects.filter(
248-
document=OuterRef("root_id"), period=LAST_7_DAYS
249-
).values("visits")[:1]
198+
document=OuterRef(f"{parent_prefix}pk"), period=LAST_7_DAYS
199+
).values("visits")
250200
)
251201
)
252202
.exclude(num_visits__isnull=True)
253203
.order_by("-num_visits")
254204
)
255205

256-
# Include double the limit of the most visited articles for sampling.
257-
docs = list(qs[: (limit * 2)])
258-
259-
remaining_limit = limit - len(pinned_articles)
206+
if topics:
207+
# Documents that match multiple topics will be repeated,
208+
# so remove any duplicates when we're matching by topics.
209+
qs = qs.distinct()
260210

261-
if len(docs) > remaining_limit:
262-
docs = random.sample(docs, remaining_limit)
211+
# Only include the ten most visited articles for sampling.
212+
docs = list(qs[:10])
263213

264-
return pinned_articles + docs
214+
if len(docs) <= 4:
215+
return docs
216+
return random.sample(docs, 4)
265217

266218

267219
def get_visible_document_or_404(

0 commit comments

Comments
 (0)