|
6 | 6 | from django.conf import settings |
7 | 7 | from django.contrib.auth.models import User |
8 | 8 | from django.contrib.sessions.backends.base import SessionBase |
9 | | -from django.db.models import Exists, F, OuterRef, Q, Subquery |
10 | | -from django.db.models.functions import Coalesce, Now |
| 9 | +from django.db.models import OuterRef, Q, Subquery |
| 10 | +from django.db.models.functions import Now |
11 | 11 | from django.db.models.query import QuerySet |
12 | 12 | from django.http import Http404, HttpRequest |
13 | 13 | from django.shortcuts import get_object_or_404 |
@@ -153,115 +153,67 @@ def get_featured_articles( |
153 | 153 | locale=settings.WIKI_DEFAULT_LANGUAGE, |
154 | 154 | fetch_for_aaq=False, |
155 | 155 | limit=4, |
156 | | -) -> list[Document]: |
157 | | - """ |
158 | | - Returns a list of up to "limit" KB articles, first including any pinned articles, and |
159 | | - for any remaining slots, a random selection of the most visited. |
| 156 | +): |
| 157 | + """Returns up to 4 random articles from the most visited. |
| 158 | +
|
| 159 | + REVERTED to simple pre-3d20c307d version for production stability. |
| 160 | + The complex Coalesce + Exists subqueries caused 100% DB CPU. |
160 | 161 |
|
161 | 162 | Args: |
162 | | - user: Optional user for visibility |
| 163 | + user: Optional user for visibility (currently ignored, uses old simple logic) |
163 | 164 | product: Optional product to filter by |
164 | 165 | topics: Optional iterable of topics to filter by |
165 | | - locale: Optional locale to get articles for, defaults to WIKI_DEFAULT_LANGUAGE |
166 | | - fetch_for_aaq: Optional boolean for indicating that we're fetching articles for the AAQ |
167 | | - limit: Optional integer to limit the number of articles in the queryset |
168 | | - Returns: |
169 | | - A list of up to "limit" Document instances |
| 166 | + locale: Locale to get articles for, defaults to WIKI_DEFAULT_LANGUAGE |
| 167 | + fetch_for_aaq: Optional boolean (currently ignored) |
| 168 | + limit: Optional integer to limit the number of articles (currently ignored, returns 4) |
170 | 169 | """ |
171 | | - pinned_articles = list( |
172 | | - get_pinned_articles( |
173 | | - user=user, |
174 | | - product=product, |
175 | | - locale=locale, |
176 | | - fetch_for_aaq=fetch_for_aaq, |
177 | | - ) |
178 | | - ) |
| 170 | + parent_prefix = "parent__" if locale != settings.WIKI_DEFAULT_LANGUAGE else "" |
179 | 171 |
|
180 | | - if (num_pinned_articles := len(pinned_articles)) == limit: |
181 | | - return pinned_articles |
182 | | - |
183 | | - elif num_pinned_articles > limit: |
184 | | - return random.sample(pinned_articles, limit) |
185 | | - |
186 | | - # Here are some key points about the following query: |
187 | | - # (1) It will include all KB articles that are visible to the provided user. This |
188 | | - # means that if the user has permission to view restricted articles, those |
189 | | - # restricted articles will be included in the query. If the user is None, the |
190 | | - # user is considered anonymous in terms of viewing permissions. |
191 | | - # (2) It uses the annotated "root_id", which will be the id of the parent, to filter |
192 | | - # for articles matching the given product or one of the given topics, because |
193 | | - # only the products and topics of an article's parent should be considered. It |
194 | | - # also use "root_id" to get an article's Google Analytics pageviews, so only |
195 | | - # the pageviews of a localized article's parent are used. |
196 | | - # (3) We're excluding templates, archived articles, redirects, as well as any pinned |
197 | | - # articles since the pinned articles are handled separately. |
198 | | - # (4) If no product was provided, we're effectively asking for featured articles for |
199 | | - # the home page, and in that case we also exclude articles associated with the |
200 | | - # products defined in settings.EXCLUDE_PRODUCT_SLUGS_FEATURED_ARTICLES. |
201 | | - # (5) We're only using the most-visited articles, limited to a number that depends |
202 | | - # on the provided "limit". |
203 | | - qs = ( |
204 | | - Document.objects.visible(user, locale=locale, is_template=False, is_archived=False) |
205 | | - .exclude(html__startswith=REDIRECT_HTML) |
206 | | - .annotate(root_id=Coalesce(F("parent_id"), F("id"))) |
207 | | - ) |
208 | | - |
209 | | - if pinned_articles: |
210 | | - qs = qs.exclude(id__in=[doc.id for doc in pinned_articles]) |
| 172 | + filter_kwargs = { |
| 173 | + f"{parent_prefix}restrict_to_groups__isnull": True, |
| 174 | + } |
211 | 175 |
|
212 | 176 | if product: |
213 | | - qs = qs.filter( |
214 | | - Exists( |
215 | | - Document.objects.filter( |
216 | | - id=OuterRef("root_id"), |
217 | | - products=product, |
218 | | - ) |
219 | | - ) |
220 | | - ) |
221 | | - else: |
222 | | - # If we're not getting the featured articles for a specific product, |
223 | | - # exclude articles associated with these configured products. |
224 | | - qs = qs.exclude( |
225 | | - Exists( |
226 | | - Document.objects.filter( |
227 | | - id=OuterRef("root_id"), |
228 | | - products__slug__in=settings.EXCLUDE_PRODUCT_SLUGS_FEATURED_ARTICLES, |
229 | | - ) |
230 | | - ) |
231 | | - ) |
| 177 | + filter_kwargs[f"{parent_prefix}products"] = product |
232 | 178 |
|
233 | 179 | if topics: |
234 | | - qs = qs.filter( |
235 | | - Exists( |
236 | | - Document.objects.filter( |
237 | | - id=OuterRef("root_id"), |
238 | | - topics__in=topics, |
239 | | - ) |
240 | | - ) |
241 | | - ) |
| 180 | + filter_kwargs[f"{parent_prefix}topics__in"] = topics |
| 181 | + |
| 182 | + excluded_product_slugs = settings.EXCLUDE_PRODUCT_SLUGS_FEATURED_ARTICLES |
242 | 183 |
|
243 | 184 | qs = ( |
244 | | - qs.select_related("current_revision") |
| 185 | + Document.objects.filter( |
| 186 | + locale=locale, |
| 187 | + is_template=False, |
| 188 | + is_archived=False, |
| 189 | + current_revision__isnull=False, |
| 190 | + **filter_kwargs, |
| 191 | + ) |
| 192 | + .exclude(**{f"{parent_prefix}products__slug__in": excluded_product_slugs}) |
| 193 | + .exclude(html__startswith=REDIRECT_HTML) |
| 194 | + .select_related("current_revision") |
245 | 195 | .annotate( |
246 | 196 | num_visits=Subquery( |
247 | 197 | WikiDocumentVisits.objects.filter( |
248 | | - document=OuterRef("root_id"), period=LAST_7_DAYS |
249 | | - ).values("visits")[:1] |
| 198 | + document=OuterRef(f"{parent_prefix}pk"), period=LAST_7_DAYS |
| 199 | + ).values("visits") |
250 | 200 | ) |
251 | 201 | ) |
252 | 202 | .exclude(num_visits__isnull=True) |
253 | 203 | .order_by("-num_visits") |
254 | 204 | ) |
255 | 205 |
|
256 | | - # Include double the limit of the most visited articles for sampling. |
257 | | - docs = list(qs[: (limit * 2)]) |
258 | | - |
259 | | - remaining_limit = limit - len(pinned_articles) |
| 206 | + if topics: |
| 207 | + # Documents that match multiple topics will be repeated, |
| 208 | + # so remove any duplicates when we're matching by topics. |
| 209 | + qs = qs.distinct() |
260 | 210 |
|
261 | | - if len(docs) > remaining_limit: |
262 | | - docs = random.sample(docs, remaining_limit) |
| 211 | + # Only include the ten most visited articles for sampling. |
| 212 | + docs = list(qs[:10]) |
263 | 213 |
|
264 | | - return pinned_articles + docs |
| 214 | + if len(docs) <= 4: |
| 215 | + return docs |
| 216 | + return random.sample(docs, 4) |
265 | 217 |
|
266 | 218 |
|
267 | 219 | def get_visible_document_or_404( |
|
0 commit comments