Skip to content

Commit 63dee37

Browse files
authored
[HNT-1289] Enable Custom Sections globally (#1179)
* [HNT-1289] Enable Custom Sections globally * Fix lint and tests errors * Clean up test code * Fix lint errors
1 parent a424cb1 commit 63dee37

File tree

4 files changed

+90
-141
lines changed

4 files changed

+90
-141
lines changed

merino/curated_recommendations/protocol.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,6 @@ class ExperimentName(str, Enum):
9898
CONTEXTUAL_AD_V2_BETA_EXPERIMENT = "new-tab-contextual-ad-updates-v2-beta"
9999
CONTEXTUAL_AD_RELEASE_EXPERIMENT = "new-tab-contextual-ad-updates-release"
100100
CONTEXTUAL_AD_V2_RELEASE_EXPERIMENT = "new-tab-contextual-ad-updates-v2-release"
101-
NEW_TAB_CUSTOM_SECTIONS_EXPERIMENT = "new-tab-custom-sections"
102101
# Experiment for doing local reranking of popular today via inferred interests
103102
INFERRED_LOCAL_EXPERIMENT = "new-tab-automated-personalization-local-ranking"
104103
INFERRED_LOCAL_EXPERIMENT_V2 = "new-tab-automated-personalization-local-ranking-2"

merino/curated_recommendations/sections.py

Lines changed: 11 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -170,17 +170,15 @@ async def get_corpus_sections(
170170
min_feed_rank: int,
171171
include_subtopics: bool = False,
172172
scheduled_surface_backend: ScheduledSurfaceProtocol | None = None,
173-
is_custom_sections_experiment: bool = False,
174173
) -> tuple[Section | None, dict[str, Section]]:
175-
"""Fetch editorially curated sections with optional RSS vs. Zyte experiment filtering.
174+
"""Fetch curated sections.
176175
177176
Args:
178177
sections_backend: Backend interface to fetch corpus sections.
179178
surface_id: Identifier for which surface to fetch sections.
180179
min_feed_rank: Starting rank offset for assigning receivedFeedRank.
181180
include_subtopics: Whether to include subtopic sections.
182181
scheduled_surface_backend: Backend interface to fetch scheduled corpus items (temporary)
183-
is_custom_sections_experiment: Whether custom sections experiment is enabled.
184182
185183
Returns:
186184
A tuple of headlines section (if present) & a mapping from section IDs to Section objects, each with a unique receivedFeedRank.
@@ -203,11 +201,10 @@ async def get_corpus_sections(
203201
is_legacy_section=False,
204202
)
205203

206-
# Apply RSS vs. Zyte experiment filtering and custom sections filtering
204+
# Apply filtering based on subtopics experiment
207205
filtered_corpus_sections = filter_sections_by_experiment(
208206
remaining_raw_corpus_sections,
209207
include_subtopics,
210-
is_custom_sections_experiment,
211208
)
212209

213210
# Process the sections using the shared logic, passing the dict directly
@@ -310,13 +307,6 @@ def is_scheduler_holdback_experiment(request: CuratedRecommendationsRequest) ->
310307
)
311308

312309

313-
def is_custom_sections_experiment(request: CuratedRecommendationsRequest) -> bool:
314-
"""Return True if custom sections should be included based on experiments."""
315-
return is_enrolled_in_experiment(
316-
request, ExperimentName.NEW_TAB_CUSTOM_SECTIONS_EXPERIMENT.value, "treatment"
317-
)
318-
319-
320310
def get_ranking_rescaler_for_branch(
321311
request: CuratedRecommendationsRequest,
322312
) -> ExperimentRescaler | None:
@@ -351,17 +341,20 @@ def get_corpus_sections_for_legacy_topic(
351341
def filter_sections_by_experiment(
352342
corpus_sections: list[CorpusSection],
353343
include_subtopics: bool = False,
354-
is_custom_sections_experiment: bool = False,
355344
) -> dict[str, CorpusSection]:
356-
"""Filter sections based on RSS vs. Zyte experiment branch and custom sections experiment.
345+
"""Filter sections based on createSource and subtopics experiment.
346+
347+
Sections are included if they meet any of these criteria:
348+
- Manually created sections (createSource == MANUAL)
349+
- ML-generated legacy topic sections
350+
- ML-generated subtopic sections (when subtopics experiment is enabled)
357351
358352
Args:
359353
corpus_sections: List of CorpusSection objects
360-
include_subtopics: Whether to include subtopic sections
361-
is_custom_sections_experiment: Whether custom sections experiment is enabled
354+
include_subtopics: Whether to include ML subtopic sections
362355
363356
Returns:
364-
Filtered sections
357+
Dict mapping section IDs to CorpusSection objects
365358
"""
366359
legacy_topics = get_legacy_topic_ids()
367360
result = {}
@@ -370,20 +363,9 @@ def filter_sections_by_experiment(
370363
section_id = section.externalId
371364
base_id = section_id
372365
is_legacy = base_id in legacy_topics
373-
# is_legacy = base_id in legacy_topics
374366
is_manual_section = section.createSource == CreateSource.MANUAL
375367

376-
# Custom sections experiment: only include MANUAL sections in treatment, exclude them in control
377-
if is_custom_sections_experiment:
378-
# Treatment: only include MANUAL sections
379-
if is_manual_section:
380-
result[base_id] = section
381-
continue
382-
383-
# Control/default: exclude MANUAL sections
384-
if is_manual_section:
385-
continue
386-
if is_legacy or include_subtopics:
368+
if is_manual_section or is_legacy or include_subtopics:
387369
result[base_id] = section
388370

389371
return result
@@ -586,9 +568,6 @@ async def get_sections(
586568
# Determine if we should include subtopics based on experiments
587569
include_subtopics = is_subtopics_experiment(request)
588570

589-
# Determine if custom sections experiment is enabled
590-
custom_sections_enabled = is_custom_sections_experiment(request)
591-
592571
rescaler = get_ranking_rescaler_for_branch(request)
593572

594573
headlines_corpus_section, corpus_sections_all = await get_corpus_sections(
@@ -597,7 +576,6 @@ async def get_sections(
597576
min_feed_rank=1,
598577
include_subtopics=include_subtopics,
599578
scheduled_surface_backend=scheduled_surface_backend,
600-
is_custom_sections_experiment=custom_sections_enabled,
601579
)
602580

603581
# Determine if we should include headlines section based on daily briefing experiment

tests/integration/api/v1/curated_recommendations/test_curated_recommendations.py

Lines changed: 50 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import logging
77
from typing import Any
88
from unittest.mock import AsyncMock
9+
from uuid import UUID
910

1011
import aiodogstatsd
1112
from fastapi.testclient import TestClient
@@ -65,6 +66,18 @@
6566
from tests.types import FilterCaplogFixture
6667

6768

69+
def is_manual_section(section_id: str) -> bool:
70+
"""Check if section ID is a UUID (manually created sections use UUIDs, ML sections use human-readable IDs).
71+
72+
Note: This heuristic may become obsolete if all sections adopt UUID identifiers in the future.
73+
"""
74+
try:
75+
UUID(section_id)
76+
return True
77+
except ValueError:
78+
return False
79+
80+
6881
class MockEngagementBackend(EngagementBackend):
6982
"""Mock class implementing the protocol for EngagementBackend.
7083
experiment_traffic_fraction defines a fraction of traffic expected for an experiment
@@ -1344,11 +1357,12 @@ def test_sections_legacy_holdback(self, experiment_payload, client: TestClient):
13441357
# Assert layouts are cycled
13451358
assert_section_layouts_are_cycled(sections)
13461359

1347-
# The only sections are topic sections or "top_stories_section"
1348-
assert all(
1349-
section_name == "top_stories_section" or section_name in Topic
1350-
for section_name in sections
1351-
)
1360+
# Should have top_stories_section and legacy topic sections
1361+
# (may also have manually created sections)
1362+
assert "top_stories_section" in sections
1363+
legacy_topics = {topic.value for topic in Topic}
1364+
legacy_sections_present = [sid for sid in sections if sid in legacy_topics]
1365+
assert len(legacy_sections_present) > 0, "Should have at least some legacy topic sections"
13521366

13531367
@pytest.mark.parametrize("locale", ["en-US", "de-DE"])
13541368
@pytest.mark.parametrize(
@@ -1395,14 +1409,16 @@ def test_sections_feed_content(self, locale, experiment_payload, caplog, client:
13951409
recs = section["recommendations"]
13961410
assert {rec["receivedRank"] for rec in recs} == set(range(len(recs)))
13971411

1398-
# Check if non-ML experiment, only legacy sections returned
1412+
# Check section types based on experiment
13991413
legacy_topics = {topic.value for topic in Topic}
14001414

14011415
if experiment_payload.get("experimentName") != ExperimentName.ML_SECTIONS_EXPERIMENT.value:
1402-
# Non-ML sections experiment: All section keys (excluding top_stories) must be in legacy topics
1416+
# Non-ML sections experiment: Should have legacy topics and may have manually created sections
1417+
# but should not have ML subtopics
14031418
for sid in sections:
1404-
if sid != "top_stories_section":
1405-
assert sid in legacy_topics
1419+
if sid != "top_stories_section" and sid not in legacy_topics:
1420+
# Non-legacy sections should only be manually created sections
1421+
assert is_manual_section(sid), f"Unexpected section type: {sid}"
14061422

14071423
# Check the recs used in top_stories_section are removed from their original ML sections.
14081424
top_story_ids = {
@@ -1432,28 +1448,16 @@ def test_sections_feed_content(self, locale, experiment_payload, caplog, client:
14321448
== "Insider advice on where to eat, what to see, and how to enjoy the city like a local."
14331449
)
14341450

1435-
@pytest.mark.parametrize(
1436-
"branch,should_have_manual,should_have_ml",
1437-
[
1438-
("treatment", True, False),
1439-
("control", False, True),
1440-
],
1441-
)
1442-
def test_custom_sections_experiment(
1443-
self, branch: str, should_have_manual: bool, should_have_ml: bool, client: TestClient
1444-
):
1445-
"""Test custom sections experiment filters sections by createSource.
1451+
def test_sections_include_both_manual_and_ml(self, client: TestClient):
1452+
"""Test that sections feed includes both manually created and ML-generated sections.
14461453
1447-
Treatment: Returns only MANUAL sections (createSource == "MANUAL")
1448-
Control: Excludes MANUAL sections (only ML sections returned)
1454+
Both MANUAL and ML sections should be returned together.
14491455
"""
14501456
response = client.post(
14511457
"/api/v1/curated-recommendations",
14521458
json={
14531459
"locale": "en-US",
14541460
"feeds": ["sections"],
1455-
"experimentName": ExperimentName.NEW_TAB_CUSTOM_SECTIONS_EXPERIMENT.value,
1456-
"experimentBranch": branch,
14571461
},
14581462
)
14591463
data = response.json()
@@ -1467,30 +1471,19 @@ def test_custom_sections_experiment(
14671471
# top_stories_section should always be present
14681472
assert "top_stories_section" in sections
14691473

1470-
manual_section_id = "d532b687-108a-4edb-a076-58a6945de714"
1471-
1472-
if should_have_manual:
1473-
# Treatment: Should NOT have ML sections
1474-
assert "music" not in sections
1475-
assert "nfl" not in sections
1476-
assert "tv" not in sections
1477-
assert "movies" not in sections
1478-
assert "nba" not in sections
1479-
1480-
# The MANUAL section "Tech stuff" may or may not appear depending on whether
1481-
# it has enough items after top stories are removed, but if it does appear,
1482-
# verify it has the correct title
1483-
if manual_section_id in sections:
1484-
assert sections[manual_section_id]["title"] == "Tech stuff"
1485-
else:
1486-
# Control: Should NOT have the MANUAL section
1487-
assert manual_section_id not in sections
1488-
1489-
# Should have ML sections (legacy topics only since not ML experiment)
1490-
legacy_topics = {topic.value for topic in Topic}
1491-
for sid in sections:
1492-
if sid != "top_stories_section":
1493-
assert sid in legacy_topics
1474+
# Should have ML sections (legacy topics)
1475+
legacy_topics = {topic.value for topic in Topic}
1476+
ml_sections_found = [sid for sid in sections if sid in legacy_topics]
1477+
assert len(ml_sections_found) > 0, "Should have at least some ML legacy topic sections"
1478+
1479+
# Check if any manually created sections appear (they may or may not, depending on
1480+
# whether they have enough items after top stories are removed)
1481+
manual_sections = [sid for sid in sections if is_manual_section(sid)]
1482+
if manual_sections:
1483+
# If the "Tech stuff" manual section appears, verify it has the correct title
1484+
tech_stuff_id = "d532b687-108a-4edb-a076-58a6945de714"
1485+
if tech_stuff_id in sections:
1486+
assert sections[tech_stuff_id]["title"] == "Tech stuff"
14941487

14951488
@pytest.mark.parametrize(
14961489
"sections_payload",
@@ -1707,18 +1700,24 @@ def test_sections_filtering_by_region_and_holdback(
17071700
and experiment_branch == "control"
17081701
)
17091702

1703+
# Categorize non-legacy, non-top_stories sections
17101704
non_legacy_section_ids = [
17111705
sid
17121706
for sid in sections
17131707
if sid not in legacy_topics and sid not in {"top_stories_section"}
17141708
]
1709+
ml_subtopic_section_ids = [
1710+
sid for sid in non_legacy_section_ids if not is_manual_section(sid)
1711+
]
17151712

17161713
if expect_subtopics:
1717-
assert non_legacy_section_ids, "Expected subtopic sections for US treatment"
1714+
assert ml_subtopic_section_ids, "Expected ML subtopic sections for US treatment"
17181715
else:
17191716
assert (
1720-
not non_legacy_section_ids
1721-
), f"Unexpected non-legacy sections: {non_legacy_section_ids}"
1717+
not ml_subtopic_section_ids
1718+
), f"Unexpected ML subtopic sections: {ml_subtopic_section_ids}"
1719+
1720+
# Manually created sections may appear regardless of experiment settings
17221721

17231722
def test_daily_briefing_experiment_headlines_section_returned(self, client: TestClient):
17241723
"""Test that the Headlines section is returned when the daily briefing experiment is enabled.

0 commit comments

Comments
 (0)