Skip to content

Commit 1418e3a

Browse files
mlplyler and rolf-moz
authored
[GENAI-234] Remove crawled references (#1174)
* removed crawl from sections filter * merged * cleaned * ok * some tests cleaned up * more crawl test cleanup * lint * integration test shenanigans * skip _crawl sections * Add backwards compatibility for _crawl issue --------- Co-authored-by: Rolf Rando <[email protected]>
1 parent c1d087d commit 1418e3a

File tree

13 files changed

+281
-768
lines changed

13 files changed

+281
-768
lines changed

merino/curated_recommendations/corpus_backends/sections_backend.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ async def fetch(self, surface_id: SurfaceId) -> list[CorpusSection]:
126126
utm_source = get_utm_source(surface_id)
127127
sections_list = []
128128
for section in data["data"]["getSections"]:
129-
if not section.get("active"):
129+
if not section.get("active") or section.get("externalId", "").endswith("_crawl"):
130130
logger.info(f"Skipping inactive section {section['externalId']} for {surface_id}")
131131
continue
132132

merino/curated_recommendations/ml_backends/static_local_model.py

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
"travel",
5656
]
5757

58+
BASE_TOPICS_SET = set(BASE_TOPICS)
5859

5960
BASE_SECTIONS_FOR_LOCAL_MODEL = [
6061
"nfl",
@@ -66,22 +67,22 @@
6667
"movies",
6768
"music",
6869
"books",
69-
"business_crawl",
70-
"career_crawl",
71-
"arts_crawl",
72-
"food_crawl",
73-
"health_crawl",
74-
"home_crawl",
75-
"finance_crawl",
76-
"government_crawl",
77-
"sports_crawl",
78-
"tech_crawl",
79-
"travel_crawl",
80-
"education_crawl",
81-
"hobbies_crawl",
82-
"society-parenting_crawl",
83-
"education-science_crawl",
84-
"society_crawl",
70+
"business",
71+
"career",
72+
"arts",
73+
"food",
74+
"health",
75+
"home",
76+
"finance",
77+
"government",
78+
"sports",
79+
"tech",
80+
"travel",
81+
"education",
82+
"hobbies",
83+
"society-parenting",
84+
"education-science",
85+
"society",
8586
]
8687

8788

@@ -147,8 +148,6 @@ def get_topic(topic: str) -> InterestVectorConfig:
147148
THRESHOLDS_V1_A = [0.008, 0.016, 0.024]
148149
THRESHOLDS_V1_B = [0.005, 0.010, 0.015]
149150

150-
CRAWL_SUFFIX = "_crawl"
151-
152151

153152
# Creates a limited model based on topics. Topics features are stored with a t_
154153
# in telemetry.
@@ -172,10 +171,6 @@ class SuperInferredModel(LocalModelBackend):
172171

173172
default_model_id = DEFAULT_PRODUCTION_MODEL_ID
174173

175-
@staticmethod
176-
def _clean_section(section_name: str):
177-
return section_name.replace(CRAWL_SUFFIX, "")
178-
179174
@staticmethod
180175
def _get_topic(topic: str, thresholds: list[float]) -> InterestVectorConfig:
181176
return InterestVectorConfig(
@@ -187,8 +182,13 @@ def _get_topic(topic: str, thresholds: list[float]) -> InterestVectorConfig:
187182

188183
@staticmethod
189184
def _get_section(section_name: str, thresholds: list[float]) -> InterestVectorConfig:
185+
features = (
186+
{f"s_{section_name}": 1, f"s_{section_name}_crawl": 1}
187+
if section_name in BASE_TOPICS_SET
188+
else {f"s_{section_name}": 1}
189+
)
190190
return InterestVectorConfig(
191-
features={f"s_{section_name}": 1},
191+
features=features,
192192
thresholds=thresholds,
193193
diff_p=MODEL_P_VALUE_V1,
194194
diff_q=MODEL_Q_VALUE_V1,
@@ -245,8 +245,7 @@ def _build_local(self, model_id, surface_id) -> InferredLocalModel | None:
245245
else:
246246
return None
247247
category_fields = {
248-
self._clean_section(a): self._get_section(a, model_thresholds)
249-
for a in BASE_SECTIONS_FOR_LOCAL_MODEL
248+
a: self._get_section(a, model_thresholds) for a in BASE_SECTIONS_FOR_LOCAL_MODEL
250249
} ## all sections
251250
model_data: ModelData = ModelData(
252251
model_type=ModelType.CTR,

merino/curated_recommendations/prior_backends/experiment_rescaler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
PESSIMISTIC_PRIOR_ALPHA_SCALE_SUBTOPIC = 0.35
1616

1717

18-
class DefaultCrawlerRescaler(ExperimentRescaler):
18+
class DefaultRescaler(ExperimentRescaler):
1919
"""Scales based on overall percentage"""
2020

2121
def __init__(self, **data: Any):

merino/curated_recommendations/protocol.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,6 @@ class ExperimentName(str, Enum):
9191
RSS_VS_ZYTE_EXPERIMENT = "new-ranking-for-legacy-topics-in-new-tab-v1"
9292
# Experiment to display Daily Briefing section as the first section on New Tab
9393
DAILY_BRIEFING_EXPERIMENT = "daily-briefing-v1"
94-
# Experiment slug for crawling with identical behavior/branches as RSS_VS_ZYTE_EXPERIMENT
95-
NEW_TAB_CRAWLING_V2 = "new-tab-crawling-v2"
9694
# The following are 6 experiments to apply 1 row layout for Popular Today for contextual ads
9795
CONTEXTUAL_AD_NIGHTLY_EXPERIMENT = "new-tab-ad-updates-nightly"
9896
CONTEXTUAL_AD_V2_NIGHTLY_EXPERIMENT = "new-tab-contextual-ad-updates-v2-nightly"
@@ -106,15 +104,6 @@ class ExperimentName(str, Enum):
106104
INFERRED_LOCAL_EXPERIMENT_V2 = "new-tab-automated-personalization-local-ranking-2"
107105

108106

109-
@unique
110-
class CrawlExperimentBranchName(str, Enum):
111-
"""Branch names for the RSS vs. Zyte (crawl) experiment."""
112-
113-
CONTROL = "control"
114-
TREATMENT_CRAWL = "treatment-crawl"
115-
TREATMENT_CRAWL_PLUS_SUBTOPICS = "treatment-crawl-subtopics"
116-
117-
118107
# Maximum tileId that Firefox can support. Firefox uses Javascript to store this value. The max
119108
# value of a Javascript number can be found using `Number.MAX_SAFE_INTEGER`. which is 2^53 - 1
120109
# because it uses a 64-bit IEEE 754 float.

merino/curated_recommendations/sections.py

Lines changed: 9 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
from merino.curated_recommendations.prior_backends.experiment_rescaler import (
2424
SchedulerHoldbackRescaler,
2525
SUBTOPIC_EXPERIMENT_CURATED_ITEM_FLAG,
26-
DefaultCrawlerRescaler,
2726
)
2827
from merino.curated_recommendations.prior_backends.protocol import PriorBackend, ExperimentRescaler
2928
from merino.curated_recommendations.protocol import (
@@ -33,7 +32,6 @@
3332
SectionConfiguration,
3433
ExperimentName,
3534
ProcessedInterests,
36-
CrawlExperimentBranchName,
3735
Layout,
3836
)
3937
from merino.curated_recommendations.rankers import (
@@ -56,7 +54,6 @@
5654
DOUBLE_ROW_TOP_STORIES_COUNT = 9
5755
TOP_STORIES_SECTION_EXTRA_COUNT = 5 # Extra top stories pulled from later sections
5856
HEADLINES_SECTION_KEY = "headlines_section"
59-
HEADLINES_CRAWL_SECTION_KEY = "headlines_crawl"
6057

6158

6259
def map_section_item_to_recommendation(
@@ -170,7 +167,6 @@ async def get_corpus_sections(
170167
sections_backend: SectionsProtocol,
171168
surface_id: SurfaceId,
172169
min_feed_rank: int,
173-
crawl_branch: str | None = None,
174170
include_subtopics: bool = False,
175171
scheduled_surface_backend: ScheduledSurfaceProtocol | None = None,
176172
is_custom_sections_experiment: bool = False,
@@ -181,7 +177,6 @@ async def get_corpus_sections(
181177
sections_backend: Backend interface to fetch corpus sections.
182178
surface_id: Identifier for which surface to fetch sections.
183179
min_feed_rank: Starting rank offset for assigning receivedFeedRank.
184-
crawl_branch: The crawl experiment branch name or None.
185180
include_subtopics: Whether to include subtopic sections.
186181
scheduled_surface_backend: Backend interface to fetch scheduled corpus items (temporary)
187182
is_custom_sections_experiment: Whether custom sections experiment is enabled.
@@ -210,7 +205,6 @@ async def get_corpus_sections(
210205
# Apply RSS vs. Zyte experiment filtering and custom sections filtering
211206
filtered_corpus_sections = filter_sections_by_experiment(
212207
remaining_raw_corpus_sections,
213-
crawl_branch,
214208
include_subtopics,
215209
is_custom_sections_experiment,
216210
)
@@ -228,11 +222,11 @@ async def get_corpus_sections(
228222
def split_headlines_section(
229223
corpus_sections: list[CorpusSection],
230224
) -> tuple[CorpusSection | None, list[CorpusSection]]:
231-
"""Return the headlines_crawl section separately from everything else."""
225+
"""Return the headlines section separately from everything else."""
232226
headlines_section: CorpusSection | None = None
233227
remaining_sections: list[CorpusSection] = []
234228
for cs in corpus_sections:
235-
if cs.externalId == HEADLINES_CRAWL_SECTION_KEY:
229+
if cs.externalId == HEADLINES_SECTION_KEY:
236230
headlines_section = cs
237231
else:
238232
remaining_sections.append(cs)
@@ -302,7 +296,6 @@ def is_subtopics_experiment(request: CuratedRecommendationsRequest) -> bool:
302296
303297
Include subtopics if:
304298
- ML sections experiment is enabled (treatment branch), OR
305-
- Crawl experiment is in the TREATMENT_CRAWL_PLUS_SUBTOPICS branch
306299
"""
307300
in_holdback = is_scheduler_holdback_experiment(request)
308301
# Subtopics only in the US
@@ -323,30 +316,14 @@ def is_custom_sections_experiment(request: CuratedRecommendationsRequest) -> boo
323316
)
324317

325318

326-
def get_crawl_experiment_branch(request: CuratedRecommendationsRequest) -> str | None:
327-
"""Return the branch name for the RSS vs. Zyte experiment
328-
329-
Branches:
330-
- control: Non-crawl legacy topics only
331-
- treatment-crawl: Crawl legacy topics only
332-
- treatment-crawl-subtopics: Crawl legacy topics + non-crawl subtopics
333-
334-
"""
335-
if is_scheduler_holdback_experiment(request) or request.region != "US":
336-
return CrawlExperimentBranchName.CONTROL.value
337-
338-
return CrawlExperimentBranchName.TREATMENT_CRAWL_PLUS_SUBTOPICS.value
339-
340-
341319
def get_ranking_rescaler_for_branch(
342320
request: CuratedRecommendationsRequest,
343321
) -> ExperimentRescaler | None:
344322
"""Get the correct interactions and prior rescaler for the current experiment"""
345-
if request.region != "US":
323+
if request.region != "US" or not is_scheduler_holdback_experiment(request):
346324
return None
347-
if is_scheduler_holdback_experiment(request):
325+
else:
348326
return SchedulerHoldbackRescaler()
349-
return DefaultCrawlerRescaler()
350327

351328

352329
def update_received_feed_rank(sections: dict[str, Section]):
@@ -369,43 +346,29 @@ def get_corpus_sections_for_legacy_topic(
369346
return {sid: section for sid, section in corpus_sections.items() if sid in legacy_topics}
370347

371348

372-
def is_crawl_section_id(section_id: str) -> bool:
373-
"""Check if a section ID represents a crawl section.
374-
375-
Args:
376-
section_id: The section external ID to check
377-
378-
Returns:
379-
True if the section ID ends with '_crawl', False otherwise
380-
"""
381-
return section_id.endswith("_crawl")
382-
383-
384349
def filter_sections_by_experiment(
385350
corpus_sections: list[CorpusSection],
386-
crawl_branch: str | None,
387351
include_subtopics: bool = False,
388352
is_custom_sections_experiment: bool = False,
389353
) -> dict[str, CorpusSection]:
390354
"""Filter sections based on RSS vs. Zyte experiment branch and custom sections experiment.
391355
392356
Args:
393357
corpus_sections: List of CorpusSection objects
394-
crawl_branch: The experiment branch name or None
395358
include_subtopics: Whether to include subtopic sections
396359
is_custom_sections_experiment: Whether custom sections experiment is enabled
397360
398361
Returns:
399-
Filtered sections with _crawl suffix removed from keys for crawl sections
362+
Filtered sections
400363
"""
401364
legacy_topics = get_legacy_topic_ids()
402365
result = {}
403366

404367
for section in corpus_sections:
405368
section_id = section.externalId
406-
is_crawl_section = is_crawl_section_id(section_id)
407-
base_id = section_id.replace("_crawl", "") if is_crawl_section else section_id
369+
base_id = section_id
408370
is_legacy = base_id in legacy_topics
371+
# is_legacy = base_id in legacy_topics
409372
is_manual_section = section.createSource == CreateSource.MANUAL
410373

411374
# Custom sections experiment: only include MANUAL sections in treatment, exclude them in control
@@ -418,28 +381,8 @@ def filter_sections_by_experiment(
418381
# Control/default: exclude MANUAL sections
419382
if is_manual_section:
420383
continue
421-
422-
# Determine if we should include this section based on the branch
423-
if crawl_branch in [
424-
CrawlExperimentBranchName.TREATMENT_CRAWL.value,
425-
CrawlExperimentBranchName.TREATMENT_CRAWL_PLUS_SUBTOPICS.value,
426-
]:
427-
# Treatment branches: use _crawl for legacy, regular for subtopics
428-
if is_legacy and is_crawl_section:
429-
result[base_id] = section
430-
elif (
431-
not is_legacy
432-
and not is_crawl_section
433-
and crawl_branch == CrawlExperimentBranchName.TREATMENT_CRAWL_PLUS_SUBTOPICS.value
434-
):
435-
# Include non-crawl subtopics only in crawl-plus-subtopics branch
436-
result[base_id] = section
437-
else:
438-
# Control branch or no experiment: use non-_crawl sections
439-
if not is_crawl_section:
440-
# Include based on whether subtopics are enabled
441-
if is_legacy or include_subtopics:
442-
result[base_id] = section
384+
if is_legacy or include_subtopics:
385+
result[base_id] = section
443386

444387
return result
445388

@@ -638,9 +581,6 @@ async def get_sections(
638581
Returns:
639582
A dict mapping section IDs to fully-configured Section models.
640583
"""
641-
# 1. Get corpus sections with RSS vs. Zyte experiment filtering
642-
crawl_branch = get_crawl_experiment_branch(request)
643-
644584
# Determine if we should include subtopics based on experiments
645585
include_subtopics = is_subtopics_experiment(request)
646586

@@ -653,7 +593,6 @@ async def get_sections(
653593
sections_backend=sections_backend,
654594
surface_id=surface_id,
655595
min_feed_rank=1,
656-
crawl_branch=crawl_branch,
657596
include_subtopics=include_subtopics,
658597
scheduled_surface_backend=scheduled_surface_backend,
659598
is_custom_sections_experiment=custom_sections_enabled,

0 commit comments

Comments
 (0)