Skip to content

Commit 3c51977

Browse files
authored
GENAI-4295 Remove normalization, add more data in inferred (#1340)
* Remove normalization, add more data in inferred * update thresholds * Fixes * reverse print * test fix
1 parent 76ae5a0 commit 3c51977

File tree

3 files changed

+16
-6
lines changed

3 files changed

+16
-6
lines changed

merino/curated_recommendations/ml_backends/static_local_model.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -169,9 +169,13 @@ def get_topic(topic: str) -> InterestVectorConfig:
169169

170170

171171
THRESHOLDS_V3_NORMALIZED = [0.3, 0.5, 0.8]
172+
THRESHOLDS_V3_NON_NORMALIZED = [0.002, 0.008, 0.017]
173+
THRESHOLDS_V3_NON_NORMALIZED_ALL_TOPICS = [0.0001, 0.002, 0.004]
172174

173175
SUBTOPIC_TOPIC_BLEND_RATIO = 0.15
174176

177+
SPECIAL_ALL_TOPIC_KEYWOWRD = "all"
178+
175179

176180
# Creates a limited model based on topics. Topic features are stored with a t_ prefix
177181
# in telemetry.
@@ -194,12 +198,19 @@ class SuperInferredModel(LocalModelBackend):
194198
Topic.FOOD.value,
195199
Topic.TECHNOLOGY.value,
196200
Topic.SCIENCE.value,
197-
Topic.PERSONAL_FINANCE.value,
201+
SPECIAL_ALL_TOPIC_KEYWOWRD,
198202
]
199203
limited_topics_set = set(v3_limited_topics)
200204

201205
@staticmethod
202206
def _get_topic(topic: str, thresholds: list[float]) -> InterestVectorConfig:
207+
if topic == SPECIAL_ALL_TOPIC_KEYWOWRD:
208+
return InterestVectorConfig(
209+
features={f"t_{t}": 1 for t in BASE_TOPICS},
210+
thresholds=THRESHOLDS_V3_NON_NORMALIZED_ALL_TOPICS,
211+
diff_p=MODEL_P_VALUE_V3,
212+
diff_q=MODEL_Q_VALUE_V3,
213+
)
203214
return InterestVectorConfig(
204215
features={f"t_{topic}": 1},
205216
thresholds=thresholds,
@@ -218,7 +229,7 @@ def _get_section(section_name: str, thresholds: list[float]) -> InterestVectorCo
218229
)
219230

220231
def _build_local(self, model_id, surface_id) -> InferredLocalModel | None:
221-
model_thresholds = THRESHOLDS_V3_NORMALIZED
232+
model_thresholds = THRESHOLDS_V3_NON_NORMALIZED
222233
private_features: list[str] | None = None
223234

224235
if model_id == SERVER_V3_MODEL_ID:
@@ -236,7 +247,7 @@ def _build_local(self, model_id, surface_id) -> InferredLocalModel | None:
236247
topic_features = {a: self._get_topic(a, model_thresholds) for a in self.v3_limited_topics}
237248
model_data: ModelData = ModelData(
238249
model_type=ModelType.CTR,
239-
rescale=True,
250+
rescale=False,
240251
noise_scale=0.0,
241252
day_time_weighting=DayTimeWeightingConfig(
242253
days=[30],

merino/curated_recommendations/rankers/contextual_ranker.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444
# These topics are in the current interest vector but are not used to determine the
4545
# cohort selection.
4646
CONTEXUAL_INFERRED_PER_TOPIC_WEIGHTING = {
47-
Topic.PERSONAL_FINANCE: 1.2,
4847
Topic.TECHNOLOGY: 1.0,
4948
Topic.POLITICS: 0.3,
5049
Topic.ARTS: 0.3,

tests/unit/curated_recommendations/ml_backends/test_static_local_model.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from merino.curated_recommendations.corpus_backends.protocol import Topic, SurfaceId
77
from merino.curated_recommendations.ml_backends.static_local_model import (
88
SERVER_V3_MODEL_ID,
9-
THRESHOLDS_V3_NORMALIZED,
9+
THRESHOLDS_V3_NON_NORMALIZED,
1010
FakeLocalModelSections,
1111
SuperInferredModel,
1212
CTR_SECTION_MODEL_ID,
@@ -111,7 +111,7 @@ def test_model_returns_default_limited_model(model_limited):
111111
# test a specific threshold value
112112
assert (
113113
result.model_data.interest_vector[Topic.SPORTS.value].thresholds[0]
114-
== THRESHOLDS_V3_NORMALIZED[0]
114+
== THRESHOLDS_V3_NON_NORMALIZED[0]
115115
)
116116

117117

0 commit comments

Comments
 (0)