Skip to content

Commit 3bea071

Browse files
refactor: update profile constants and scoring logic for improved recommendations
1 parent: a16683d · commit: 3bea071

File tree

5 files changed: 46 additions (+46) and 42 deletions (-42)

app/services/profile/constants.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@
3636
CAP_COUNTRY: Final[float] = 20.0
3737

3838
# Recency Decay (exponential decay parameters)
39-
RECENCY_HALF_LIFE_DAYS: Final[float] = 90.0 # 90-day half-life
39+
RECENCY_HALF_LIFE_DAYS: Final[float] = 15.0 # 15-day half-life
4040
RECENCY_DECAY_RATE: Final[float] = 0.98 # Daily decay multiplier (soft decay)
4141

4242
# Smart Sampling
@@ -48,12 +48,12 @@
4848
FREQUENCY_MULTIPLIER_LOG_FACTOR: Final[float] = 0.1 # Subtle boost
4949

5050
# Top Picks Caps (diversity constraints)
51-
TOP_PICKS_RECENCY_CAP: Final[float] = 0.15 # Max 15% recent items (from trending/popular)
52-
TOP_PICKS_GENRE_CAP: Final[float] = 0.30 # Max 30% per genre
53-
TOP_PICKS_CREATOR_CAP: Final[int] = 2 # Max 2 items per creator (director/actor)
51+
TOP_PICKS_RECENCY_CAP: Final[float] = 0.10 # Max 10% recent items (from trending/popular)
52+
TOP_PICKS_GENRE_CAP: Final[float] = 0.40 # Max 40% per genre
53+
TOP_PICKS_CREATOR_CAP: Final[int] = 3 # Max 3 items per creator (director/actor)
5454
TOP_PICKS_ERA_CAP: Final[float] = 0.40 # Max 40% per era
55-
TOP_PICKS_MIN_VOTE_COUNT: Final[int] = 300 # Minimum vote count for quality
56-
TOP_PICKS_MIN_RATING: Final[float] = 5.0 # Minimum weighted rating for quality
55+
TOP_PICKS_MIN_VOTE_COUNT: Final[int] = 500 # Minimum vote count for quality
56+
TOP_PICKS_MIN_RATING: Final[float] = 7.5 # Minimum weighted rating for quality
5757

5858
# Genre whitelist limit (top N genres)
59-
GENRE_WHITELIST_LIMIT: Final[int] = 5
59+
GENRE_WHITELIST_LIMIT: Final[int] = 7

app/services/profile/scorer.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,8 +13,6 @@
1313
class ProfileScorer:
1414
"""
1515
Scores items against taste profile using unified function.
16-
17-
Design principle: Same function everywhere, no special cases.
1816
"""
1917

2018
@staticmethod
@@ -96,7 +94,7 @@ def _extract_cast_ids(item_metadata: dict[str, Any]) -> list[int]:
9694
cast_ids = []
9795
credits = item_metadata.get("credits", {}) or {}
9896
cast_list = credits.get("cast", []) or []
99-
for actor in cast_list[:10]: # Top 10 only
97+
for actor in cast_list[:5]: # Top 5 only
10098
if isinstance(actor, dict):
10199
actor_id = actor.get("id")
102100
if actor_id:

app/services/recommendation/scoring.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ class RecommendationScoring:
1212
"""
1313

1414
@staticmethod
15-
def weighted_rating(vote_avg: float | None, vote_count: int | None, C: float = 6.8, m: int = 300) -> float:
15+
def weighted_rating(vote_avg: float | None, vote_count: int | None, C: float = 6.8, m: int = 150) -> float:
1616
"""IMDb-style weighted rating on 0-10 scale."""
1717
try:
1818
R = float(vote_avg or 0.0)
@@ -95,9 +95,9 @@ def apply_quality_adjustments(score: float, wr: float, vote_count: int, is_ranke
9595
"""Apply multiplicative adjustments based on item quality and source."""
9696
q_mult = 1.0
9797
if vote_count < 50:
98-
q_mult *= 0.6
98+
q_mult *= 0.75
9999
elif vote_count < 150:
100-
q_mult *= 0.85
100+
q_mult *= 0.90
101101

102102
if wr < 5.5:
103103
q_mult *= 0.5

app/services/recommendation/top_picks.py

Lines changed: 34 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -146,7 +146,7 @@ async def _fetch_recommendations_from_top_items(
146146
List of candidate items
147147
"""
148148
# Get top items (loved first, then liked, then added, then top watched)
149-
top_items = self.smart_sampler.sample_items(library_items, content_type, max_items=10)
149+
top_items = self.smart_sampler.sample_items(library_items, content_type, max_items=15)
150150

151151
candidates = []
152152
tasks = []
@@ -204,29 +204,35 @@ async def _fetch_discover_with_profile(
204204
# Discover with genres
205205
if top_genres:
206206
genre_ids = [g[0] for g in top_genres]
207-
tasks.append(
208-
self.tmdb_service.get_discover(
209-
mtype,
210-
with_genres="|".join(str(g) for g in genre_ids),
211-
page=1,
212-
sort_by="popularity.desc",
213-
vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
214-
vote_average_gte=TOP_PICKS_MIN_RATING,
215-
)
207+
tasks.extend(
208+
[
209+
self.tmdb_service.get_discover(
210+
mtype,
211+
with_genres="|".join(str(g) for g in genre_ids),
212+
page=page,
213+
sort_by="popularity.asc",
214+
vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
215+
vote_average_gte=TOP_PICKS_MIN_RATING,
216+
)
217+
for page in [1, 2]
218+
]
216219
)
217220

218221
# Discover with keywords
219222
if top_keywords:
220223
keyword_ids = [k[0] for k in top_keywords]
221-
tasks.append(
222-
self.tmdb_service.get_discover(
223-
mtype,
224-
with_keywords="|".join(str(k) for k in keyword_ids),
225-
page=1,
226-
sort_by="popularity.desc",
227-
vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
228-
vote_average_gte=TOP_PICKS_MIN_RATING,
229-
)
224+
tasks.extend(
225+
[
226+
self.tmdb_service.get_discover(
227+
mtype,
228+
with_keywords="|".join(str(k) for k in keyword_ids),
229+
page=page,
230+
sort_by="popularity.asc",
231+
vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
232+
vote_average_gte=TOP_PICKS_MIN_RATING,
233+
)
234+
for page in range(1, 4) # 3 pages
235+
]
230236
)
231237

232238
# Discover with directors
@@ -237,7 +243,7 @@ async def _fetch_discover_with_profile(
237243
mtype,
238244
with_crew=str(director_id),
239245
page=1,
240-
sort_by="popularity.desc",
246+
sort_by="popularity.asc",
241247
vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
242248
vote_average_gte=TOP_PICKS_MIN_RATING,
243249
)
@@ -251,7 +257,7 @@ async def _fetch_discover_with_profile(
251257
mtype,
252258
with_cast=str(cast_id),
253259
page=1,
254-
sort_by="popularity.desc",
260+
sort_by="popularity.asc",
255261
vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
256262
vote_average_gte=TOP_PICKS_MIN_RATING,
257263
)
@@ -268,7 +274,7 @@ async def _fetch_discover_with_profile(
268274
mtype,
269275
**{f"{prefix}.gte": f"{year_start}-01-01", f"{prefix}.lte": f"{year_start+9}-12-31"},
270276
page=1,
271-
sort_by="popularity.desc",
277+
sort_by="popularity.asc",
272278
vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
273279
vote_average_gte=TOP_PICKS_MIN_RATING,
274280
)
@@ -282,7 +288,7 @@ async def _fetch_discover_with_profile(
282288
mtype,
283289
with_origin_country="|".join(country_codes),
284290
page=1,
285-
sort_by="popularity.desc",
291+
sort_by="popularity.asc",
286292
vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
287293
vote_average_gte=TOP_PICKS_MIN_RATING,
288294
)
@@ -319,11 +325,11 @@ async def _fetch_trending_and_popular(self, content_type: str, mtype: str) -> li
319325
logger.debug(f"Failed to fetch trending: {e}")
320326

321327
# Fetch popular (top rated, 1 page)
322-
try:
323-
popular = await self.tmdb_service.get_top_rated(mtype, page=1)
324-
candidates.extend(popular.get("results", []))
325-
except Exception as e:
326-
logger.debug(f"Failed to fetch popular: {e}")
328+
# try:
329+
# popular = await self.tmdb_service.get_top_rated(mtype, page=1)
330+
# candidates.extend(popular.get("results", []))
331+
# except Exception as e:
332+
# logger.debug(f"Failed to fetch popular: {e}")
327333

328334
return candidates
329335

app/services/recommendation/utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -167,7 +167,7 @@ async def pad_to_min(
167167

168168
# Quality threshold
169169
va, vc = float(it.get("vote_average") or 0.0), int(it.get("vote_count") or 0)
170-
if vc < 100 or va < 6.2:
170+
if vc < 200 or va < 6.0:
171171
continue
172172
dedup[tid] = it
173173
if len(dedup) >= need * 3:

0 commit comments

Comments
 (0)