refactor: retune profile constants, quality thresholds, and TMDB discover queries for improved recommendations

TimilsinaBimal · TimilsinaBimal · commit 3bea07189772 · 2026-01-02T12:45:37.000+05:45
diff --git a/app/services/profile/constants.py b/app/services/profile/constants.py
@@ -36,7 +36,7 @@
 CAP_COUNTRY: Final[float] = 20.0
 
 # Recency Decay (exponential decay parameters)
-RECENCY_HALF_LIFE_DAYS: Final[float] = 90.0  # 90-day half-life
+RECENCY_HALF_LIFE_DAYS: Final[float] = 15.0  # 15-day half-life
 RECENCY_DECAY_RATE: Final[float] = 0.98  # Daily decay multiplier (soft decay)
 
 # Smart Sampling
@@ -48,12 +48,12 @@
 FREQUENCY_MULTIPLIER_LOG_FACTOR: Final[float] = 0.1  # Subtle boost
 
 # Top Picks Caps (diversity constraints)
-TOP_PICKS_RECENCY_CAP: Final[float] = 0.15  # Max 15% recent items (from trending/popular)
-TOP_PICKS_GENRE_CAP: Final[float] = 0.30  # Max 30% per genre
-TOP_PICKS_CREATOR_CAP: Final[int] = 2  # Max 2 items per creator (director/actor)
+TOP_PICKS_RECENCY_CAP: Final[float] = 0.10  # Max 10% recent items (from trending/popular)
+TOP_PICKS_GENRE_CAP: Final[float] = 0.40  # Max 40% per genre
+TOP_PICKS_CREATOR_CAP: Final[int] = 3  # Max 3 items per creator (director/actor)
 TOP_PICKS_ERA_CAP: Final[float] = 0.40  # Max 40% per era
-TOP_PICKS_MIN_VOTE_COUNT: Final[int] = 300  # Minimum vote count for quality
-TOP_PICKS_MIN_RATING: Final[float] = 5.0  # Minimum weighted rating for quality
+TOP_PICKS_MIN_VOTE_COUNT: Final[int] = 500  # Minimum vote count for quality
+TOP_PICKS_MIN_RATING: Final[float] = 7.5  # Minimum rating for quality (sent to discover as vote_average_gte)
 
 # Genre whitelist limit (top N genres)
-GENRE_WHITELIST_LIMIT: Final[int] = 5
+GENRE_WHITELIST_LIMIT: Final[int] = 7
diff --git a/app/services/profile/scorer.py b/app/services/profile/scorer.py
@@ -13,8 +13,6 @@
 class ProfileScorer:
     """
     Scores items against taste profile using unified function.
-
-    Design principle: Same function everywhere, no special cases.
     """
 
     @staticmethod
@@ -96,7 +94,7 @@ def _extract_cast_ids(item_metadata: dict[str, Any]) -> list[int]:
         cast_ids = []
         credits = item_metadata.get("credits", {}) or {}
         cast_list = credits.get("cast", []) or []
-        for actor in cast_list[:10]:  # Top 10 only
+        for actor in cast_list[:5]:  # Top 5 only
             if isinstance(actor, dict):
                 actor_id = actor.get("id")
                 if actor_id:
diff --git a/app/services/recommendation/scoring.py b/app/services/recommendation/scoring.py
@@ -12,7 +12,7 @@ class RecommendationScoring:
     """
 
     @staticmethod
-    def weighted_rating(vote_avg: float | None, vote_count: int | None, C: float = 6.8, m: int = 300) -> float:
+    def weighted_rating(vote_avg: float | None, vote_count: int | None, C: float = 6.8, m: int = 150) -> float:
         """IMDb-style weighted rating on 0-10 scale."""
         try:
             R = float(vote_avg or 0.0)
@@ -95,9 +95,9 @@ def apply_quality_adjustments(score: float, wr: float, vote_count: int, is_ranke
         """Apply multiplicative adjustments based on item quality and source."""
         q_mult = 1.0
         if vote_count < 50:
-            q_mult *= 0.6
+            q_mult *= 0.75
         elif vote_count < 150:
-            q_mult *= 0.85
+            q_mult *= 0.90
 
         if wr < 5.5:
             q_mult *= 0.5
diff --git a/app/services/recommendation/top_picks.py b/app/services/recommendation/top_picks.py
@@ -146,7 +146,7 @@ async def _fetch_recommendations_from_top_items(
             List of candidate items
         """
         # Get top items (loved first, then liked, then added, then top watched)
-        top_items = self.smart_sampler.sample_items(library_items, content_type, max_items=10)
+        top_items = self.smart_sampler.sample_items(library_items, content_type, max_items=15)
 
         candidates = []
         tasks = []
@@ -204,29 +204,35 @@ async def _fetch_discover_with_profile(
         # Discover with genres
         if top_genres:
             genre_ids = [g[0] for g in top_genres]
-            tasks.append(
-                self.tmdb_service.get_discover(
-                    mtype,
-                    with_genres="|".join(str(g) for g in genre_ids),
-                    page=1,
-                    sort_by="popularity.desc",
-                    vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
-                    vote_average_gte=TOP_PICKS_MIN_RATING,
-                )
+            tasks.extend(
+                [
+                    self.tmdb_service.get_discover(
+                        mtype,
+                        with_genres="|".join(str(g) for g in genre_ids),
+                        page=page,
+                        sort_by="popularity.asc",
+                        vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
+                        vote_average_gte=TOP_PICKS_MIN_RATING,
+                    )
+                    for page in [1, 2]
+                ]
             )
 
         # Discover with keywords
         if top_keywords:
             keyword_ids = [k[0] for k in top_keywords]
-            tasks.append(
-                self.tmdb_service.get_discover(
-                    mtype,
-                    with_keywords="|".join(str(k) for k in keyword_ids),
-                    page=1,
-                    sort_by="popularity.desc",
-                    vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
-                    vote_average_gte=TOP_PICKS_MIN_RATING,
-                )
+            tasks.extend(
+                [
+                    self.tmdb_service.get_discover(
+                        mtype,
+                        with_keywords="|".join(str(k) for k in keyword_ids),
+                        page=page,
+                        sort_by="popularity.asc",
+                        vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
+                        vote_average_gte=TOP_PICKS_MIN_RATING,
+                    )
+                    for page in range(1, 4)  # 3 pages
+                ]
             )
 
         # Discover with directors
@@ -237,7 +243,7 @@ async def _fetch_discover_with_profile(
                     mtype,
                     with_crew=str(director_id),
                     page=1,
-                    sort_by="popularity.desc",
+                    sort_by="popularity.asc",
                     vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
                     vote_average_gte=TOP_PICKS_MIN_RATING,
                 )
@@ -251,7 +257,7 @@ async def _fetch_discover_with_profile(
                     mtype,
                     with_cast=str(cast_id),
                     page=1,
-                    sort_by="popularity.desc",
+                    sort_by="popularity.asc",
                     vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
                     vote_average_gte=TOP_PICKS_MIN_RATING,
                 )
@@ -268,7 +274,7 @@ async def _fetch_discover_with_profile(
                         mtype,
                         **{f"{prefix}.gte": f"{year_start}-01-01", f"{prefix}.lte": f"{year_start+9}-12-31"},
                         page=1,
-                        sort_by="popularity.desc",
+                        sort_by="popularity.asc",
                         vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
                         vote_average_gte=TOP_PICKS_MIN_RATING,
                     )
@@ -282,7 +288,7 @@ async def _fetch_discover_with_profile(
                     mtype,
                     with_origin_country="|".join(country_codes),
                     page=1,
-                    sort_by="popularity.desc",
+                    sort_by="popularity.asc",
                     vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
                     vote_average_gte=TOP_PICKS_MIN_RATING,
                 )
@@ -319,11 +325,11 @@ async def _fetch_trending_and_popular(self, content_type: str, mtype: str) -> li
             logger.debug(f"Failed to fetch trending: {e}")
 
         # Fetch popular (top rated, 1 page)
-        try:
-            popular = await self.tmdb_service.get_top_rated(mtype, page=1)
-            candidates.extend(popular.get("results", []))
-        except Exception as e:
-            logger.debug(f"Failed to fetch popular: {e}")
+        # try:
+        #     popular = await self.tmdb_service.get_top_rated(mtype, page=1)
+        #     candidates.extend(popular.get("results", []))
+        # except Exception as e:
+        #     logger.debug(f"Failed to fetch popular: {e}")
 
         return candidates
 
diff --git a/app/services/recommendation/utils.py b/app/services/recommendation/utils.py
@@ -167,7 +167,7 @@ async def pad_to_min(
 
         # Quality threshold
         va, vc = float(it.get("vote_average") or 0.0), int(it.get("vote_count") or 0)
-        if vc < 100 or va < 6.2:
+        if vc < 200 or va < 6.0:
             continue
         dedup[tid] = it
         if len(dedup) >= need * 3: