Skip to content

Commit ed126a7

Browse files
refactor: update profile constants and enhance recommendation logic
- Reduced SMART_SAMPLING_MAX_ITEMS from 50 to 30 for improved sampling efficiency.
- Increased TOP_PICKS_MIN_VOTE_COUNT from 100 to 300 to ensure higher quality recommendations.
- Modified SmartSampler to include a percentage-based approach for strong signal items.
- Updated TopPicksService to utilize SmartSampler for item selection and added filtering by vote count and rating in discovery queries.
- Adjusted TMDBService caching settings for better performance and reduced cache sizes.
1 parent 81c8252 commit ed126a7

File tree

5 files changed

+82
-38
lines changed

5 files changed

+82
-38
lines changed

app/services/profile/constants.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@
4040
RECENCY_DECAY_RATE: Final[float] = 0.98 # Daily decay multiplier (soft decay)
4141

4242
# Smart Sampling
43-
SMART_SAMPLING_MAX_ITEMS: Final[int] = 50
43+
SMART_SAMPLING_MAX_ITEMS: Final[int] = 30
4444

4545
# Frequency Multiplier (optional, subtle boost for repeated patterns)
4646
FREQUENCY_ENABLED: Final[bool] = True
@@ -52,7 +52,7 @@
5252
TOP_PICKS_GENRE_CAP: Final[float] = 0.30 # Max 30% per genre
5353
TOP_PICKS_CREATOR_CAP: Final[int] = 2 # Max 2 items per creator (director/actor)
5454
TOP_PICKS_ERA_CAP: Final[float] = 0.40 # Max 40% per era
55-
TOP_PICKS_MIN_VOTE_COUNT: Final[int] = 100 # Minimum vote count for quality
55+
TOP_PICKS_MIN_VOTE_COUNT: Final[int] = 300 # Minimum vote count for quality
5656
TOP_PICKS_MIN_RATING: Final[float] = 5.0 # Minimum weighted rating for quality
5757

5858
# Genre whitelist limit (top N genres)

app/services/profile/sampling.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -69,8 +69,8 @@ def sample_items(
6969
if not (it.get("_is_loved") or it.get("_is_liked") or it.get("_id") in added_item_ids)
7070
]
7171

72-
# Always include all strong signal items
73-
strong_signal_items = loved_liked_items + added_items
72+
# Always include strong signal items: Loved/Liked: 45%, Added: 20%
73+
strong_signal_items = loved_liked_items[: int(max_items * 0.45)] + added_items[: int(max_items * 0.20)]
7474
strong_signal_scored = [self.scoring_service.process_item(it) for it in strong_signal_items]
7575

7676
# Score watched items and sort by score

app/services/recommendation/item_based.py

Lines changed: 16 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
filter_watched_by_imdb,
1212
resolve_tmdb_id,
1313
)
14+
from app.services.tmdb.service import TMDBService
1415

1516

1617
class ItemBasedService:
@@ -19,7 +20,7 @@ class ItemBasedService:
1920
"""
2021

2122
def __init__(self, tmdb_service: Any, user_settings: Any = None):
22-
self.tmdb_service = tmdb_service
23+
self.tmdb_service: TMDBService = tmdb_service
2324
self.user_settings = user_settings
2425

2526
async def get_recommendations_for_item(
@@ -77,7 +78,7 @@ async def get_recommendations_for_item(
7778
# Final filter (remove watched by IMDB ID)
7879
final = filter_watched_by_imdb(enriched, watched_imdb or set())
7980

80-
return final
81+
return final[:limit]
8182

8283
async def _fetch_candidates(self, tmdb_id: int, mtype: str) -> list[dict[str, Any]]:
8384
"""
@@ -92,18 +93,18 @@ async def _fetch_candidates(self, tmdb_id: int, mtype: str) -> list[dict[str, An
9293
"""
9394
combined = {}
9495

95-
# Fetch 2 pages each for recommendations and similar
96-
for action in ["recommendations", "similar"]:
97-
method = getattr(self.tmdb_service, f"get_{action}")
98-
results = await asyncio.gather(*[method(tmdb_id, mtype, page=p) for p in [1, 2]], return_exceptions=True)
99-
100-
for res in results:
101-
if isinstance(res, Exception):
102-
logger.debug(f"Error fetching {action} for {tmdb_id}: {res}")
103-
continue
104-
for item in res.get("results", []):
105-
item_id = item.get("id")
106-
if item_id:
107-
combined[item_id] = item
96+
results = await asyncio.gather(
97+
*[self.tmdb_service.get_recommendations(tmdb_id, mtype, page=p) for p in [1, 2]],
98+
return_exceptions=True,
99+
)
100+
101+
for res in results:
102+
if isinstance(res, Exception):
103+
logger.warning(f"Error fetching recommendations for {tmdb_id}: {res}")
104+
continue
105+
for item in res.get("results", []):
106+
item_id = item.get("id")
107+
if item_id:
108+
combined[item_id] = item
108109

109110
return list(combined.values())

app/services/recommendation/top_picks.py

Lines changed: 52 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -14,10 +14,12 @@
1414
TOP_PICKS_MIN_VOTE_COUNT,
1515
TOP_PICKS_RECENCY_CAP,
1616
)
17+
from app.services.profile.sampling import SmartSampler
1718
from app.services.profile.scorer import ProfileScorer
1819
from app.services.recommendation.metadata import RecommendationMetadata
1920
from app.services.recommendation.scoring import RecommendationScoring
2021
from app.services.recommendation.utils import content_type_to_mtype, filter_watched_by_imdb, resolve_tmdb_id
22+
from app.services.scoring import ScoringService
2123
from app.services.tmdb.service import TMDBService
2224

2325

@@ -30,6 +32,8 @@ def __init__(self, tmdb_service: TMDBService, user_settings: UserSettings | None
3032
self.tmdb_service: TMDBService = tmdb_service
3133
self.user_settings: UserSettings | None = user_settings
3234
self.scorer: ProfileScorer = ProfileScorer()
35+
self.scoring_service = ScoringService()
36+
self.smart_sampler = SmartSampler(self.scoring_service)
3337

3438
async def get_top_picks(
3539
self,
@@ -142,16 +146,7 @@ async def _fetch_recommendations_from_top_items(
142146
List of candidate items
143147
"""
144148
# Get top items (loved first, then liked, then added, then top watched)
145-
all_items = (
146-
library_items.get("loved", [])
147-
+ library_items.get("liked", [])
148-
+ library_items.get("added", [])
149-
+ library_items.get("watched", [])
150-
)
151-
typed_items = [it for it in all_items if it.get("type") == content_type]
152-
153-
# Limit to top 5 items (to avoid too many API calls)
154-
top_items = typed_items[:5]
149+
top_items = self.smart_sampler.sample_items(library_items, content_type, max_items=15)
155150

156151
candidates = []
157152
tasks = []
@@ -168,7 +163,7 @@ async def _fetch_recommendations_from_top_items(
168163

169164
# Fetch recommendations (1 page only)
170165
tasks.append(self.tmdb_service.get_recommendations(tmdb_id, mtype, page=1))
171-
tasks.append(self.tmdb_service.get_similar(tmdb_id, mtype, page=1))
166+
# tasks.append(self.tmdb_service.get_similar(tmdb_id, mtype, page=1))
172167

173168
# Execute all in parallel
174169
results = await asyncio.gather(*tasks, return_exceptions=True)
@@ -196,10 +191,11 @@ async def _fetch_discover_with_profile(
196191
"""
197192
# Get top features from profile
198193
top_genres = profile.get_top_genres(limit=2)
199-
top_keywords = profile.get_top_keywords(limit=2)
194+
top_keywords = profile.get_top_keywords(limit=3)
200195
top_directors = profile.get_top_directors(limit=2)
201196
top_cast = profile.get_top_cast(limit=2)
202197
top_eras = profile.get_top_eras(limit=1)
198+
top_countries = profile.get_top_countries(limit=1)
203199

204200
candidates = []
205201
tasks = []
@@ -209,7 +205,12 @@ async def _fetch_discover_with_profile(
209205
genre_ids = [g[0] for g in top_genres]
210206
tasks.append(
211207
self.tmdb_service.get_discover(
212-
mtype, with_genres="|".join(str(g) for g in genre_ids), page=1, sort_by="popularity.desc"
208+
mtype,
209+
with_genres="|".join(str(g) for g in genre_ids),
210+
page=1,
211+
sort_by="popularity.desc",
212+
vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
213+
vote_average_gte=TOP_PICKS_MIN_RATING,
213214
)
214215
)
215216

@@ -218,22 +219,41 @@ async def _fetch_discover_with_profile(
218219
keyword_ids = [k[0] for k in top_keywords]
219220
tasks.append(
220221
self.tmdb_service.get_discover(
221-
mtype, with_keywords="|".join(str(k) for k in keyword_ids), page=1, sort_by="popularity.desc"
222+
mtype,
223+
with_keywords="|".join(str(k) for k in keyword_ids),
224+
page=1,
225+
sort_by="popularity.desc",
226+
vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
227+
vote_average_gte=TOP_PICKS_MIN_RATING,
222228
)
223229
)
224230

225231
# Discover with directors
226232
if top_directors:
227233
director_id = top_directors[0][0]
228234
tasks.append(
229-
self.tmdb_service.get_discover(mtype, with_crew=str(director_id), page=1, sort_by="popularity.desc")
235+
self.tmdb_service.get_discover(
236+
mtype,
237+
with_crew=str(director_id),
238+
page=1,
239+
sort_by="popularity.desc",
240+
vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
241+
vote_average_gte=TOP_PICKS_MIN_RATING,
242+
)
230243
)
231244

232245
# Discover with cast
233246
if top_cast:
234247
cast_id = top_cast[0][0]
235248
tasks.append(
236-
self.tmdb_service.get_discover(mtype, with_cast=str(cast_id), page=1, sort_by="popularity.desc")
249+
self.tmdb_service.get_discover(
250+
mtype,
251+
with_cast=str(cast_id),
252+
page=1,
253+
sort_by="popularity.desc",
254+
vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
255+
vote_average_gte=TOP_PICKS_MIN_RATING,
256+
)
237257
)
238258

239259
# Discover with era (year range)
@@ -248,9 +268,25 @@ async def _fetch_discover_with_profile(
248268
**{f"{prefix}.gte": f"{year_start}-01-01", f"{prefix}.lte": f"{year_start+9}-12-31"},
249269
page=1,
250270
sort_by="popularity.desc",
271+
vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
272+
vote_average_gte=TOP_PICKS_MIN_RATING,
251273
)
252274
)
253275

276+
# Discover with countries
277+
if top_countries:
278+
country_codes = [c[0] for c in top_countries]
279+
tasks.append(
280+
self.tmdb_service.get_discover(
281+
mtype,
282+
with_origin_country="|".join(country_codes),
283+
page=1,
284+
sort_by="popularity.desc",
285+
vote_count_gte=TOP_PICKS_MIN_VOTE_COUNT,
286+
vote_average_gte=TOP_PICKS_MIN_RATING,
287+
)
288+
)
289+
254290
# Execute all in parallel
255291
results = await asyncio.gather(*tasks, return_exceptions=True)
256292

app/services/tmdb/service.py

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@
66

77
from app.services.tmdb.client import TMDBClient
88

9+
# from app.services.profile.constants import TOP_PICKS_MIN_VOTE_COUNT, TOP_PICKS_MIN_RATING
10+
911

1012
class TMDBService:
1113
"""
@@ -20,7 +22,7 @@ async def close(self):
2022
"""Close the underlying HTTP client."""
2123
await self.client.close()
2224

23-
@alru_cache(maxsize=2000)
25+
@alru_cache(maxsize=1000)
2426
async def find_by_imdb_id(self, imdb_id: str) -> tuple[int | None, str | None]:
2527
"""Find TMDB ID and type by IMDB ID."""
2628
try:
@@ -49,23 +51,25 @@ async def find_by_imdb_id(self, imdb_id: str) -> tuple[int | None, str | None]:
4951
logger.exception(f"Error finding TMDB ID for IMDB {imdb_id}: {e}")
5052
return None, None
5153

52-
@alru_cache(maxsize=5000)
54+
@alru_cache(maxsize=500)
5355
async def get_movie_details(self, movie_id: int) -> dict[str, Any]:
5456
"""Get details of a specific movie with credits and keywords."""
5557
params = {"append_to_response": "credits,external_ids,keywords"}
5658
return await self.client.get(f"/movie/{movie_id}", params=params)
5759

58-
@alru_cache(maxsize=5000)
60+
@alru_cache(maxsize=500)
5961
async def get_tv_details(self, tv_id: int) -> dict[str, Any]:
6062
"""Get details of a specific TV series with credits and keywords."""
6163
params = {"append_to_response": "credits,external_ids,keywords"}
6264
return await self.client.get(f"/tv/{tv_id}", params=params)
6365

66+
@alru_cache(maxsize=500, ttl=86400)
6467
async def get_recommendations(self, tmdb_id: int, media_type: str, page: int = 1) -> dict[str, Any]:
6568
"""Get recommendations based on TMDB ID and media type."""
6669
params = {"page": page}
6770
return await self.client.get(f"/{media_type}/{tmdb_id}/recommendations", params=params)
6871

72+
@alru_cache(maxsize=500, ttl=86400)
6973
async def get_similar(self, tmdb_id: int, media_type: str, page: int = 1) -> dict[str, Any]:
7074
"""Get similar content based on TMDB ID and media type."""
7175
params = {"page": page}
@@ -84,6 +88,9 @@ async def get_discover(
8488
params = {"page": page, "sort_by": sort_by}
8589
if with_genres:
8690
params["with_genres"] = with_genres
91+
# # always filter by vote count
92+
# params["vote_count.gte"] = TOP_PICKS_MIN_VOTE_COUNT
93+
# params["vote_average.gte"] = TOP_PICKS_MIN_RATING
8794
params.update(kwargs)
8895
return await self.client.get(f"/discover/{mt}", params=params)
8996

0 commit comments

Comments
 (0)