@@ -34,11 +34,21 @@ async def discover_recommendations(
3434 """
3535 Find content that matches the user's taste profile using multi-phase TMDB discovery.
3636 """
37+ # Calculate pages to fetch per query based on excluded genres
38+ num_excluded = len (excluded_genres ) if excluded_genres else 0
39+ if num_excluded > 10 :
40+ pages_per_query = 5 # Fetch 5 pages when most genres are excluded
41+ elif num_excluded > 5 :
42+ pages_per_query = 3 # Fetch 3 pages when many genres are excluded
43+ else :
44+ pages_per_query = 1 # Default: 1 page
45+
3746 # 1. Build Phase 1 Tasks
3847 tasks = self ._build_discovery_tasks_phase1 (
3948 profile ,
4049 content_type ,
4150 excluded_genres ,
51+ pages_per_query = pages_per_query ,
4252 use_genres = use_genres ,
4353 use_keywords = use_keywords ,
4454 use_cast = use_cast ,
@@ -68,6 +78,7 @@ async def discover_recommendations(
6878 profile ,
6979 content_type ,
7080 excluded_genres ,
81+ pages_per_query = pages_per_query ,
7182 use_genres = use_genres ,
7283 use_keywords = use_keywords ,
7384 use_cast = use_cast ,
@@ -88,6 +99,7 @@ def _build_discovery_tasks_phase1(
8899 profile : UserTasteProfile ,
89100 content_type : str ,
90101 excluded_genres : list [int ] | None = None ,
102+ pages_per_query : int = 1 ,
91103 ** opts ,
92104 ) -> list [Any ]:
93105 """Construct the initial set of discovery tasks based on top profile features."""
@@ -106,32 +118,51 @@ def _build_discovery_tasks_phase1(
106118 if excluded_genres :
107119 base_params ["without_genres" ] = "|" .join ([str (g ) for g in excluded_genres ])
108120
109- # Query 1: Top Genres
121+ # Query 1: Top Genres - fetch multiple pages
110122 if top_genres :
111123 genre_ids = "|" .join ([str (g [0 ]) for g in top_genres ])
112- tasks .append (
113- self ._fetch_discovery (
114- content_type ,
115- {"with_genres" : genre_ids , "sort_by" : "popularity.desc" , "vote_count.gte" : 500 , ** base_params },
124+ for page in range (1 , pages_per_query + 1 ):
125+ tasks .append (
126+ self ._fetch_discovery (
127+ content_type ,
128+ {
129+ "with_genres" : genre_ids ,
130+ "sort_by" : "popularity.desc" ,
131+ "vote_count.gte" : 500 ,
132+ "page" : page ,
133+ ** base_params ,
134+ },
135+ )
116136 )
117- )
118- tasks .append (
119- self ._fetch_discovery (
120- content_type ,
121- {"with_genres" : genre_ids , "sort_by" : "vote_average.desc" , "vote_count.gte" : 500 , ** base_params },
137+ tasks .append (
138+ self ._fetch_discovery (
139+ content_type ,
140+ {
141+ "with_genres" : genre_ids ,
142+ "sort_by" : "vote_average.desc" ,
143+ "vote_count.gte" : 500 ,
144+ "page" : page ,
145+ ** base_params ,
146+ },
147+ )
122148 )
123- )
124149
125- # Query 2: Top Keywords
150+ # Query 2: Top Keywords - fetch multiple pages
126151 if top_keywords :
127152 keyword_ids = "|" .join ([str (k [0 ]) for k in top_keywords ])
128- tasks .append (
129- self ._fetch_discovery (
130- content_type ,
131- {"with_keywords" : keyword_ids , "sort_by" : "popularity.desc" , "vote_count.gte" : 500 , ** base_params },
153+ for page in range (1 , pages_per_query + 1 ):
154+ tasks .append (
155+ self ._fetch_discovery (
156+ content_type ,
157+ {
158+ "with_keywords" : keyword_ids ,
159+ "sort_by" : "popularity.desc" ,
160+ "vote_count.gte" : 500 ,
161+ "page" : page ,
162+ ** base_params ,
163+ },
164+ )
132165 )
133- )
134- for page in range (1 , 3 ):
135166 tasks .append (
136167 self ._fetch_discovery (
137168 content_type ,
@@ -145,55 +176,62 @@ def _build_discovery_tasks_phase1(
145176 )
146177 )
147178
148- # Query 3: Cast & Crew
179+ # Query 3: Cast & Crew - fetch multiple pages
149180 is_tv = content_type in ("tv" , "series" )
150181 for actor in top_cast :
151- p = {"sort_by" : "popularity.desc" , "vote_count.gte" : 500 , ** base_params }
152- p ["with_people" if is_tv else "with_cast" ] = str (actor [0 ])
153- tasks .append (self ._fetch_discovery (content_type , p ))
182+ for page in range (1 , pages_per_query + 1 ):
183+ p = {"sort_by" : "popularity.desc" , "vote_count.gte" : 500 , "page" : page , ** base_params }
184+ p ["with_people" if is_tv else "with_cast" ] = str (actor [0 ])
185+ tasks .append (self ._fetch_discovery (content_type , p ))
154186
155187 if top_crew :
156- p = {"sort_by" : "vote_average.desc" , "vote_count.gte" : 500 , ** base_params }
157- p ["with_people" if is_tv else "with_crew" ] = str (top_crew [0 ][0 ])
158- tasks .append (self ._fetch_discovery (content_type , p ))
188+ for page in range (1 , pages_per_query + 1 ):
189+ p = {"sort_by" : "vote_average.desc" , "vote_count.gte" : 500 , "page" : page , ** base_params }
190+ p ["with_people" if is_tv else "with_crew" ] = str (top_crew [0 ][0 ])
191+ tasks .append (self ._fetch_discovery (content_type , p ))
159192
160- # Query 4: Countries & Year
193+ # Query 4: Countries & Year - fetch multiple pages
161194 if top_countries :
162195 country_ids = "|" .join ([str (c [0 ]) for c in top_countries ])
163- tasks .append (
164- self ._fetch_discovery (
165- content_type ,
166- {
167- "with_origin_country" : country_ids ,
168- "sort_by" : "popularity.desc" ,
169- "vote_count.gte" : 100 ,
170- ** base_params ,
171- },
196+ for page in range (1 , pages_per_query + 1 ):
197+ tasks .append (
198+ self ._fetch_discovery (
199+ content_type ,
200+ {
201+ "with_origin_country" : country_ids ,
202+ "sort_by" : "popularity.desc" ,
203+ "vote_count.gte" : 100 ,
204+ "page" : page ,
205+ ** base_params ,
206+ },
207+ )
172208 )
173- )
174209
175210 if top_year :
176211 year = top_year [0 ][0 ]
177212 prefix = "first_air_date" if is_tv else "primary_release_date"
178- tasks .append (
179- self ._fetch_discovery (
180- content_type ,
181- {
182- "sort_by" : "vote_average.desc" ,
183- "vote_count.gte" : 500 ,
184- f"{ prefix } .gte" : f"{ year } -01-01" ,
185- f"{ prefix } .lte" : f"{ int (year )+ 9 } -12-31" ,
186- ** base_params ,
187- },
213+ for page in range (1 , pages_per_query + 1 ):
214+ tasks .append (
215+ self ._fetch_discovery (
216+ content_type ,
217+ {
218+ "sort_by" : "vote_average.desc" ,
219+ "vote_count.gte" : 500 ,
220+ f"{ prefix } .gte" : f"{ year } -01-01" ,
221+ f"{ prefix } .lte" : f"{ int (year )+ 9 } -12-31" ,
222+ "page" : page ,
223+ ** base_params ,
224+ },
225+ )
188226 )
189- )
190227 return tasks
191228
192229 def _build_discovery_tasks_phase2 (
193230 self ,
194231 profile : UserTasteProfile ,
195232 content_type : str ,
196233 excluded_genres : list [int ] | None = None ,
234+ pages_per_query : int = 1 ,
197235 ** opts ,
198236 ) -> list [Any ]:
199237 """Construct additional discovery tasks with lower thresholds to fill out candidate pool."""
@@ -202,32 +240,41 @@ def _build_discovery_tasks_phase2(
202240 top_cast = profile .cast .get_top_features (limit = 1 ) if opts .get ("use_cast" ) else []
203241
204242 tasks = []
205- base_params = {"vote_count.gte" : 400 , "page" : 2 }
243+ base_params = {"vote_count.gte" : 400 }
206244 if excluded_genres :
207245 base_params ["without_genres" ] = "|" .join ([str (g ) for g in excluded_genres ])
208246
247+ # Phase 2 begins at page 2 and requests pages_per_query consecutive pages (end_page is exclusive)
248+ start_page = 2
249+ end_page = start_page + pages_per_query
250+
209251 if top_genres :
210252 genre_ids = "|" .join ([str (g [0 ]) for g in top_genres ])
211- tasks .append (
212- self ._fetch_discovery (
213- content_type , {"with_genres" : genre_ids , "sort_by" : "vote_average.desc" , ** base_params }
253+ for page in range (start_page , end_page ):
254+ tasks .append (
255+ self ._fetch_discovery (
256+ content_type ,
257+ {"with_genres" : genre_ids , "sort_by" : "vote_average.desc" , "page" : page , ** base_params },
258+ )
214259 )
215- )
216260
217261 if top_keywords :
218262 keyword_ids = "|" .join ([str (k [0 ]) for k in top_keywords ])
219- tasks .append (
220- self ._fetch_discovery (
221- content_type , {"with_keywords" : keyword_ids , "sort_by" : "vote_average.desc" , ** base_params }
263+ for page in range (start_page , end_page ):
264+ tasks .append (
265+ self ._fetch_discovery (
266+ content_type ,
267+ {"with_keywords" : keyword_ids , "sort_by" : "vote_average.desc" , "page" : page , ** base_params },
268+ )
222269 )
223- )
224270
225271 if top_cast :
226272 actor_id = top_cast [0 ][0 ]
227273 is_tv = content_type in ("tv" , "series" )
228- p = {"sort_by" : "vote_average.desc" , ** base_params }
229- p ["with_people" if is_tv else "with_cast" ] = str (actor_id )
230- tasks .append (self ._fetch_discovery (content_type , p ))
274+ for page in range (start_page , end_page ):
275+ p = {"sort_by" : "vote_average.desc" , "page" : page , ** base_params }
276+ p ["with_people" if is_tv else "with_cast" ] = str (actor_id )
277+ tasks .append (self ._fetch_discovery (content_type , p ))
231278
232279 return tasks
233280
0 commit comments