@@ -34,11 +34,21 @@ async def discover_recommendations(
3434 """
3535 Find content that matches the user's taste profile using multi-phase TMDB discovery.
3636 """
37+ # Calculate pages to fetch per query based on excluded genres
38+ num_excluded = len (excluded_genres ) if excluded_genres else 0
39+ if num_excluded > 10 :
40+ pages_per_query = 5 # Fetch 5 pages when most genres are excluded
41+ elif num_excluded > 5 :
42+ pages_per_query = 3 # Fetch 3 pages when many genres are excluded
43+ else :
44+ pages_per_query = 1 # Default: 1 page
45+
3746 # 1. Build Phase 1 Tasks
3847 tasks = self ._build_discovery_tasks_phase1 (
3948 profile ,
4049 content_type ,
4150 excluded_genres ,
51+ pages_per_query = pages_per_query ,
4252 use_genres = use_genres ,
4353 use_keywords = use_keywords ,
4454 use_cast = use_cast ,
@@ -68,6 +78,7 @@ async def discover_recommendations(
6878 profile ,
6979 content_type ,
7080 excluded_genres ,
81+ pages_per_query = pages_per_query ,
7182 use_genres = use_genres ,
7283 use_keywords = use_keywords ,
7384 use_cast = use_cast ,
@@ -88,6 +99,7 @@ def _build_discovery_tasks_phase1(
8899 profile : UserTasteProfile ,
89100 content_type : str ,
90101 excluded_genres : list [int ] | None = None ,
102+ pages_per_query : int = 1 ,
91103 ** opts ,
92104 ) -> list [Any ]:
93105 """Construct the initial set of discovery tasks based on top profile features."""
@@ -106,32 +118,40 @@ def _build_discovery_tasks_phase1(
106118 if excluded_genres :
107119 base_params ["without_genres" ] = "|" .join ([str (g ) for g in excluded_genres ])
108120
109- # Query 1: Top Genres
121+ # Query 1: Top Genres - fetch multiple pages
110122 if top_genres :
111123 genre_ids = "|" .join ([str (g [0 ]) for g in top_genres ])
112- tasks .append (
113- self ._fetch_discovery (
114- content_type ,
115- {"with_genres" : genre_ids , "sort_by" : "popularity.desc" , "vote_count.gte" : 500 , ** base_params },
116- )
117- )
118- tasks .append (
119- self ._fetch_discovery (
120- content_type ,
121- {"with_genres" : genre_ids , "sort_by" : "vote_average.desc" , "vote_count.gte" : 500 , ** base_params },
122- )
123- )
124+ for page in range (1 , pages_per_query + 1 ):
125+ for sort_by_option in ["popularity.desc" , "vote_average.desc" ]:
126+ tasks .append (
127+ self ._fetch_discovery (
128+ content_type ,
129+ {
130+ "with_genres" : genre_ids ,
131+ "sort_by" : sort_by_option ,
132+ "vote_count.gte" : 500 ,
133+ "page" : page ,
134+ ** base_params ,
135+ },
136+ )
137+ )
124138
125- # Query 2: Top Keywords
139+ # Query 2: Top Keywords - fetch multiple pages
126140 if top_keywords :
127141 keyword_ids = "|" .join ([str (k [0 ]) for k in top_keywords ])
128- tasks .append (
129- self ._fetch_discovery (
130- content_type ,
131- {"with_keywords" : keyword_ids , "sort_by" : "popularity.desc" , "vote_count.gte" : 500 , ** base_params },
142+ for page in range (1 , pages_per_query + 1 ):
143+ tasks .append (
144+ self ._fetch_discovery (
145+ content_type ,
146+ {
147+ "with_keywords" : keyword_ids ,
148+ "sort_by" : "popularity.desc" ,
149+ "vote_count.gte" : 500 ,
150+ "page" : page ,
151+ ** base_params ,
152+ },
153+ )
132154 )
133- )
134- for page in range (1 , 3 ):
135155 tasks .append (
136156 self ._fetch_discovery (
137157 content_type ,
@@ -145,55 +165,62 @@ def _build_discovery_tasks_phase1(
145165 )
146166 )
147167
148- # Query 3: Cast & Crew
168+ # Query 3: Cast & Crew - fetch multiple pages
149169 is_tv = content_type in ("tv" , "series" )
150170 for actor in top_cast :
151- p = {"sort_by" : "popularity.desc" , "vote_count.gte" : 500 , ** base_params }
152- p ["with_people" if is_tv else "with_cast" ] = str (actor [0 ])
153- tasks .append (self ._fetch_discovery (content_type , p ))
171+ for page in range (1 , pages_per_query + 1 ):
172+ p = {"sort_by" : "popularity.desc" , "vote_count.gte" : 500 , "page" : page , ** base_params }
173+ p ["with_people" if is_tv else "with_cast" ] = str (actor [0 ])
174+ tasks .append (self ._fetch_discovery (content_type , p ))
154175
155176 if top_crew :
156- p = {"sort_by" : "vote_average.desc" , "vote_count.gte" : 500 , ** base_params }
157- p ["with_people" if is_tv else "with_crew" ] = str (top_crew [0 ][0 ])
158- tasks .append (self ._fetch_discovery (content_type , p ))
177+ for page in range (1 , pages_per_query + 1 ):
178+ p = {"sort_by" : "vote_average.desc" , "vote_count.gte" : 500 , "page" : page , ** base_params }
179+ p ["with_people" if is_tv else "with_crew" ] = str (top_crew [0 ][0 ])
180+ tasks .append (self ._fetch_discovery (content_type , p ))
159181
160- # Query 4: Countries & Year
182+ # Query 4: Countries & Year - fetch multiple pages
161183 if top_countries :
162184 country_ids = "|" .join ([str (c [0 ]) for c in top_countries ])
163- tasks .append (
164- self ._fetch_discovery (
165- content_type ,
166- {
167- "with_origin_country" : country_ids ,
168- "sort_by" : "popularity.desc" ,
169- "vote_count.gte" : 100 ,
170- ** base_params ,
171- },
185+ for page in range (1 , pages_per_query + 1 ):
186+ tasks .append (
187+ self ._fetch_discovery (
188+ content_type ,
189+ {
190+ "with_origin_country" : country_ids ,
191+ "sort_by" : "popularity.desc" ,
192+ "vote_count.gte" : 100 ,
193+ "page" : page ,
194+ ** base_params ,
195+ },
196+ )
172197 )
173- )
174198
175199 if top_year :
176200 year = top_year [0 ][0 ]
177201 prefix = "first_air_date" if is_tv else "primary_release_date"
178- tasks .append (
179- self ._fetch_discovery (
180- content_type ,
181- {
182- "sort_by" : "vote_average.desc" ,
183- "vote_count.gte" : 500 ,
184- f"{ prefix } .gte" : f"{ year } -01-01" ,
185- f"{ prefix } .lte" : f"{ int (year )+ 9 } -12-31" ,
186- ** base_params ,
187- },
202+ for page in range (1 , pages_per_query + 1 ):
203+ tasks .append (
204+ self ._fetch_discovery (
205+ content_type ,
206+ {
207+ "sort_by" : "vote_average.desc" ,
208+ "vote_count.gte" : 500 ,
209+ f"{ prefix } .gte" : f"{ year } -01-01" ,
210+ f"{ prefix } .lte" : f"{ int (year )+ 9 } -12-31" ,
211+ "page" : page ,
212+ ** base_params ,
213+ },
214+ )
188215 )
189- )
190216 return tasks
191217
192218 def _build_discovery_tasks_phase2 (
193219 self ,
194220 profile : UserTasteProfile ,
195221 content_type : str ,
196222 excluded_genres : list [int ] | None = None ,
223+ pages_per_query : int = 1 ,
197224 ** opts ,
198225 ) -> list [Any ]:
199226 """Construct additional discovery tasks with lower thresholds to fill out candidate pool."""
@@ -202,32 +229,41 @@ def _build_discovery_tasks_phase2(
202229 top_cast = profile .cast .get_top_features (limit = 1 ) if opts .get ("use_cast" ) else []
203230
204231 tasks = []
205- base_params = {"vote_count.gte" : 400 , "page" : 2 }
232+ base_params = {"vote_count.gte" : 400 }
206233 if excluded_genres :
207234 base_params ["without_genres" ] = "|" .join ([str (g ) for g in excluded_genres ])
208235
236+ # Start from page 2 for phase 2, but fetch multiple pages if needed
237+ start_page = 2
238+ end_page = start_page + pages_per_query
239+
209240 if top_genres :
210241 genre_ids = "|" .join ([str (g [0 ]) for g in top_genres ])
211- tasks .append (
212- self ._fetch_discovery (
213- content_type , {"with_genres" : genre_ids , "sort_by" : "vote_average.desc" , ** base_params }
242+ for page in range (start_page , end_page ):
243+ tasks .append (
244+ self ._fetch_discovery (
245+ content_type ,
246+ {"with_genres" : genre_ids , "sort_by" : "vote_average.desc" , "page" : page , ** base_params },
247+ )
214248 )
215- )
216249
217250 if top_keywords :
218251 keyword_ids = "|" .join ([str (k [0 ]) for k in top_keywords ])
219- tasks .append (
220- self ._fetch_discovery (
221- content_type , {"with_keywords" : keyword_ids , "sort_by" : "vote_average.desc" , ** base_params }
252+ for page in range (start_page , end_page ):
253+ tasks .append (
254+ self ._fetch_discovery (
255+ content_type ,
256+ {"with_keywords" : keyword_ids , "sort_by" : "vote_average.desc" , "page" : page , ** base_params },
257+ )
222258 )
223- )
224259
225260 if top_cast :
226261 actor_id = top_cast [0 ][0 ]
227262 is_tv = content_type in ("tv" , "series" )
228- p = {"sort_by" : "vote_average.desc" , ** base_params }
229- p ["with_people" if is_tv else "with_cast" ] = str (actor_id )
230- tasks .append (self ._fetch_discovery (content_type , p ))
263+ for page in range (start_page , end_page ):
264+ p = {"sort_by" : "vote_average.desc" , "page" : page , ** base_params }
265+ p ["with_people" if is_tv else "with_cast" ] = str (actor_id )
266+ tasks .append (self ._fetch_discovery (content_type , p ))
231267
232268 return tasks
233269
0 commit comments