@@ -185,8 +185,13 @@ async def scrape_profiles(urls: List[str]) -> List[Dict]:
 
 def parse_search(response: ScrapeApiResponse) -> List[Dict]:
     """parse search data from the API response"""
-    data = json.loads(response.scrape_result["content"])
-    search_data = data["data"]
+    try:
+        data = json.loads(response.scrape_result["content"])
+        search_data = data["data"]
+    except Exception as e:
+        log.error(f"Failed to parse JSON from search API response: {e}")
+        return None
+
     parsed_search = []
     for item in search_data:
         if item["type"] == 1:  # get the item if it was item only
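For context, the parse-or-None pattern this hunk introduces can be sketched in isolation. This is a minimal, self-contained sketch, not the scraper's code: parse_or_none is a hypothetical stand-in for parse_search, and the payload literals are illustrative, not real API responses.

    import json
    from typing import Dict, List, Optional

    def parse_or_none(raw: str) -> Optional[List[Dict]]:
        # return the parsed items, or None when the payload is malformed
        try:
            return json.loads(raw)["data"]
        except (json.JSONDecodeError, KeyError, TypeError):
            return None

    assert parse_or_none('{"data": [{"type": 1}]}') == [{"type": 1}]
    assert parse_or_none("<html>blocked</html>") is None  # e.g. a block page instead of JSON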
@@ -212,7 +217,7 @@ def parse_search(response: ScrapeApiResponse) -> List[Dict]:
 
 async def obtain_session(url: str) -> str:
     """create a session to save the cookies and authorize the search API"""
-    session_id = "tiktok_search_session"
+    session_id = str(uuid.uuid4().hex)
     await SCRAPFLY.async_scrape(ScrapeConfig(url, **BASE_CONFIG, render_js=True, session=session_id))
     return session_id
 
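The replacement session id comes from uuid.uuid4().hex, a 32-character hex string that is effectively unique per call, so repeated or concurrent runs no longer share one hardcoded cookie jar. A quick illustration:

    import uuid

    a = uuid.uuid4().hex
    b = uuid.uuid4().hex
    print(len(a), a != b)  # 32 True -- every run gets its own session/cookie jar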
@@ -265,7 +270,8 @@ def form_api_url(cursor: int):
     ]
     async for response in SCRAPFLY.concurrent_scrape(_other_pages):
         data = parse_search(response)
-        search_data.extend(data)
+        if data is not None:
+            search_data.extend(data)
 
     log.success(f"scraped {len(search_data)} from the search API from the keyword {keyword}")
     return search_data
@@ -288,11 +294,11 @@ def parse_channel(videos: List[Dict]) -> List[Dict]:
         parsed_data.append(result)
     return parsed_data
 
-async def scrape_channel(url: str, max_videos: int = 100, max_videos_per_request: int = 18) -> List[Dict]:
+async def scrape_channel(url: str, max_pages: int = 5, max_videos_per_request: int = 18) -> List[Dict]:
     """scrape video data from a channel by calling the item_list API directly
     Args:
         url (str): The channel URL to scrape.
-        max_videos (int, optional): Maximum total number of videos to fetch. Defaults to 500.
+        max_pages (int, optional): Maximum number of pages to fetch. Defaults to 5.
         max_videos_per_request (int, optional): Number of videos to request per API call.
             recommend to be within (10, 20). Some channels may fail if this value is set higher.
     """
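A hypothetical call under the new signature (the channel handle is a placeholder, and the module-level SCRAPFLY client is assumed to be configured as elsewhere in this scraper):

    import asyncio

    # fetch at most 5 pages (the new default) of channel videos
    videos = asyncio.run(scrape_channel("https://www.tiktok.com/@example", max_pages=5))
    print(len(videos))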
@@ -362,13 +368,14 @@ def build_api_url(cursor: int = 0) -> str:
     all_videos = []
     cursor = 0
     has_more = True
+    current_page = 0
 
     # Create a session to maintain cookies
     session_id = "tiktok_channel_session"
-    log.info(f"starting video fetch loop, max_videos={max_videos}")
+    log.info(f"starting video fetch loop, max_pages={max_pages}")
 
-    while has_more and len(all_videos) < max_videos:
-        log.info(f"fetching videos batch, cursor: {cursor}, current total: {len(all_videos)}")
+    while has_more and current_page < max_pages:
+        log.info(f"fetching videos batch, page: {current_page + 1}/{max_pages}, cursor: {cursor}, current total: {len(all_videos)}")
 
         api_response = await SCRAPFLY.async_scrape(
             ScrapeConfig(
@@ -393,16 +400,11 @@ def build_api_url(cursor: int = 0) -> str:
             # Update cursor for next page
             has_more = data.get("hasMore", False)
             cursor = data.get("cursor", 0)
-            log.debug(f"hasMore={has_more}, next cursor={cursor}")
+            current_page += 1
+            log.debug(f"hasMore={has_more}, next cursor={cursor}, current_page={current_page}")
         else:
             log.warning("no videos found in response, stopping pagination")
             break
-
-        # Stop if we've reached the desired count
-        if len(all_videos) >= max_videos:
-            all_videos = all_videos[:max_videos]
-            log.info(f"reached max_videos limit, truncating to {max_videos}")
-            break
 
     log.info(f"parsing {len(all_videos)} videos")
     # Parse the video data
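Taken together, the loop now follows the usual cursor/hasMore pagination contract, bounded by page count rather than by item count. Stripped of the scraping details, the control flow reduces to the sketch below; fetch_page is a hypothetical stand-in for the item_list API call, and the "itemList"/"hasMore"/"cursor" keys mirror the response fields used above.

    from typing import Callable, Dict, List

    def paginate(fetch_page: Callable[[int], Dict], max_pages: int = 5) -> List[Dict]:
        items, cursor, page, has_more = [], 0, 0, True
        while has_more and page < max_pages:
            data = fetch_page(cursor)
            items.extend(data.get("itemList", []))
            has_more = data.get("hasMore", False)  # server signals whether more pages exist
            cursor = data.get("cursor", 0)         # opaque offset for the next request
            page += 1
        return items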