File tree: 4 files changed (+36 -11 lines changed)
Original file line number | Diff line number | Diff line change
@@ -161,7 +161,13 @@ async def search(self):
161161 video_list : List [Dict ] = videos_res .get ("result" )
162162
163163 semaphore = asyncio .Semaphore (config .MAX_CONCURRENCY_NUM )
164- task_list = [self .get_video_info_task (aid = video_item .get ("aid" ), bvid = "" , semaphore = semaphore ) for video_item in video_list ]
164+ task_list = []
165+ try :
166+ task_list = [self .get_video_info_task (aid = video_item .get ("aid" ), bvid = "" , semaphore = semaphore ) for video_item in video_list ]
167+ except Exception as e :
168+ utils .logger .warning (
169+ f"[BilibiliCrawler.search] error in the task list. The video for this page will not be included. { e } "
170+ )
165171 video_items = await asyncio .gather (* task_list )
166172 for video_item in video_items :
167173 if video_item :
@@ -199,7 +205,11 @@ async def search(self):
199205 video_list : List [Dict ] = videos_res .get ("result" )
200206
201207 semaphore = asyncio .Semaphore (config .MAX_CONCURRENCY_NUM )
202- task_list = [self .get_video_info_task (aid = video_item .get ("aid" ), bvid = "" , semaphore = semaphore ) for video_item in video_list ]
208+ task_list = []
209+ try :
210+ task_list = [self .get_video_info_task (aid = video_item .get ("aid" ), bvid = "" , semaphore = semaphore ) for video_item in video_list ]
211+ finally :
212+ pass
203213 video_items = await asyncio .gather (* task_list )
204214 for video_item in video_items :
205215 if video_item :
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,9 @@ async def search(self) -> None:
108108 publish_time = PublishTimeType (config .PUBLISH_TIME_TYPE ),
109109 search_id = dy_search_id
110110 )
111+ if posts_res .get ("data" ) is None or posts_res .get ("data" ) == []:
112+ utils .logger .info (f"[DouYinCrawler.search] search douyin keyword: { keyword } , page: { page } is empty,{ posts_res .get ('data' )} `" )
113+ break
111114 except DataFetchError :
112115 utils .logger .error (f"[DouYinCrawler.search] search douyin keyword: { keyword } failed" )
113116 break
Original file line number | Diff line number | Diff line change
@@ -415,6 +415,12 @@ async def get_comments_all_sub_comments(
415415 num = 10 ,
416416 cursor = sub_comment_cursor ,
417417 )
418+
419+ if comments_res is None :
420+ utils .logger .info (
421+ f"[XiaoHongShuClient.get_comments_all_sub_comments] No response found for note_id: { note_id } "
422+ )
423+ continue
418424 sub_comment_has_more = comments_res .get ("has_more" , False )
419425 sub_comment_cursor = comments_res .get ("cursor" , "" )
420426 if "comments" not in comments_res :
Original file line number | Diff line number | Diff line change
@@ -192,15 +192,21 @@ def _extract_content_or_comment_author(author: Dict) -> ZhihuCreator:
192192
193193 """
194194 res = ZhihuCreator ()
195- if not author :
196- return res
197- if not author .get ("id" ):
198- author = author .get ("member" )
199- res .user_id = author .get ("id" )
200- res .user_link = f"{ zhihu_constant .ZHIHU_URL } /people/{ author .get ('url_token' )} "
201- res .user_nickname = author .get ("name" )
202- res .user_avatar = author .get ("avatar_url" )
203- res .url_token = author .get ("url_token" )
195+ try :
196+ if not author :
197+ return res
198+ if not author .get ("id" ):
199+ author = author .get ("member" )
200+ res .user_id = author .get ("id" )
201+ res .user_link = f"{ zhihu_constant .ZHIHU_URL } /people/{ author .get ('url_token' )} "
202+ res .user_nickname = author .get ("name" )
203+ res .user_avatar = author .get ("avatar_url" )
204+ res .url_token = author .get ("url_token" )
205+
206+ except Exception as e :
207+ utils .logger .warning (
208+ f"[ZhihuExtractor._extract_content_or_comment_author] User Maybe Blocked. { e } "
209+ )
204210 return res
205211
206212 def extract_comments (self , page_content : ZhihuContent , comments : List [Dict ]) -> List [ZhihuComment ]:
You can’t perform that action at this time.
0 commit comments