
Commit afbd4ec

Merge pull request #572 from crpa33/main

Avoid the process terminating when data is unexpectedly None

2 parents dfddfa7 + 274d64a · commit afbd4ec

4 files changed: +36 −11 lines changed
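All four changes apply the same defensive pattern: treat a None or empty payload from a platform API as a skippable page instead of letting the exception kill the whole crawl. A minimal sketch of the failure mode this guards against, using a hypothetical response dict rather than code from the repo:

# Hypothetical API response whose "result" field is unexpectedly None.
videos_res = {"code": 0, "result": None}

video_list = videos_res.get("result")
try:
    # Iterating None raises TypeError; unhandled, it would abort the crawl.
    task_list = [item.get("aid") for item in video_list]
except Exception as e:
    task_list = []  # degrade gracefully: skip this page, keep crawling
    print(f"skipping page: {e}")

print(task_list)  # []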

media_platform/bilibili/core.py

Lines changed: 12 additions & 2 deletions
@@ -161,7 +161,13 @@ async def search(self):
             video_list: List[Dict] = videos_res.get("result")

             semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
-            task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list]
+            task_list = []
+            try:
+                task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list]
+            except Exception as e:
+                utils.logger.warning(
+                    f"[BilibiliCrawler.search] error in the task list. The video for this page will not be included. {e}"
+                )
             video_items = await asyncio.gather(*task_list)
             for video_item in video_items:
                 if video_item:
@@ -199,7 +205,11 @@ async def search(self):
             video_list: List[Dict] = videos_res.get("result")

             semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
-            task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list]
+            task_list = []
+            try:
+                task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list]
+            finally:
+                pass
             video_items = await asyncio.gather(*task_list)
             for video_item in video_items:
                 if video_item:
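Note that the two hunks are not equivalent: the first catches the exception and logs it, while `finally: pass` in the second only runs a no-op cleanup step and still re-raises. A small sketch of the difference (function and key names below are illustrative, not from the repo):

def with_except(data):
    out = []
    try:
        out = [d["aid"] for d in data]
    except Exception as e:
        print(f"caught, loop continues: {e}")
    return out

def with_finally(data):
    out = []
    try:
        out = [d["aid"] for d in data]
    finally:
        pass  # runs either way, but does not suppress the exception
    return out

print(with_except(None))  # caught, loop continues: ... -> []
try:
    with_finally(None)
except TypeError as e:
    print(f"re-raised past finally: {e}")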

media_platform/douyin/core.py

Lines changed: 3 additions & 0 deletions
@@ -108,6 +108,9 @@ async def search(self) -> None:
                     publish_time=PublishTimeType(config.PUBLISH_TIME_TYPE),
                     search_id=dy_search_id
                 )
+                if posts_res.get("data") is None or posts_res.get("data") == []:
+                    utils.logger.info(f"[DouYinCrawler.search] search douyin keyword: {keyword}, page: {page} is empty,{posts_res.get('data')}`")
+                    break
             except DataFetchError:
                 utils.logger.error(f"[DouYinCrawler.search] search douyin keyword: {keyword} failed")
                 break
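The new guard breaks out of the paging loop when `data` is missing or empty. A quick sketch of how the two explicit checks behave (the response dicts below are fabricated for illustration):

def should_stop(posts_res: dict) -> bool:
    data = posts_res.get("data")
    return data is None or data == []

print(should_stop({"data": None}))         # True  -> break out of the loop
print(should_stop({"data": []}))           # True
print(should_stop({"data": [{"id": 1}]}))  # False -> keep processing

An equivalent, slightly more idiomatic form would be `if not posts_res.get("data"):`, which additionally covers `{}` and empty strings.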

media_platform/xhs/client.py

Lines changed: 6 additions & 0 deletions
@@ -415,6 +415,12 @@ async def get_comments_all_sub_comments(
                 num=10,
                 cursor=sub_comment_cursor,
             )
+
+            if comments_res is None:
+                utils.logger.info(
+                    f"[XiaoHongShuClient.get_comments_all_sub_comments] No response found for note_id: {note_id}"
+                )
+                continue
             sub_comment_has_more = comments_res.get("has_more", False)
             sub_comment_cursor = comments_res.get("cursor", "")
             if "comments" not in comments_res:

media_platform/zhihu/help.py

Lines changed: 15 additions & 9 deletions
@@ -192,15 +192,21 @@ def _extract_content_or_comment_author(author: Dict) -> ZhihuCreator:

         """
         res = ZhihuCreator()
-        if not author:
-            return res
-        if not author.get("id"):
-            author = author.get("member")
-        res.user_id = author.get("id")
-        res.user_link = f"{zhihu_constant.ZHIHU_URL}/people/{author.get('url_token')}"
-        res.user_nickname = author.get("name")
-        res.user_avatar = author.get("avatar_url")
-        res.url_token = author.get("url_token")
+        try:
+            if not author:
+                return res
+            if not author.get("id"):
+                author = author.get("member")
+            res.user_id = author.get("id")
+            res.user_link = f"{zhihu_constant.ZHIHU_URL}/people/{author.get('url_token')}"
+            res.user_nickname = author.get("name")
+            res.user_avatar = author.get("avatar_url")
+            res.url_token = author.get("url_token")
+
+        except Exception as e:
+            utils.logger.warning(
+                f"[ZhihuExtractor._extract_content_or_comment_author] User Maybe Blocked. {e}"
+            )
         return res

     def extract_comments(self, page_content: ZhihuContent, comments: List[Dict]) -> List[ZhihuComment]:
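The likely failure this wraps: for a blocked or anonymized user, `author.get("member")` can return None, and the subsequent `author.get("id")` then raises AttributeError. A self-contained sketch with a simplified stand-in for ZhihuCreator (the class and field names below are illustrative):

from dataclasses import dataclass

@dataclass
class Creator:  # simplified stand-in for ZhihuCreator
    user_id: str = ""
    user_nickname: str = ""

def extract_author(author: dict) -> Creator:
    res = Creator()
    try:
        if not author:
            return res
        if not author.get("id"):
            author = author.get("member")  # may be None for blocked users
        res.user_id = author.get("id")     # AttributeError if author is None
        res.user_nickname = author.get("name")
    except Exception as e:
        print(f"author unavailable (maybe blocked): {e}")
    return res

print(extract_author({"member": None}))  # Creator(user_id='', user_nickname='')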
