Skip to content

Commit af5a393

Browse files
authored
Update core.py:删除了其他代码贡献者添加的 try-finally 语句。该语句会破坏这段代码原有的逻辑并使其失效,导致只能爬取一天的数据而无法跳转到下一天(原先的逻辑就是依靠 try-except 捕获异常来进入下一天,不要再向该语句中添加其他异常捕获操作或 finally 语句!)
1 parent b675547 commit af5a393

File tree

1 file changed

+9
-12
lines changed

1 file changed

+9
-12
lines changed

media_platform/bilibili/core.py

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -164,10 +164,8 @@ async def search(self):
164164
task_list = []
165165
try:
166166
task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list]
167-
except Exception as e :
168-
utils.logger.warning(
169-
f"[BilibiliCrawler.search] error in the task list. The video for this page will not be included. {e}"
170-
)
167+
except Exception as e:
168+
utils.logger.warning(f"[BilibiliCrawler.search] error in the task list. The video for this page will not be included. {e}")
171169
video_items = await asyncio.gather(*task_list)
172170
for video_item in video_items:
173171
if video_item:
@@ -177,16 +175,19 @@ async def search(self):
177175
await self.get_bilibili_video(video_item, semaphore)
178176
page += 1
179177
await self.batch_get_video_comments(video_id_list)
180-
# 按照 START_DAY 至 END_DAY 按照每一天进行筛选,这样能够突破 1000 条视频的限制,最大程度爬取该关键词下的所有视频
178+
# 按照 START_DAY 至 END_DAY 按照每一天进行筛选,这样能够突破 1000 条视频的限制,最大程度爬取该关键词下每一天的所有视频
181179
else:
182180
for day in pd.date_range(start=config.START_DAY, end=config.END_DAY, freq='D'):
183181
# 按照每一天进行爬取的时间戳参数
184182
pubtime_begin_s, pubtime_end_s = await self.get_pubtime_datetime(start=day.strftime('%Y-%m-%d'), end=day.strftime('%Y-%m-%d'))
185183
page = 1
184+
#!该段 while 语句在发生异常时(通常情况下为当天数据为空时)会自动跳转到下一天,以实现最大程度爬取该关键词下当天的所有视频
185+
#!除了仅保留现在原有的 try, except Exception 语句外,不要再添加其他的异常处理!!!否则将使该段代码失效,使其仅能爬取当天一天数据而无法跳转到下一天
186+
#!除非将该段代码的逻辑进行重构以实现相同的功能,否则不要进行修改!!!
186187
while (page - start_page + 1) * bili_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
187-
# ! Catch any error if response return nothing, go to next day
188+
#! Catch any error if response return nothing, go to next day
188189
try:
189-
# ! Don't skip any page, to make sure gather all video in one day
190+
#! Don't skip any page, to make sure gather all video in one day
190191
# if page < start_page:
191192
# utils.logger.info(f"[BilibiliCrawler.search] Skip page: {page}")
192193
# page += 1
@@ -205,11 +206,7 @@ async def search(self):
205206
video_list: List[Dict] = videos_res.get("result")
206207

207208
semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
208-
task_list = []
209-
try:
210-
task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list]
211-
finally:
212-
pass
209+
task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list]
213210
video_items = await asyncio.gather(*task_list)
214211
for video_item in video_items:
215212
if video_item:

0 commit comments

Comments
 (0)