@@ -164,10 +164,8 @@ async def search(self):
164164 task_list = []
165165 try :
166166 task_list = [self .get_video_info_task (aid = video_item .get ("aid" ), bvid = "" , semaphore = semaphore ) for video_item in video_list ]
167- except Exception as e :
168- utils .logger .warning (
169- f"[BilibiliCrawler.search] error in the task list. The video for this page will not be included. { e } "
170- )
167+ except Exception as e :
168+ utils .logger .warning (f"[BilibiliCrawler.search] error in the task list. The video for this page will not be included. { e } " )
171169 video_items = await asyncio .gather (* task_list )
172170 for video_item in video_items :
173171 if video_item :
@@ -177,16 +175,19 @@ async def search(self):
177175 await self .get_bilibili_video (video_item , semaphore )
178176 page += 1
179177 await self .batch_get_video_comments (video_id_list )
180- # 按照 START_DAY 至 END_DAY 按照每一天进行筛选,这样能够突破 1000 条视频的限制,最大程度爬取该关键词下的所有视频
178+ # 按照 START_DAY 至 END_DAY 按照每一天进行筛选,这样能够突破 1000 条视频的限制,最大程度爬取该关键词下每一天的所有视频
181179 else :
182180 for day in pd .date_range (start = config .START_DAY , end = config .END_DAY , freq = 'D' ):
183181 # 按照每一天进行爬取的时间戳参数
184182 pubtime_begin_s , pubtime_end_s = await self .get_pubtime_datetime (start = day .strftime ('%Y-%m-%d' ), end = day .strftime ('%Y-%m-%d' ))
185183 page = 1
184+ #!该段 while 语句在发生异常时(通常情况下为当天数据为空时)会自动跳转到下一天,以实现最大程度爬取该关键词下当天的所有视频
185+ #!除了仅保留现在原有的 try, except Exception 语句外,不要再添加其他的异常处理!!!否则将使该段代码失效,使其仅能爬取当天一天数据而无法跳转到下一天
186+ #!除非将该段代码的逻辑进行重构以实现相同的功能,否则不要进行修改!!!
186187 while (page - start_page + 1 ) * bili_limit_count <= config .CRAWLER_MAX_NOTES_COUNT :
187- # ! Catch any error if response return nothing, go to next day
188+ #! Catch any error if response return nothing, go to next day
188189 try :
189- # ! Don't skip any page, to make sure gather all video in one day
190+ #! Don't skip any page, to make sure gather all video in one day
190191 # if page < start_page:
191192 # utils.logger.info(f"[BilibiliCrawler.search] Skip page: {page}")
192193 # page += 1
@@ -205,11 +206,7 @@ async def search(self):
205206 video_list : List [Dict ] = videos_res .get ("result" )
206207
207208 semaphore = asyncio .Semaphore (config .MAX_CONCURRENCY_NUM )
208- task_list = []
209- try :
210- task_list = [self .get_video_info_task (aid = video_item .get ("aid" ), bvid = "" , semaphore = semaphore ) for video_item in video_list ]
211- finally :
212- pass
209+ task_list = [self .get_video_info_task (aid = video_item .get ("aid" ), bvid = "" , semaphore = semaphore ) for video_item in video_list ]
213210 video_items = await asyncio .gather (* task_list )
214211 for video_item in video_items :
215212 if video_item :
0 commit comments