@@ -89,9 +89,11 @@ async def start(self):
8989 # Get the information and comments of the specified post
9090 await self .get_specified_videos (config .BILI_SPECIFIED_ID_LIST )
9191 elif config .CRAWLER_TYPE == "creator" :
92- # for creator_id in config.BILI_CREATOR_ID_LIST:
93- # await self.get_creator_videos(int(creator_id))
94- await self .get_all_creator_details (config .BILI_CREATOR_ID_LIST )
92+ if config .CREATOR_MODE :
93+ for creator_id in config .BILI_CREATOR_ID_LIST :
94+ await self .get_creator_videos (int (creator_id ))
95+ else :
96+ await self .get_all_creator_details (config .BILI_CREATOR_ID_LIST )
9597 else :
9698 pass
9799 utils .logger .info (
@@ -119,11 +121,9 @@ async def get_pubtime_datetime(start: str = config.START_DAY, end: str = config.
119121 start_day : datetime = datetime .strptime (start , '%Y-%m-%d' )
120122 end_day : datetime = datetime .strptime (end , '%Y-%m-%d' )
121123 if start_day > end_day :
122- raise ValueError (
123- 'Wrong time range, please check your start and end argument, to ensure that the start cannot exceed end' )
124+ raise ValueError ('Wrong time range, please check your start and end argument, to ensure that the start cannot exceed end' )
124125 elif start_day == end_day : # 搜索同一天的内容
125- end_day = start_day + timedelta (days = 1 ) - timedelta (
126- seconds = 1 ) # 则将 end_day 设置为 start_day + 1 day - 1 second
126+ end_day = start_day + timedelta (days = 1 ) - timedelta (seconds = 1 ) # 则将 end_day 设置为 start_day + 1 day - 1 second
127127 else : # 搜索 start 至 end
128128 end_day = end_day + timedelta (days = 1 ) - timedelta (seconds = 1 ) # 则将 end_day 设置为 end_day + 1 day - 1 second
129129 # 将其重新转换为时间戳
@@ -166,11 +166,9 @@ async def search(self):
166166 semaphore = asyncio .Semaphore (config .MAX_CONCURRENCY_NUM )
167167 task_list = []
168168 try :
169- task_list = [self .get_video_info_task (aid = video_item .get ("aid" ), bvid = "" , semaphore = semaphore )
170- for video_item in video_list ]
169+ task_list = [self .get_video_info_task (aid = video_item .get ("aid" ), bvid = "" , semaphore = semaphore ) for video_item in video_list ]
171170 except Exception as e :
172- utils .logger .warning (
173- f"[BilibiliCrawler.search] error in the task list. The video for this page will not be included. { e } " )
171+ utils .logger .warning (f"[BilibiliCrawler.search] error in the task list. The video for this page will not be included. { e } " )
174172 video_items = await asyncio .gather (* task_list )
175173 for video_item in video_items :
176174 if video_item :
@@ -184,23 +182,21 @@ async def search(self):
184182 else :
185183 for day in pd .date_range (start = config .START_DAY , end = config .END_DAY , freq = 'D' ):
186184 # 按照每一天进行爬取的时间戳参数
187- pubtime_begin_s , pubtime_end_s = await self .get_pubtime_datetime (start = day .strftime ('%Y-%m-%d' ),
188- end = day .strftime ('%Y-%m-%d' ))
185+ pubtime_begin_s , pubtime_end_s = await self .get_pubtime_datetime (start = day .strftime ('%Y-%m-%d' ), end = day .strftime ('%Y-%m-%d' ))
189186 page = 1
190- # !该段 while 语句在发生异常时(通常情况下为当天数据为空时)会自动跳转到下一天,以实现最大程度爬取该关键词下当天的所有视频
191- # !除了仅保留现在原有的 try, except Exception 语句外,不要再添加其他的异常处理!!!否则将使该段代码失效,使其仅能爬取当天一天数据而无法跳转到下一天
192- # !除非将该段代码的逻辑进行重构以实现相同的功能,否则不要进行修改!!!
187+ #!该段 while 语句在发生异常时(通常情况下为当天数据为空时)会自动跳转到下一天,以实现最大程度爬取该关键词下当天的所有视频
188+ #!除了仅保留现在原有的 try, except Exception 语句外,不要再添加其他的异常处理!!!否则将使该段代码失效,使其仅能爬取当天一天数据而无法跳转到下一天
189+ #!除非将该段代码的逻辑进行重构以实现相同的功能,否则不要进行修改!!!
193190 while (page - start_page + 1 ) * bili_limit_count <= config .CRAWLER_MAX_NOTES_COUNT :
194- # ! Catch any error if response return nothing, go to next day
191+ #! Catch any error raised when the response returns nothing, and move on to the next day
195192 try :
196- # ! Don't skip any page, to make sure gather all video in one day
193+ #! Don't skip any page, to make sure all videos in one day are gathered
197194 # if page < start_page:
198195 # utils.logger.info(f"[BilibiliCrawler.search] Skip page: {page}")
199196 # page += 1
200197 # continue
201198
202- utils .logger .info (
203- f"[BilibiliCrawler.search] search bilibili keyword: { keyword } , date: { day .ctime ()} , page: { page } " )
199+ utils .logger .info (f"[BilibiliCrawler.search] search bilibili keyword: { keyword } , date: { day .ctime ()} , page: { page } " )
204200 video_id_list : List [str ] = []
205201 videos_res = await self .bili_client .search_video_by_keyword (
206202 keyword = keyword ,
@@ -213,9 +209,7 @@ async def search(self):
213209 video_list : List [Dict ] = videos_res .get ("result" )
214210
215211 semaphore = asyncio .Semaphore (config .MAX_CONCURRENCY_NUM )
216- task_list = [
217- self .get_video_info_task (aid = video_item .get ("aid" ), bvid = "" , semaphore = semaphore ) for
218- video_item in video_list ]
212+ task_list = [self .get_video_info_task (aid = video_item .get ("aid" ), bvid = "" , semaphore = semaphore ) for video_item in video_list ]
219213 video_items = await asyncio .gather (* task_list )
220214 for video_item in video_items :
221215 if video_item :
0 commit comments