@@ -89,8 +89,11 @@ async def start(self):
8989 # Get the information and comments of the specified post
9090 await self .get_specified_videos (config .BILI_SPECIFIED_ID_LIST )
9191 elif config .CRAWLER_TYPE == "creator" :
92- for creator_id in config .BILI_CREATOR_ID_LIST :
93- await self .get_creator_videos (int (creator_id ))
92+ if config .CREATOR_MODE :
93+ for creator_id in config .BILI_CREATOR_ID_LIST :
94+ await self .get_creator_videos (int (creator_id ))
95+ else :
96+ await self .get_all_creator_details (config .BILI_CREATOR_ID_LIST )
9497 else :
9598 pass
9699 utils .logger .info (
@@ -125,7 +128,7 @@ async def get_pubtime_datetime(start: str = config.START_DAY, end: str = config.
125128 end_day = end_day + timedelta (days = 1 ) - timedelta (seconds = 1 ) # 则将 end_day 设置为 end_day + 1 day - 1 second
126129 # 将其重新转换为时间戳
127130 return str (int (start_day .timestamp ())), str (int (end_day .timestamp ()))
128-
131+
129132 async def search (self ):
130133 """
131134 search bilibili video with keywords
@@ -466,3 +469,121 @@ async def get_bilibili_video(self, video_item: Dict, semaphore: asyncio.Semaphor
466469 extension_file_name = f"video.mp4"
467470 await bilibili_store .store_video (aid , content , extension_file_name )
468471
async def get_all_creator_details(self, creator_id_list: List[int]):
    """
    Crawl the details of every creator in creator_id_list concurrently.

    One task per creator is scheduled on get_creator_details; all tasks
    share a single semaphore sized by config.MAX_CONCURRENCY_NUM.
    A creator whose task cannot be scheduled, or whose task fails, is
    logged and skipped so the remaining creators are still crawled.
    :param creator_id_list: ids of the creators to crawl
    :return:
    """
    utils.logger.info(
        "[BilibiliCrawler.get_all_creator_details] Crawling the details of creator")
    utils.logger.info(
        f"[BilibiliCrawler.get_all_creator_details] creator ids:{creator_id_list}")

    semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
    task_list: List[Task] = []
    for creator_id in creator_id_list:
        # Guard each creator separately: a scheduling failure for one id
        # must not drop the ids that come after it.
        try:
            task = asyncio.create_task(
                self.get_creator_details(creator_id, semaphore),
                name=str(creator_id),
            )
        except Exception as e:
            utils.logger.warning(
                f"[BilibiliCrawler.get_all_creator_details] error in the task list. The creator will not be included. {e}")
            continue
        task_list.append(task)

    # return_exceptions=True keeps one failing creator from aborting the
    # await while the sibling tasks are still running in the background.
    results = await asyncio.gather(*task_list, return_exceptions=True)
    for task, result in zip(task_list, results):
        if isinstance(result, BaseException):
            utils.logger.error(
                f"[BilibiliCrawler.get_all_creator_details] creator_id: {task.get_name()} failed and will not be included: {result!r}")
493+
async def get_creator_details(self, creator_id: int, semaphore: asyncio.Semaphore):
    """
    Fetch one creator's info, then crawl that creator's fans, followings
    and dynamics in that order.

    :param creator_id: id of the creator to crawl
    :param semaphore: shared limiter; held here only for the info request
    :return:
    """
    # Hold a permit only while requesting the creator info. get_fans,
    # get_followings and get_dynamics each acquire this same semaphore
    # themselves, and asyncio.Semaphore is not re-entrant: keeping the
    # permit across those calls would stall every task as soon as all
    # permits are taken by callers waiting on their own inner acquire.
    async with semaphore:
        creator_unhandled_info: Dict = await self.bili_client.get_creator_info(creator_id)

    # Keep only the fields passed on to the crawl helpers; the avatar is
    # read from the "face" key of the response.
    creator_info: Dict = {
        "id": creator_id,
        "name": creator_unhandled_info.get("name"),
        "sign": creator_unhandled_info.get("sign"),
        "avatar": creator_unhandled_info.get("face"),
    }

    await self.get_fans(creator_info, semaphore)
    await self.get_followings(creator_info, semaphore)
    await self.get_dynamics(creator_info, semaphore)
512+
async def get_fans(self, creator_info: Dict, semaphore: asyncio.Semaphore):
    """
    Crawl the fans of one creator and pass them to the store callback.

    Failures are logged, not raised, so the caller can continue with the
    creator's other data.
    :param creator_info: creator record; its "id" key is required
    :param semaphore: shared limiter held for the whole fans crawl
    :return:
    """
    creator_id = creator_info["id"]
    async with semaphore:
        try:
            utils.logger.info(
                f"[BilibiliCrawler.get_fans] begin get creator_id: {creator_id} fans ...")
            await self.bili_client.get_creator_all_fans(
                creator_info=creator_info,
                # Random value in [0, 1); presumably seconds between
                # page requests -- confirm against the client.
                crawl_interval=random.random(),
                callback=bilibili_store.batch_update_bilibili_creator_fans,
                max_count=config.CRAWLER_MAX_CONTACTS_COUNT_SINGLENOTES,
            )
        except DataFetchError as ex:
            utils.logger.error(
                f"[BilibiliCrawler.get_fans] get creator_id: {creator_id} fans error: {ex}")
        except Exception as e:
            # Include the creator id: creators are crawled concurrently,
            # so an unattributed error cannot be traced back to one.
            utils.logger.error(
                f"[BilibiliCrawler.get_fans] creator_id: {creator_id} may be been blocked, err:{e}")
538+
async def get_followings(self, creator_info: Dict, semaphore: asyncio.Semaphore):
    """
    Crawl the followings of one creator and pass them to the store callback.

    Failures are logged, not raised, so the caller can continue with the
    creator's other data.
    :param creator_info: creator record; its "id" key is required
    :param semaphore: shared limiter held for the whole followings crawl
    :return:
    """
    creator_id = creator_info["id"]
    async with semaphore:
        try:
            utils.logger.info(
                f"[BilibiliCrawler.get_followings] begin get creator_id: {creator_id} followings ...")
            await self.bili_client.get_creator_all_followings(
                creator_info=creator_info,
                # Random value in [0, 1); presumably seconds between
                # page requests -- confirm against the client.
                crawl_interval=random.random(),
                callback=bilibili_store.batch_update_bilibili_creator_followings,
                max_count=config.CRAWLER_MAX_CONTACTS_COUNT_SINGLENOTES,
            )
        except DataFetchError as ex:
            utils.logger.error(
                f"[BilibiliCrawler.get_followings] get creator_id: {creator_id} followings error: {ex}")
        except Exception as e:
            # Include the creator id: creators are crawled concurrently,
            # so an unattributed error cannot be traced back to one.
            utils.logger.error(
                f"[BilibiliCrawler.get_followings] creator_id: {creator_id} may be been blocked, err:{e}")
564+
async def get_dynamics(self, creator_info: Dict, semaphore: asyncio.Semaphore):
    """
    Crawl the dynamics of one creator and pass them to the store callback.

    Failures are logged, not raised, so the caller can continue with the
    creator's other data.
    :param creator_info: creator record; its "id" key is required
    :param semaphore: shared limiter held for the whole dynamics crawl
    :return:
    """
    creator_id = creator_info["id"]
    async with semaphore:
        try:
            utils.logger.info(
                f"[BilibiliCrawler.get_dynamics] begin get creator_id: {creator_id} dynamics ...")
            await self.bili_client.get_creator_all_dynamics(
                creator_info=creator_info,
                # Random value in [0, 1); presumably seconds between
                # page requests -- confirm against the client.
                crawl_interval=random.random(),
                callback=bilibili_store.batch_update_bilibili_creator_dynamics,
                max_count=config.CRAWLER_MAX_DYNAMICS_COUNT_SINGLENOTES,
            )
        except DataFetchError as ex:
            utils.logger.error(
                f"[BilibiliCrawler.get_dynamics] get creator_id: {creator_id} dynamics error: {ex}")
        except Exception as e:
            # Include the creator id: creators are crawled concurrently,
            # so an unattributed error cannot be traced back to one.
            utils.logger.error(
                f"[BilibiliCrawler.get_dynamics] creator_id: {creator_id} may be been blocked, err:{e}")
0 commit comments