|
41 | 41 | from .client import BilibiliClient |
42 | 42 | from .exception import DataFetchError |
43 | 43 | from .field import SearchOrderType |
| 44 | +from .help import parse_video_info_from_url, parse_creator_info_from_url |
44 | 45 | from .login import BilibiliLogin |
45 | 46 |
|
46 | 47 |
|
@@ -103,8 +104,14 @@ async def start(self): |
103 | 104 | await self.get_specified_videos(config.BILI_SPECIFIED_ID_LIST) |
104 | 105 | elif config.CRAWLER_TYPE == "creator": |
105 | 106 | if config.CREATOR_MODE: |
106 | | - for creator_id in config.BILI_CREATOR_ID_LIST: |
107 | | - await self.get_creator_videos(int(creator_id)) |
| 107 | + for creator_url in config.BILI_CREATOR_ID_LIST: |
| 108 | + try: |
| 109 | + creator_info = parse_creator_info_from_url(creator_url) |
| 110 | + utils.logger.info(f"[BilibiliCrawler.start] Parsed creator ID: {creator_info.creator_id} from {creator_url}") |
| 111 | + await self.get_creator_videos(int(creator_info.creator_id)) |
| 112 | + except ValueError as e: |
| 113 | + utils.logger.error(f"[BilibiliCrawler.start] Failed to parse creator URL: {e}") |
| 114 | + continue |
108 | 115 | else: |
109 | 116 | await self.get_all_creator_details(config.BILI_CREATOR_ID_LIST) |
110 | 117 | else: |
@@ -362,11 +369,23 @@ async def get_creator_videos(self, creator_id: int): |
362 | 369 | utils.logger.info(f"[BilibiliCrawler.get_creator_videos] Sleeping for {config.CRAWLER_MAX_SLEEP_SEC} seconds after page {pn}") |
363 | 370 | pn += 1 |
364 | 371 |
|
365 | | - async def get_specified_videos(self, bvids_list: List[str]): |
| 372 | + async def get_specified_videos(self, video_url_list: List[str]): |
366 | 373 | """ |
367 | | - get specified videos info |
| 374 | + get specified videos info from URLs or BV IDs |
| 375 | + :param video_url_list: List of video URLs or BV IDs |
368 | 376 | :return: |
369 | 377 | """ |
| 378 | + utils.logger.info("[BilibiliCrawler.get_specified_videos] Parsing video URLs...") |
| 379 | + bvids_list = [] |
| 380 | + for video_url in video_url_list: |
| 381 | + try: |
| 382 | + video_info = parse_video_info_from_url(video_url) |
| 383 | + bvids_list.append(video_info.video_id) |
| 384 | + utils.logger.info(f"[BilibiliCrawler.get_specified_videos] Parsed video ID: {video_info.video_id} from {video_url}") |
| 385 | + except ValueError as e: |
| 386 | + utils.logger.error(f"[BilibiliCrawler.get_specified_videos] Failed to parse video URL: {e}") |
| 387 | + continue |
| 388 | + |
370 | 389 | semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM) |
371 | 390 | task_list = [self.get_video_info_task(aid=0, bvid=video_id, semaphore=semaphore) for video_id in bvids_list] |
372 | 391 | video_details = await asyncio.gather(*task_list) |
@@ -568,18 +587,30 @@ async def get_bilibili_video(self, video_item: Dict, semaphore: asyncio.Semaphor |
568 | 587 | extension_file_name = f"video.mp4" |
569 | 588 | await bilibili_store.store_video(aid, content, extension_file_name) |
570 | 589 |
|
571 | | - async def get_all_creator_details(self, creator_id_list: List[int]): |
| 590 | + async def get_all_creator_details(self, creator_url_list: List[str]): |
572 | 591 | """ |
573 | | - creator_id_list: get details for creator from creator_id_list |
| 592 | + creator_url_list: get details for creator from creator URL list |
574 | 593 | """ |
575 | | - utils.logger.info(f"[BilibiliCrawler.get_creator_details] Crawling the detalis of creator") |
576 | | - utils.logger.info(f"[BilibiliCrawler.get_creator_details] creator ids:{creator_id_list}") |
| 594 | + utils.logger.info(f"[BilibiliCrawler.get_all_creator_details] Crawling the details of creators") |
| 595 | + utils.logger.info(f"[BilibiliCrawler.get_all_creator_details] Parsing creator URLs...") |
| 596 | + |
| 597 | + creator_id_list = [] |
| 598 | + for creator_url in creator_url_list: |
| 599 | + try: |
| 600 | + creator_info = parse_creator_info_from_url(creator_url) |
| 601 | + creator_id_list.append(int(creator_info.creator_id)) |
| 602 | + utils.logger.info(f"[BilibiliCrawler.get_all_creator_details] Parsed creator ID: {creator_info.creator_id} from {creator_url}") |
| 603 | + except ValueError as e: |
| 604 | + utils.logger.error(f"[BilibiliCrawler.get_all_creator_details] Failed to parse creator URL: {e}") |
| 605 | + continue |
| 606 | + |
| 607 | + utils.logger.info(f"[BilibiliCrawler.get_all_creator_details] creator ids:{creator_id_list}") |
577 | 608 |
|
578 | 609 | semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM) |
579 | 610 | task_list: List[Task] = [] |
580 | 611 | try: |
581 | 612 | for creator_id in creator_id_list: |
582 | | - task = asyncio.create_task(self.get_creator_details(creator_id, semaphore), name=creator_id) |
| 613 | + task = asyncio.create_task(self.get_creator_details(creator_id, semaphore), name=str(creator_id)) |
583 | 614 | task_list.append(task) |
584 | 615 | except Exception as e: |
585 | 616 | utils.logger.warning(f"[BilibiliCrawler.get_all_creator_details] error in the task list. The creator will not be included. {e}") |
|
0 commit comments