Skip to content

Commit 6323e2d

Browse files
authored
Merge pull request #616 from chimeElm/main
修复CRAWLER_MAX_NOTES_COUNT在爬取小红书作者帖子时失效的问题
2 parents 23c8f8f + 26a8455 commit 6323e2d

File tree

1 file changed

+14
-3
lines changed

1 file changed

+14
-3
lines changed

media_platform/xhs/client.py

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -498,7 +498,7 @@ async def get_all_notes_by_creator(
498 498
result = []
499 499
notes_has_more = True
500 500
notes_cursor = ""
501 -
while notes_has_more:
501 +
while notes_has_more and len(result) < config.CRAWLER_MAX_NOTES_COUNT:
502 502
notes_res = await self.get_notes_by_creator(user_id, notes_cursor)
503 503
if not notes_res:
504 504
utils.logger.error(
@@ -518,10 +518,21 @@ async def get_all_notes_by_creator(
518 518
utils.logger.info(
519 519
f"[XiaoHongShuClient.get_all_notes_by_creator] got user_id:{user_id} notes len : {len(notes)}"
520 520
)
521 +
522 +
remaining = config.CRAWLER_MAX_NOTES_COUNT - len(result)
523 +
if remaining <= 0:
524 +
break
525 +
526 +
notes_to_add = notes[:remaining]
521 527
if callback:
522 -
await callback(notes)
528 +
await callback(notes_to_add)
529 +
530 +
result.extend(notes_to_add)
523 531
await asyncio.sleep(crawl_interval)
524 -
result.extend(notes)
532 +
533 +
utils.logger.info(
534 +
f"[XiaoHongShuClient.get_all_notes_by_creator] Finished getting notes for user {user_id}, total: {len(result)}"
535 +
)
525 536
return result
526 537

527 538
async def get_note_short_url(self, note_id: str) -> Dict:

0 commit comments

Comments
 (0)