Skip to content

Commit b6caa7a

Browse files
committed
refactor: add xhs creator params
1 parent 1e3637f commit b6caa7a

File tree

3 files changed: 18 additions (+18) and 16 deletions (-16)

3 files changed

+18
-16
lines changed

config/xhs_config.py

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -21,12 +21,9 @@
2121
# ........................
2222
]
2323

24-
# 指定创作者URL列表 (支持完整URL或纯ID)
25-
# 支持格式:
26-
# 1. 完整创作者主页URL (带xsec_token和xsec_source参数): "https://www.xiaohongshu.com/user/profile/5eb8e1d400000000010075ae?xsec_token=AB1nWBKCo1vE2HEkfoJUOi5B6BE5n7wVrbdpHoWIj5xHw=&xsec_source=pc_feed"
27-
# 2. 纯user_id: "63e36c9a000000002703502b"
24+
# 指定创作者URL列表,需要携带xsec_token和xsec_source参数
25+
2826
XHS_CREATOR_ID_LIST = [
29-
"https://www.xiaohongshu.com/user/profile/5eb8e1d400000000010075ae?xsec_token=AB1nWBKCo1vE2HEkfoJUOi5B6BE5n7wVrbdpHoWIj5xHw=&xsec_source=pc_feed",
30-
"63e36c9a000000002703502b",
27+
"https://www.xiaohongshu.com/user/profile/5f58bd990000000001003753?xsec_token=ABYVg1evluJZZzpMX-VWzchxQ1qSNVW3r-jOEnKqMcgZw=&xsec_source=pc_search"
3128
# ........................
3229
]

media_platform/xhs/client.py

Lines changed: 13 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -142,7 +142,8 @@ async def request(self, method, url, **kwargs) -> Union[str, Any]:
142142
elif data["code"] == self.IP_ERROR_CODE:
143143
raise IPBlockError(self.IP_ERROR_STR)
144144
else:
145-
raise DataFetchError(data.get("msg", None))
145+
err_msg = data.get("msg", None) or f"{response.text}"
146+
raise DataFetchError(err_msg)
146147

147148
async def get(self, uri: str, params=None) -> Dict:
148149
"""
@@ -507,38 +508,40 @@ async def get_notes_by_creator(
507508
creator: str,
508509
cursor: str,
509510
page_size: int = 30,
511+
xsec_token: str = "",
512+
xsec_source: str = "pc_feed",
510513
) -> Dict:
511514
"""
512515
获取博主的笔记
513516
Args:
514517
creator: 博主ID
515518
cursor: 上一页最后一条笔记的ID
516519
page_size: 分页数据长度
520+
xsec_token: 验证token
521+
xsec_source: 渠道来源
517522
518523
Returns:
519524
520525
"""
521-
uri = "/api/sns/web/v1/user_posted"
522-
data = {
523-
"user_id": creator,
524-
"cursor": cursor,
525-
"num": page_size,
526-
"image_formats": "jpg,webp,avif",
527-
}
528-
return await self.get(uri, data)
526+
uri = f"/api/sns/web/v1/user_posted?num={page_size}&cursor={cursor}&user_id={creator}&xsec_token={xsec_token}&xsec_source={xsec_source}"
527+
return await self.get(uri)
529528

530529
async def get_all_notes_by_creator(
531530
self,
532531
user_id: str,
533532
crawl_interval: float = 1.0,
534533
callback: Optional[Callable] = None,
534+
xsec_token: str = "",
535+
xsec_source: str = "pc_feed",
535536
) -> List[Dict]:
536537
"""
537538
获取指定用户下的所有发过的帖子,该方法会一直查找一个用户下的所有帖子信息
538539
Args:
539540
user_id: 用户ID
540541
crawl_interval: 爬取一次的延迟单位(秒)
541542
callback: 一次分页爬取结束后的更新回调函数
543+
xsec_token: 验证token
544+
xsec_source: 渠道来源
542545
543546
Returns:
544547
@@ -547,7 +550,7 @@ async def get_all_notes_by_creator(
547550
notes_has_more = True
548551
notes_cursor = ""
549552
while notes_has_more and len(result) < config.CRAWLER_MAX_NOTES_COUNT:
550-
notes_res = await self.get_notes_by_creator(user_id, notes_cursor)
553+
notes_res = await self.get_notes_by_creator(user_id, notes_cursor, xsec_token=xsec_token, xsec_source=xsec_source)
551554
if not notes_res:
552555
utils.logger.error(
553556
f"[XiaoHongShuClient.get_notes_by_creator] The current creator may have been banned by xhs, so they cannot access the data."

media_platform/xhs/core.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -201,6 +201,8 @@ async def get_creators_and_notes(self) -> None:
201201
user_id=user_id,
202202
crawl_interval=crawl_interval,
203203
callback=self.fetch_creator_notes_detail,
204+
xsec_token=creator_info.xsec_token,
205+
xsec_source=creator_info.xsec_source,
204206
)
205207

206208
note_ids = []

0 commit comments

Comments
 (0)