|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +# Copyright (c) 2025 [email protected] |
| 3 | +# |
| 4 | +# This file is part of MediaCrawler project. |
| 5 | +# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/media_platform/xhs/playwright_sign.py |
| 6 | +# GitHub: https://github.com/NanmiCoder |
| 7 | +# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1 |
| 8 | +# |
| 9 | +# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则: |
| 10 | +# 1. 不得用于任何商业用途。 |
| 11 | +# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。 |
| 12 | +# 3. 不得进行大规模爬取或对平台造成运营干扰。 |
| 13 | +# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。 |
| 14 | +# 5. 不得用于任何非法或不当的用途。 |
| 15 | +# |
| 16 | +# 详细许可条款请参阅项目根目录下的LICENSE文件。 |
| 17 | +# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。 |
| 18 | + |
| 19 | +# 通过 Playwright 注入调用 window.mnsv2 生成小红书签名 |
| 20 | + |
| 21 | +import hashlib |
| 22 | +import json |
| 23 | +import time |
| 24 | +from typing import Any, Dict, Optional, Union |
| 25 | +from urllib.parse import urlparse |
| 26 | + |
| 27 | +from playwright.async_api import Page |
| 28 | + |
| 29 | +from .xhs_sign import b64_encode, encode_utf8, get_trace_id, mrc |
| 30 | + |
| 31 | + |
| 32 | +def _build_sign_string(uri: str, data: Optional[Union[Dict, str]] = None) -> str: |
| 33 | + """构建待签名字符串""" |
| 34 | + c = uri |
| 35 | + if data is not None: |
| 36 | + if isinstance(data, dict): |
| 37 | + c += json.dumps(data, separators=(",", ":"), ensure_ascii=False) |
| 38 | + elif isinstance(data, str): |
| 39 | + c += data |
| 40 | + return c |
| 41 | + |
| 42 | + |
| 43 | +def _md5_hex(s: str) -> str: |
| 44 | + """计算 MD5 哈希值""" |
| 45 | + return hashlib.md5(s.encode("utf-8")).hexdigest() |
| 46 | + |
| 47 | + |
def _build_xs_payload(x3_value: str, data_type: str = "object") -> str:
    """Assemble the ``x-s`` header value.

    A fixed set of fields plus ``x3_value`` (stored under ``x3``) and
    ``data_type`` (stored under ``x4``) is dumped as compact JSON, passed
    through ``encode_utf8`` and ``b64_encode``, and prefixed with ``XYS_``.

    Args:
        x3_value: Value placed in the ``x3`` field.
        data_type: Value placed in the ``x4`` field.

    Returns:
        The ``XYS_``-prefixed encoded payload.
    """
    # Keyword order is the JSON key order, so it must stay x0..x4.
    fields = dict(
        x0="4.2.1",
        x1="xhs-pc-web",
        x2="Mac OS",
        x3=x3_value,
        x4=data_type,
    )
    compact_json = json.dumps(fields, separators=(",", ":"))
    encoded = b64_encode(encode_utf8(compact_json))
    return "XYS_" + encoded
| 58 | + |
| 59 | + |
def _build_xs_common(a1: str, b1: str, x_s: str, x_t: str) -> str:
    """Assemble the ``x-s-common`` request header value.

    The fields below are dumped as compact JSON and passed through
    ``encode_utf8`` and ``b64_encode``.

    Args:
        a1: Value placed in the ``x5`` field.
        b1: Value placed in the ``x8`` field.
        x_s: Value placed in the ``x7`` field.
        x_t: Value placed in the ``x6`` field.

    Returns:
        The encoded header value.
    """
    # x9 is derived from the other three values via the imported ``mrc`` helper.
    x9_value = mrc(x_t + x_s + b1)

    # Pair order is the JSON key order and must not change.
    ordered_fields = [
        ("s0", 3),
        ("s1", ""),
        ("x0", "1"),
        ("x1", "4.2.2"),
        ("x2", "Mac OS"),
        ("x3", "xhs-pc-web"),
        ("x4", "4.74.0"),
        ("x5", a1),
        ("x6", x_t),
        ("x7", x_s),
        ("x8", b1),
        ("x9", x9_value),
        ("x10", 154),
        ("x11", "normal"),
    ]
    compact_json = json.dumps(dict(ordered_fields), separators=(",", ":"))
    return b64_encode(encode_utf8(compact_json))
| 79 | + |
| 80 | + |
async def get_b1_from_localstorage(page: Page) -> str:
    """Read the ``b1`` entry from the page's ``localStorage``.

    Only the ``b1`` key is requested from the browser, rather than
    serializing the whole storage object just to read one entry.

    Args:
        page: Playwright page whose ``window.localStorage`` is queried.

    Returns:
        The stored ``b1`` string, or ``""`` when the key is absent or the
        lookup fails.
    """
    import logging

    try:
        b1 = await page.evaluate("() => window.localStorage.getItem('b1')")
    except Exception:
        # Deliberately best-effort: callers get an empty b1 instead of an
        # exception, but the failure is logged so it is not invisible.
        logging.getLogger(__name__).warning(
            "Failed to read b1 from localStorage", exc_info=True
        )
        return ""
    # getItem() yields null (None) for a missing key; always return a str.
    return b1 if isinstance(b1, str) else ""
| 88 | + |
| 89 | + |
async def call_mnsv2(page: Page, sign_str: str, md5_str: str) -> str:
    """Call ``window.mnsv2`` inside the page through Playwright.

    Both values are passed to the browser as ``evaluate`` arguments instead
    of being spliced into JavaScript source. Playwright serializes them, so
    quotes, backslashes, carriage returns, line separators and any other
    special characters in ``sign_str`` can neither break the script nor be
    interpreted as code.

    Args:
        page: Playwright page in which ``window.mnsv2`` is called.
        sign_str: String to sign (uri + JSON.stringify(data)).
        md5_str: MD5 hex digest of ``sign_str``.

    Returns:
        The value returned by ``mnsv2``, or ``""`` when the call fails or
        returns a falsy value.
    """
    import logging

    try:
        result = await page.evaluate(
            "([signStr, md5Str]) => window.mnsv2(signStr, md5Str)",
            [sign_str, md5_str],
        )
    except Exception:
        # Best-effort by design: callers receive an empty signature rather
        # than an exception, but the failure is logged for diagnosis.
        logging.getLogger(__name__).warning(
            "window.mnsv2 call failed", exc_info=True
        )
        return ""
    return result if result else ""
| 110 | + |
| 111 | + |
async def sign_xs_with_playwright(
    page: Page,
    uri: str,
    data: Optional[Union[Dict, str]] = None,
) -> str:
    """Generate the ``x-s`` signature through Playwright injection.

    Args:
        page: Playwright page (must already have the Xiaohongshu page open).
        uri: API path, e.g. "/api/sns/web/v1/search/notes".
        data: Request data (params for GET, payload for POST).

    Returns:
        The ``x-s`` signature string.
    """
    to_sign = _build_sign_string(uri, data)
    digest = _md5_hex(to_sign)
    raw_signature = await call_mnsv2(page, to_sign, digest)

    # Dicts and lists are labelled "object"; anything else (None included)
    # is labelled "string".
    if isinstance(data, (dict, list)):
        kind = "object"
    else:
        kind = "string"
    return _build_xs_payload(raw_signature, kind)
| 133 | + |
| 134 | + |
async def sign_with_playwright(
    page: Page,
    uri: str,
    data: Optional[Union[Dict, str]] = None,
    a1: str = "",
) -> Dict[str, Any]:
    """Generate the complete set of signature headers through Playwright.

    Args:
        page: Playwright page (must already have the Xiaohongshu page open).
        uri: API path.
        data: Request data.
        a1: The ``a1`` value from the cookies.

    Returns:
        A dict with the keys ``x-s``, ``x-t``, ``x-s-common`` and
        ``x-b3-traceid``.
    """
    local_b1 = await get_b1_from_localstorage(page)
    signature = await sign_xs_with_playwright(page, uri, data)
    # Current time in whole milliseconds, as a string.
    timestamp_ms = str(int(time.time() * 1000))

    headers: Dict[str, Any] = {"x-s": signature, "x-t": timestamp_ms}
    headers["x-s-common"] = _build_xs_common(a1, local_b1, signature, timestamp_ms)
    headers["x-b3-traceid"] = get_trace_id()
    return headers
| 163 | + |
| 164 | + |
async def pre_headers_with_playwright(
    page: Page,
    url: str,
    cookie_dict: Dict[str, str],
    params: Optional[Dict] = None,
    payload: Optional[Dict] = None,
) -> Dict[str, str]:
    """Generate signed request headers using Playwright injection.

    Intended as a drop-in replacement for the ``_pre_headers`` method in
    client.py.

    Args:
        page: Playwright page object.
        url: Request URL; only its path component is signed.
        cookie_dict: Cookie dict; the ``a1`` entry is read (default ``""``).
        params: GET request parameters. Takes precedence over ``payload``
            when both are given.
        payload: POST request payload.

    Returns:
        The signed request header dict.

    Raises:
        ValueError: If both ``params`` and ``payload`` are ``None``.
    """
    a1_value = cookie_dict.get("a1", "")
    uri = urlparse(url).path

    # params wins whenever it is provided; payload is the fallback.
    data = params if params is not None else payload
    if data is None:
        raise ValueError("params or payload is required")

    signs = await sign_with_playwright(page, uri, data, a1_value)

    # (outgoing header name, key in the sign result)
    header_map = (
        ("X-S", "x-s"),
        ("X-T", "x-t"),
        ("x-S-Common", "x-s-common"),
        ("X-B3-Traceid", "x-b3-traceid"),
    )
    return {header: signs[key] for header, key in header_map}
0 commit comments