Skip to content

Commit 2600c48

Browse files
authored
fix: xhs sub comment sign error
fix: params参数以及路径问题
2 parents 630d4c1 + ff9a162 commit 2600c48

File tree

2 files changed

+59
-20
lines changed

2 files changed

+59
-20
lines changed

media_platform/xhs/client.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,13 @@ async def _pre_headers(self, url: str, params: Optional[Dict] = None, payload: O
8181
"""
8282
a1_value = self.cookie_dict.get("a1", "")
8383

84-
# 确定请求数据和 URI
84+
# 确定请求数据、方法和 URI
8585
if params is not None:
8686
data = params
87+
method = "GET"
8788
elif payload is not None:
8889
data = payload
90+
method = "POST"
8991
else:
9092
raise ValueError("params or payload is required")
9193

@@ -95,6 +97,7 @@ async def _pre_headers(self, url: str, params: Optional[Dict] = None, payload: O
9597
uri=url,
9698
data=data,
9799
a1=a1_value,
100+
method=method,
98101
)
99102

100103
headers = {
@@ -156,15 +159,10 @@ async def get(self, uri: str, params: Optional[Dict] = None) -> Dict:
156159
157160
"""
158161
headers = await self._pre_headers(uri, params)
159-
if isinstance(params, dict):
160-
# 构建带参数的完整 URL
161-
query_string = urlencode(params)
162-
full_url = f"{self._host}{uri}?{query_string}"
163-
else:
164-
full_url = f"{self._host}{uri}"
162+
full_url = f"{self._host}{uri}"
165163

166164
return await self.request(
167-
method="GET", url=full_url, headers=headers
165+
method="GET", url=full_url, headers=headers, params=params
168166
)
169167

170168
async def post(self, uri: str, data: dict, **kwargs) -> Dict:
@@ -362,7 +360,7 @@ async def get_note_sub_comments(
362360
params = {
363361
"note_id": note_id,
364362
"root_comment_id": root_comment_id,
365-
"num": num,
363+
"num": str(num),
366364
"cursor": cursor,
367365
"image_formats": "jpg,webp,avif",
368366
"top_comment_id": "",

media_platform/xhs/playwright_sign.py

Lines changed: 52 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,22 +22,56 @@
2222
import json
2323
import time
2424
from typing import Any, Dict, Optional, Union
25-
from urllib.parse import urlparse
25+
from urllib.parse import urlparse, quote
2626

2727
from playwright.async_api import Page
2828

2929
from .xhs_sign import b64_encode, encode_utf8, get_trace_id, mrc
3030

3131

32-
def _build_sign_string(uri: str, data: Optional[Union[Dict, str]] = None) -> str:
33-
"""构建待签名字符串"""
34-
c = uri
35-
if data is not None:
32+
def _build_sign_string(uri: str, data: Optional[Union[Dict, str]] = None, method: str = "POST") -> str:
33+
"""构建待签名字符串
34+
35+
Args:
36+
uri: API路径
37+
data: 请求数据
38+
method: 请求方法 (GET 或 POST)
39+
40+
Returns:
41+
待签名字符串
42+
"""
43+
if method.upper() == "POST":
44+
# POST 请求使用 JSON 格式
45+
c = uri
46+
if data is not None:
47+
if isinstance(data, dict):
48+
c += json.dumps(data, separators=(",", ":"), ensure_ascii=False)
49+
elif isinstance(data, str):
50+
c += data
51+
return c
52+
else:
53+
# GET 请求使用查询字符串格式
54+
if not data or (isinstance(data, dict) and len(data) == 0):
55+
return uri
56+
3657
if isinstance(data, dict):
37-
c += json.dumps(data, separators=(",", ":"), ensure_ascii=False)
58+
params = []
59+
for key in data.keys():
60+
value = data[key]
61+
if isinstance(value, list):
62+
value_str = ",".join(str(v) for v in value)
63+
elif value is not None:
64+
value_str = str(value)
65+
else:
66+
value_str = ""
67+
# 使用URL编码(safe参数保留某些字符不编码)
68+
# 注意:httpx会对逗号、等号等字符进行编码,我们也需要同样处理
69+
value_str = quote(value_str, safe='')
70+
params.append(f"{key}={value_str}")
71+
return f"{uri}?{'&'.join(params)}"
3872
elif isinstance(data, str):
39-
c += data
40-
return c
73+
return f"{uri}?{data}"
74+
return uri
4175

4276

4377
def _md5_hex(s: str) -> str:
@@ -113,6 +147,7 @@ async def sign_xs_with_playwright(
113147
page: Page,
114148
uri: str,
115149
data: Optional[Union[Dict, str]] = None,
150+
method: str = "POST",
116151
) -> str:
117152
"""
118153
通过 playwright 注入生成 x-s 签名
@@ -121,11 +156,12 @@ async def sign_xs_with_playwright(
121156
page: playwright Page 对象(必须已打开小红书页面)
122157
uri: API 路径,如 "/api/sns/web/v1/search/notes"
123158
data: 请求数据(GET 的 params 或 POST 的 payload)
159+
method: 请求方法 (GET 或 POST)
124160
125161
Returns:
126162
x-s 签名字符串
127163
"""
128-
sign_str = _build_sign_string(uri, data)
164+
sign_str = _build_sign_string(uri, data, method)
129165
md5_str = _md5_hex(sign_str)
130166
x3_value = await call_mnsv2(page, sign_str, md5_str)
131167
data_type = "object" if isinstance(data, (dict, list)) else "string"
@@ -137,6 +173,7 @@ async def sign_with_playwright(
137173
uri: str,
138174
data: Optional[Union[Dict, str]] = None,
139175
a1: str = "",
176+
method: str = "POST",
140177
) -> Dict[str, Any]:
141178
"""
142179
通过 playwright 生成完整的签名请求头
@@ -146,12 +183,13 @@ async def sign_with_playwright(
146183
uri: API 路径
147184
data: 请求数据
148185
a1: cookie 中的 a1 值
186+
method: 请求方法 (GET 或 POST)
149187
150188
Returns:
151189
包含 x-s, x-t, x-s-common, x-b3-traceid 的字典
152190
"""
153191
b1 = await get_b1_from_localstorage(page)
154-
x_s = await sign_xs_with_playwright(page, uri, data)
192+
x_s = await sign_xs_with_playwright(page, uri, data, method)
155193
x_t = str(int(time.time() * 1000))
156194

157195
return {
@@ -186,14 +224,17 @@ async def pre_headers_with_playwright(
186224
a1_value = cookie_dict.get("a1", "")
187225
uri = urlparse(url).path
188226

227+
# 确定请求数据和方法
189228
if params is not None:
190229
data = params
230+
method = "GET"
191231
elif payload is not None:
192232
data = payload
233+
method = "POST"
193234
else:
194235
raise ValueError("params or payload is required")
195236

196-
signs = await sign_with_playwright(page, uri, data, a1_value)
237+
signs = await sign_with_playwright(page, uri, data, a1_value, method)
197238

198239
return {
199240
"X-S": signs["x-s"],

0 commit comments

Comments
 (0)