Skip to content

Commit c90e71c

Browse files
DIYgod and claude authored
feat(xiaohongshu): add proxy configuration support (#20952)
Allow xiaohongshu routes to bypass anti-scraping measures by routing requests through a configurable proxy. When XIAOHONGSHU_PROXY is set, all xiaohongshu HTTP requests (user, board, notes) will be routed through the proxy endpoint instead of using Puppeteer. Co-authored-by: Claude Haiku 4.5 <[email protected]>
1 parent 05e8d16 commit c90e71c

File tree

2 files changed

+68
-6
lines changed

2 files changed

+68
-6
lines changed

lib/config.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ type ConfigEnvKeys =
219219
| 'XIAOYUZHOU_ID'
220220
| 'XIAOYUZHOU_TOKEN'
221221
| 'XIAOHONGSHU_COOKIE'
222+
| 'XIAOHONGSHU_PROXY'
222223
| 'XIMALAYA_TOKEN'
223224
| 'XSIJISHE_COOKIE'
224225
| 'XSIJISHE_USER_AGENT'
@@ -637,6 +638,7 @@ export type Config = {
637638
};
638639
xiaohongshu: {
639640
cookie?: string;
641+
proxy?: string;
640642
};
641643
ximalaya: {
642644
token?: string;
@@ -1115,6 +1117,7 @@ const calculateValue = () => {
11151117
},
11161118
xiaohongshu: {
11171119
cookie: envs.XIAOHONGSHU_COOKIE,
1120+
proxy: envs.XIAOHONGSHU_PROXY,
11181121
},
11191122
ximalaya: {
11201123
token: envs.XIMALAYA_TOKEN,

lib/routes/xiaohongshu/util.ts

Lines changed: 65 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,40 @@ const getHeaders = (cookie?: string) => ({
2929
...(cookie ? { Cookie: cookie } : {}),
3030
});
3131

32+
/**
 * Build the URL used to request `target` through the configured proxy.
 *
 * The target is passed as a URL-encoded `url` query parameter. The separator
 * is chosen from the proxy base so that a base which already carries a query
 * string (for example an access token) still produces a well-formed URL.
 *
 * @param proxy - Proxy endpoint taken from the configuration.
 * @param target - The URL that should be fetched via the proxy.
 * @returns The proxy URL with the encoded target appended.
 */
function buildProxyUrl(proxy: string, target: string): string {
    let separator = '?';
    if (proxy.includes('?')) {
        // The base already has a query string: extend it instead of starting a second one.
        separator = proxy.endsWith('?') || proxy.endsWith('&') ? '' : '&';
    }
    return `${proxy}${separator}url=${encodeURIComponent(target)}`;
}

/**
 * Fetch a page, going through the proxy when `config.xiaohongshu.proxy` is set.
 *
 * In proxy mode the response body is returned verbatim as text and the cookie
 * is NOT forwarded: only the target URL is handed to the proxy endpoint.
 * Without a proxy the request is sent directly with the headers produced by
 * `getHeaders(cookie)`.
 *
 * @param url - The URL to fetch.
 * @param cookie - Optional cookie, used only for direct (non-proxy) requests.
 * @returns The response body.
 */
async function fetchWithProxy(url: string, cookie?: string): Promise<string> {
    const proxy = config.xiaohongshu.proxy;
    if (proxy) {
        const proxyUrl = buildProxyUrl(proxy, url);
        logger.http(`Requesting ${url} via proxy`);
        // Identity parser: keep the raw text instead of letting ofetch parse the proxy response.
        return await ofetch(proxyUrl, { parseResponse: (txt) => txt });
    }
    logger.http(`Requesting ${url}`);
    return await ofetch(url, {
        headers: getHeaders(cookie),
    });
}
45+
3246
const getUser = (url, cache) =>
3347
cache.tryGet(
3448
url,
3549
async () => {
50+
// Use proxy if configured
51+
if (config.xiaohongshu.proxy) {
52+
const res = await fetchWithProxy(url);
53+
const $ = load(res);
54+
const script = extractInitialState($);
55+
const state = JSON.parse(script);
56+
57+
let { userPageData, notes } = state.user;
58+
userPageData = userPageData._rawValue || userPageData;
59+
notes = notes._rawValue || notes;
60+
61+
// Cannot get collect data without puppeteer
62+
return { userPageData, notes, collect: '' };
63+
}
64+
65+
// Use puppeteer
3666
const { page, destory } = await getPuppeteerPage(url, {
3767
onBeforeLoad: async (page) => {
3868
await page.setRequestInterception(true);
@@ -88,6 +118,16 @@ const getBoard = (url, cache) =>
88118
cache.tryGet(
89119
url,
90120
async () => {
121+
// Use proxy if configured
122+
if (config.xiaohongshu.proxy) {
123+
const res = await fetchWithProxy(url);
124+
const $ = load(res);
125+
const script = extractInitialSsrState($);
126+
const state = JSON.parse(script);
127+
return state.Main;
128+
}
129+
130+
// Use puppeteer
91131
const browser = await puppeteer();
92132
try {
93133
const page = await browser.newPage();
@@ -156,9 +196,7 @@ async function renderNotesFulltext(notes, urlPrex, displayLivePhoto) {
156196

157197
async function getFullNote(link, displayLivePhoto) {
158198
const data = (await cache.tryGet(link, async () => {
159-
const res = await ofetch(link, {
160-
headers: getHeaders(config.xiaohongshu.cookie),
161-
});
199+
const res = await fetchWithProxy(link, config.xiaohongshu.cookie);
162200
const $ = load(res);
163201
const script = extractInitialState($);
164202
const state = JSON.parse(script);
@@ -244,9 +282,7 @@ async function getFullNote(link, displayLivePhoto) {
244282

245283
async function getUserWithCookie(url: string) {
246284
const cookie = config.xiaohongshu.cookie;
247-
const res = await ofetch(url, {
248-
headers: getHeaders(cookie),
249-
});
285+
const res = await fetchWithProxy(url, cookie);
250286
const $ = load(res);
251287
const paths = $('#userPostedFeeds > section > div > a.cover.ld.mask').map((i, item) => item.attributes[3].value);
252288
const script = extractInitialState($);
@@ -275,6 +311,29 @@ function extractInitialState($) {
275311
return script;
276312
}
277313

314+
/**
 * Return the brace-balanced object literal that begins at `start`.
 *
 * Braces inside double-quoted strings are ignored, and backslash escapes are
 * honoured so an escaped quote does not end a string early.
 *
 * @param source - Text to scan.
 * @param start - Index at which the object literal is expected to begin.
 * @returns The literal including its outer braces, or `undefined` when
 *          `source[start]` is not `{` or the braces never balance.
 */
function sliceBalancedObject(source: string, start: number): string | undefined {
    if (source[start] !== '{') {
        return undefined;
    }
    let depth = 0;
    let inString = false;
    for (let i = start; i < source.length; i++) {
        const ch = source[i];
        if (inString) {
            if (ch === '\\') {
                i++; // skip the escaped character
            } else if (ch === '"') {
                inString = false;
            }
        } else if (ch === '"') {
            inString = true;
        } else if (ch === '{') {
            depth++;
        } else if (ch === '}') {
            depth--;
            if (depth === 0) {
                return source.slice(start, i + 1);
            }
        }
    }
    return undefined;
}

/**
 * Replace bare `undefined` tokens with `null` so the text can be given to
 * `JSON.parse`, leaving the contents of double-quoted strings untouched.
 *
 * @param literal - Serialized state text.
 * @returns The text with every `undefined` outside of strings turned into `null`.
 */
function replaceBareUndefined(literal: string): string {
    // The first alternative consumes whole string literals so that an "undefined" inside them is never rewritten.
    return literal.replace(/"(?:[^"\\]|\\[\s\S])*"|undefined/g, (token) => (token === 'undefined' ? 'null' : token));
}

/**
 * Extract the serialized `window.__INITIAL_SSR_STATE__` object from the page's
 * script tags.
 *
 * The object is located with a string-aware balanced-brace scan rather than a
 * lazy regex, so a `};` sequence inside a string value cannot truncate it. If
 * no balanced object follows the assignment, the remaining script text (minus
 * a trailing semicolon) is returned instead.
 *
 * @param $ - Cheerio-style selector function for the loaded document.
 * @returns The state text with bare `undefined` replaced by `null`.
 * @throws Error when no script contains the `window.__INITIAL_SSR_STATE__=` assignment.
 */
function extractInitialSsrState($) {
    const scriptText: string = $('script')
        .filter((i, script) => {
            const text = script.children[0]?.data;
            return text?.includes('window.__INITIAL_SSR_STATE__=');
        })
        .text();

    const marker = /window\.__INITIAL_SSR_STATE__\s*=\s*/.exec(scriptText);
    if (!marker) {
        throw new Error('Cannot extract __INITIAL_SSR_STATE__');
    }

    const valueStart = marker.index + marker[0].length;
    const literal = sliceBalancedObject(scriptText, valueStart) ?? scriptText.slice(valueStart).trim().replace(/;$/, '');
    return replaceBareUndefined(literal);
}
336+
278337
async function checkCookie() {
279338
const cookie = config.xiaohongshu.cookie;
280339
const res = await ofetch('https://edith.xiaohongshu.com/api/sns/web/v2/user/me', {

0 commit comments

Comments
 (0)