Skip to content

Commit 793368d

Browse files
committed
ensure redirects are followed for sitemap, robots, and other requests converted from fetch()
- add a redirect interceptor to getProxyDispatcher(), with an option to disable it
- add getDefaultDispatcher(), which defaults to following redirects
- fixes #954
1 parent 581a703 commit 793368d

File tree

4 files changed

+40
-23
lines changed

4 files changed

+40
-23
lines changed

src/util/browser.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import path from "path";
1010
import { request } from "undici";
1111

1212
import { formatErr, LogContext, logger } from "./logger.js";
13-
import { getSafeProxyString } from "./proxy.js";
13+
import { getDefaultDispatcher, getSafeProxyString } from "./proxy.js";
1414
import { initStorage, S3StorageSync, UploadResult } from "./storage.js";
1515

1616
import {
@@ -250,7 +250,9 @@ export class Browser {
250250
"browser",
251251
);
252252

253-
const resp = await request(profileRemoteSrc);
253+
const resp = await request(profileRemoteSrc, {
254+
dispatcher: getDefaultDispatcher(),
255+
});
254256

255257
await pipeline(resp.body, fs.createWriteStream(profileLocalSrc));
256258
} else if (profileRemoteSrc && profileRemoteSrc.startsWith("@")) {

src/util/file_reader.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import util from "util";
66
import { exec as execCallback } from "child_process";
77

88
import { formatErr, logger } from "./logger.js";
9-
import { getProxyDispatcher } from "./proxy.js";
9+
import { getDefaultDispatcher, getProxyDispatcher } from "./proxy.js";
1010
import { parseRecorderFlowJson } from "./flowbehavior.js";
1111

1212
const exec = util.promisify(execCallback);
@@ -90,7 +90,7 @@ async function writeUrlContentsToFile(
9090

9191
try {
9292
const res = await request(url, {
93-
dispatcher: useProxy ? getProxyDispatcher(url) : undefined,
93+
dispatcher: useProxy ? getProxyDispatcher(url) : getDefaultDispatcher(),
9494
});
9595
if (res.statusCode !== 200) {
9696
throw new Error(`Invalid response, status: ${res.statusCode}`);

src/util/proxy.ts

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import net from "net";
22
import child_process from "child_process";
33
import fs from "fs";
44

5-
import { Agent, Dispatcher, ProxyAgent } from "undici";
5+
import { Agent, Dispatcher, interceptors, ProxyAgent } from "undici";
66
import yaml from "js-yaml";
77

88
import { logger } from "./logger.js";
@@ -22,8 +22,13 @@ const SSH_WAIT_TIMEOUT = 30000;
2222
type ProxyEntry = {
2323
proxyUrl: string;
2424
dispatcher: Dispatcher;
25+
redirectDispatcher: Dispatcher;
2526
};
2627

28+
const defaultDispatcher = new Agent();
29+
30+
const defaultRedirectDispatcher = addRedirectInterceptor(defaultDispatcher);
31+
2732
export type ProxyServerConfig = {
2833
matchHosts?: Record<string, string>;
2934
proxies?: Record<
@@ -192,7 +197,7 @@ export async function initSingleProxy(
192197
detached: boolean,
193198
sshProxyPrivateKeyFile?: string,
194199
sshProxyKnownHostsFile?: string,
195-
): Promise<{ proxyUrl: string; dispatcher: Dispatcher }> {
200+
): Promise<ProxyEntry> {
196201
logger.debug("Initing proxy", {
197202
url: getSafeProxyString(proxyUrl),
198203
localPort,
@@ -215,18 +220,37 @@ export async function initSingleProxy(
215220
};
216221

217222
const dispatcher = createDispatcher(proxyUrl, agentOpts);
218-
return { proxyUrl, dispatcher };
223+
const redirectDispatcher = addRedirectInterceptor(dispatcher);
224+
return { proxyUrl, dispatcher, redirectDispatcher };
219225
}
220226

221-
export function getProxyDispatcher(url: string) {
227+
export function addRedirectInterceptor(dispatcher: Dispatcher) {
228+
// match fetch() max redirects if not doing manual redirects
229+
// https://fetch.spec.whatwg.org/#http-redirect-fetch
230+
const redirector = interceptors.redirect({ maxRedirections: 20 });
231+
return dispatcher.compose(redirector);
232+
}
233+
234+
export function getProxyDispatcher(url: string, withRedirect = true) {
222235
// find url match by regex first
223-
for (const [rx, { dispatcher }] of proxyMap.entries()) {
236+
for (const [rx, { dispatcher, redirectDispatcher }] of proxyMap.entries()) {
224237
if (rx && url.match(rx)) {
225-
return dispatcher;
238+
return withRedirect ? redirectDispatcher : dispatcher;
226239
}
227240
}
228-
// if default proxy set, return default dispatcher, otherwise no dispatcher
229-
return defaultProxyEntry ? defaultProxyEntry.dispatcher : undefined;
241+
242+
// if default proxy set, return dispatcher from default proxy, otherwise a default dispatcher
243+
if (defaultProxyEntry) {
244+
return withRedirect
245+
? defaultProxyEntry.redirectDispatcher
246+
: defaultProxyEntry.dispatcher;
247+
} else {
248+
return getDefaultDispatcher(withRedirect);
249+
}
250+
}
251+
252+
export function getDefaultDispatcher(withRedirect = true) {
253+
return withRedirect ? defaultRedirectDispatcher : defaultDispatcher;
230254
}
231255

232256
export function createDispatcher(

src/util/recorder.ts

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import {
88
isRedirectStatus,
99
} from "./reqresp.js";
1010

11-
import { Agent, Dispatcher, interceptors, request } from "undici";
11+
import { Dispatcher, request } from "undici";
1212

1313
import {
1414
getCustomRewriter,
@@ -55,8 +55,6 @@ const RW_MIME_TYPES = [
5555

5656
const encoder = new TextEncoder();
5757

58-
const defaultAgent = new Agent();
59-
6058
// =================================================================
6159
// eslint-disable-next-line @typescript-eslint/no-explicit-any
6260
function logNetwork(msg: string, data: any) {
@@ -1828,14 +1826,7 @@ class AsyncFetcher {
18281826

18291827
const headers = reqresp.getRequestHeadersDict();
18301828

1831-
let dispatcher = getProxyDispatcher(url) || defaultAgent;
1832-
1833-
if (!this.manualRedirect) {
1834-
// match fetch() max redirects if not doing manual redirects
1835-
// https://fetch.spec.whatwg.org/#http-redirect-fetch
1836-
const redirector = interceptors.redirect({ maxRedirections: 20 });
1837-
dispatcher = dispatcher.compose(redirector);
1838-
}
1829+
let dispatcher = getProxyDispatcher(url, !this.manualRedirect);
18391830

18401831
dispatcher = dispatcher.compose((dispatch) => {
18411832
return (opts, handler) => {

0 commit comments

Comments
 (0)