Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/util/blockrules.ts
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,6 @@ export class BlockRules {
method: "PUT",
headers: { "Content-Type": "text/html" },
body,
dispatcher: getProxyDispatcher(putUrl.href),
});
}
}
Expand Down
6 changes: 4 additions & 2 deletions src/util/browser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import path from "path";
import { request } from "undici";

import { formatErr, LogContext, logger } from "./logger.js";
import { getSafeProxyString } from "./proxy.js";
import { getFollowRedirectDispatcher, getSafeProxyString } from "./proxy.js";
import { initStorage, S3StorageSync, UploadResult } from "./storage.js";

import {
Expand Down Expand Up @@ -250,7 +250,9 @@ export class Browser {
"browser",
);

const resp = await request(profileRemoteSrc);
const resp = await request(profileRemoteSrc, {
dispatcher: getFollowRedirectDispatcher(),
});

await pipeline(resp.body, fs.createWriteStream(profileLocalSrc));
} else if (profileRemoteSrc && profileRemoteSrc.startsWith("@")) {
Expand Down
7 changes: 2 additions & 5 deletions src/util/file_reader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import util from "util";
import { exec as execCallback } from "child_process";

import { formatErr, logger } from "./logger.js";
import { getProxyDispatcher } from "./proxy.js";
import { getFollowRedirectDispatcher } from "./proxy.js";
import { parseRecorderFlowJson } from "./flowbehavior.js";

const exec = util.promisify(execCallback);
Expand Down Expand Up @@ -69,7 +69,6 @@ async function writeUrlContentsToFile(
pathPrefix: string,
pathDefaultExt: string,
fetchNew = false,
useProxy = false,
): Promise<string> {
const filename =
path.basename(new URL(url).pathname) || "index." + pathDefaultExt;
Expand All @@ -90,7 +89,7 @@ async function writeUrlContentsToFile(

try {
const res = await request(url, {
dispatcher: useProxy ? getProxyDispatcher(url) : undefined,
dispatcher: getFollowRedirectDispatcher(),
});
if (res.statusCode !== 200) {
throw new Error(`Invalid response, status: ${res.statusCode}`);
Expand Down Expand Up @@ -121,7 +120,6 @@ export async function collectOnlineSeedFile(
"seeds-",
".txt",
false,
false,
);
logger.info("Seed file downloaded", { url, path: filepath });
return filepath;
Expand Down Expand Up @@ -241,7 +239,6 @@ async function collectOnlineBehavior(
"behaviors-",
".js",
true,
false,
);
logger.info(
"Custom behavior file downloaded",
Expand Down
69 changes: 55 additions & 14 deletions src/util/proxy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import net from "net";
import child_process from "child_process";
import fs from "fs";

import { Agent, Dispatcher, ProxyAgent } from "undici";
import { Agent, Dispatcher, interceptors, ProxyAgent } from "undici";
import yaml from "js-yaml";

import { logger } from "./logger.js";
Expand All @@ -17,13 +17,39 @@ const SSH_PROXY_LOCAL_PORT = 9722;

const SSH_WAIT_TIMEOUT = 30000;

//let proxyDispatcher: Dispatcher | undefined = undefined;

// A configured proxy together with the undici dispatchers used to send
// requests through it.
type ProxyEntry = {
// URL of the proxy this entry was created for
proxyUrl: string;
// dispatcher for this proxy; does not have the redirect interceptor applied
dispatcher: Dispatcher;
// the same dispatcher wrapped with addRedirectInterceptor()
redirectDispatcher: Dispatcher;
};

// Agent options shared by every dispatcher created in this module
const baseOpts: Agent.Options = {
  // allow HTTP/2 connections
  allowH2: true,

  // headers timeout, converted from seconds to milliseconds
  headersTimeout: FETCH_HEADERS_TIMEOUT_SECS * 1000,
};

// Agent options for all requests that fetch archival content
const contentAgentOpts: Agent.Options = {
  ...baseOpts,

  // do not reject invalid SSL certs (matches browser settings)
  connect: { rejectUnauthorized: false },
};

// archival content dispatcher: redirects are NOT followed
const contentDispatcher = new Agent(contentAgentOpts);

// archival content dispatcher: same agent, with the redirect interceptor added
const contentRedirectDispatcher = addRedirectInterceptor(contentDispatcher);

// Dispatcher for non-archival requests (profile, seed file and custom
// behavior downloads): keeps default SSL certificate verification and
// follows redirects.
// undici's request() does not follow redirects on its own, so the redirect
// interceptor has to be composed onto the agent explicitly; a bare Agent
// would hand 3xx responses straight back to the caller.
const followRedirectDispatcher = addRedirectInterceptor(new Agent(baseOpts));

export type ProxyServerConfig = {
matchHosts?: Record<string, string>;
proxies?: Record<
Expand Down Expand Up @@ -192,7 +218,7 @@ export async function initSingleProxy(
detached: boolean,
sshProxyPrivateKeyFile?: string,
sshProxyKnownHostsFile?: string,
): Promise<{ proxyUrl: string; dispatcher: Dispatcher }> {
): Promise<ProxyEntry> {
logger.debug("Initing proxy", {
url: getSafeProxyString(proxyUrl),
localPort,
Expand All @@ -210,23 +236,38 @@ export async function initSingleProxy(
);
}

const agentOpts: Agent.Options = {
headersTimeout: FETCH_HEADERS_TIMEOUT_SECS * 1000,
};
const dispatcher = createDispatcher(proxyUrl, contentAgentOpts);
const redirectDispatcher = addRedirectInterceptor(dispatcher);
return { proxyUrl, dispatcher, redirectDispatcher };
}

const dispatcher = createDispatcher(proxyUrl, agentOpts);
return { proxyUrl, dispatcher };
/**
 * Composes undici's redirect interceptor onto the given dispatcher.
 *
 * @param dispatcher - dispatcher to wrap; it is not modified, a composed
 *   dispatcher is returned instead.
 * @returns the result of `dispatcher.compose()` with the redirect interceptor.
 */
export function addRedirectInterceptor(dispatcher: Dispatcher) {
  // limit of 20 matches fetch() when not doing manual redirects
  // https://fetch.spec.whatwg.org/#http-redirect-fetch
  return dispatcher.compose(interceptors.redirect({ maxRedirections: 20 }));
}

export function getProxyDispatcher(url: string) {
export function getProxyDispatcher(url: string, withRedirect = true) {
// find url match by regex first
for (const [rx, { dispatcher }] of proxyMap.entries()) {
for (const [rx, { dispatcher, redirectDispatcher }] of proxyMap.entries()) {
if (rx && url.match(rx)) {
return dispatcher;
return withRedirect ? redirectDispatcher : dispatcher;
}
}
// if default proxy set, return default dispatcher, otherwise no dispatcher
return defaultProxyEntry ? defaultProxyEntry.dispatcher : undefined;

// if default proxy set, return dispatcher from default proxy, otherwise a default dispatcher
if (defaultProxyEntry) {
return withRedirect
? defaultProxyEntry.redirectDispatcher
: defaultProxyEntry.dispatcher;
} else {
return withRedirect ? contentRedirectDispatcher : contentDispatcher;
}
}

/**
 * Returns the shared module-level `followRedirectDispatcher`.
 *
 * Unlike getProxyDispatcher(), this takes no URL and does not consult the
 * proxy map or the default proxy entry; every caller gets the same instance.
 */
export function getFollowRedirectDispatcher() {
return followRedirectDispatcher;
}

export function createDispatcher(
Expand Down
13 changes: 2 additions & 11 deletions src/util/recorder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import {
isRedirectStatus,
} from "./reqresp.js";

import { Agent, Dispatcher, interceptors, request } from "undici";
import { Dispatcher, request } from "undici";

import {
getCustomRewriter,
Expand Down Expand Up @@ -55,8 +55,6 @@ const RW_MIME_TYPES = [

const encoder = new TextEncoder();

const defaultAgent = new Agent();

// =================================================================
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function logNetwork(msg: string, data: any) {
Expand Down Expand Up @@ -1828,14 +1826,7 @@ class AsyncFetcher {

const headers = reqresp.getRequestHeadersDict();

let dispatcher = getProxyDispatcher(url) || defaultAgent;

if (!this.manualRedirect) {
// match fetch() max redirects if not doing manual redirects
// https://fetch.spec.whatwg.org/#http-redirect-fetch
const redirector = interceptors.redirect({ maxRedirections: 20 });
dispatcher = dispatcher.compose(redirector);
}
let dispatcher = getProxyDispatcher(url, !this.manualRedirect);

dispatcher = dispatcher.compose((dispatch) => {
return (opts, handler) => {
Expand Down
Loading