Skip to content

Commit af64584

Browse files
committed
push commit
1 parent b62a002 commit af64584

File tree

3 files changed

+11
-1
lines changed

3 files changed

+11
-1
lines changed

config.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,5 @@ export const defaultConfig: Config = {
66
maxPagesToCrawl: 50,
77
outputFileName: "output.json",
88
maxTokens: 2000000,
9+
// proxyUrls: ["http://username:password@proxyserver:port"], // or e.g. "socks5://username:password@proxyserver:port"
910
};

src/config.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ export const configSchema = z.object({
8585
* @example 5000
8686
*/
8787
maxTokens: z.number().int().positive().optional(),
88+
/** Optional list of proxy server URLs
89+
* @example ['http://username:password@proxyserver:port', 'socks5://username:password@proxyserver:port']
90+
*/
91+
proxyUrls: z.array(z.string()).optional(),
8892
});
8993

9094
/** Configuration object type, inferred from `configSchema` so the two cannot drift apart. */
export type Config = z.infer<typeof configSchema>;

src/core.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// For more information, see https://crawlee.dev/
2-
import { Configuration, PlaywrightCrawler, downloadListOfUrls } from "crawlee";
2+
import { Configuration, PlaywrightCrawler, ProxyConfiguration, downloadListOfUrls } from "crawlee";
33
import { readFile, writeFile } from "fs/promises";
44
import { glob } from "glob";
55
import { Config, configSchema } from "./config.js";
@@ -54,8 +54,13 @@ export async function crawl(config: Config) {
5454
if (process.env.NO_CRAWL !== "true") {
5555
// PlaywrightCrawler crawls the web using a headless
5656
// browser controlled by the Playwright library.
57+
const proxyConfiguration = new ProxyConfiguration({
58+
proxyUrls: config.proxyUrls,
59+
});
60+
5761
crawler = new PlaywrightCrawler(
5862
{
63+
proxyConfiguration,
5964
// Use the requestHandler to process each of the crawled pages.
6065
async requestHandler({ request, page, enqueueLinks, log, pushData }) {
6166
const title = await page.title();

0 commit comments

Comments
 (0)