Skip to content

Commit 2a6b2da

Browse files
Copilotbarjin
andauthored
feat: add AbortSignal and rename timeouttimeoutMillis in SendRequestOptions (#3457)
`SendRequestOptions` lacked an `AbortSignal` option, preventing users from passing custom cancellation signals to `BaseHttpClient`. The `timeout` field also didn't follow the repo convention of using `timeoutMillis`. --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: barjin <61918049+barjin@users.noreply.github.com> Co-authored-by: Jindřich Bär <jindrichbar@gmail.com>
1 parent 75e0908 commit 2a6b2da

File tree

11 files changed

+130
-33
lines changed

11 files changed

+130
-33
lines changed

packages/basic-crawler/src/internals/send-request.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ export function createSendRequest(
3838
return httpClient.sendRequest(request, {
3939
session,
4040
cookieJar: overrideOptions?.cookieJar ?? session?.cookieJar,
41-
timeout: overrideOptions.timeout,
41+
timeoutMillis: overrideOptions.timeoutMillis,
42+
signal: overrideOptions.signal,
4243
});
4344
};
4445
}

packages/http-client/src/base-http-client.ts

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -56,16 +56,22 @@ export abstract class BaseHttpClient implements BaseHttpClientInterface {
5656
private resolveRequestContext(options?: SendRequestOptions): {
5757
proxyUrl?: string;
5858
cookieJar: CookieJar;
59-
timeout?: number;
59+
signal?: AbortSignal;
6060
} {
6161
const proxyUrl = options?.proxyUrl ?? options?.session?.proxyInfo?.url;
6262
const cookieJar = options?.cookieJar ?? options?.session?.cookieJar ?? new CookieJar();
63-
const timeout = options?.timeout;
64-
return { proxyUrl, cookieJar: cookieJar as CookieJar, timeout };
63+
const signal = this.createAbortSignal(options?.signal, options?.timeoutMillis);
64+
return { proxyUrl, cookieJar: cookieJar as CookieJar, signal };
6565
}
6666

67-
private createAbortSignal(timeout?: number): AbortSignal | undefined {
68-
return timeout ? AbortSignal.timeout(timeout) : undefined;
67+
private createAbortSignal(signal?: AbortSignal, timeoutMillis?: number): AbortSignal | undefined {
68+
if (signal && timeoutMillis) {
69+
return AbortSignal.any([signal, AbortSignal.timeout(timeoutMillis)]);
70+
}
71+
if (signal) {
72+
return signal;
73+
}
74+
return timeoutMillis ? AbortSignal.timeout(timeoutMillis) : undefined;
6975
}
7076

7177
private isRedirect(response: Response): boolean {
@@ -112,14 +118,14 @@ export abstract class BaseHttpClient implements BaseHttpClientInterface {
112118
let currentRequest = initialRequest;
113119
let redirectCount = 0;
114120

115-
const { proxyUrl, cookieJar, timeout } = this.resolveRequestContext(options);
121+
const { proxyUrl, cookieJar, signal } = this.resolveRequestContext(options);
116122
currentRequest = initialRequest.clone();
117123

118124
while (true) {
119125
await this.applyCookies(currentRequest, cookieJar);
120126

121127
const response = await this.fetch(currentRequest, {
122-
signal: this.createAbortSignal(timeout),
128+
signal,
123129
proxyUrl,
124130
redirect: 'manual',
125131
});

packages/http-crawler/src/internals/http-crawler.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -815,7 +815,7 @@ export class HttpCrawler<
815815
} as RequestInit),
816816
{
817817
session,
818-
timeout: opts.timeout,
818+
timeoutMillis: opts.timeout,
819819
},
820820
);
821821

packages/types/src/http-client.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,10 @@ export type RedirectHandler = (
6666
export interface SendRequestOptions {
6767
session?: ISession;
6868
cookieJar?: CookieJar;
69-
timeout?: number;
69+
/** Timeout for the HTTP request in milliseconds. */
70+
timeoutMillis?: number;
71+
/** An AbortSignal to cancel the HTTP request. */
72+
signal?: AbortSignal;
7073
/**
7174
* Overrides the proxy URL set in the `session` for this request.
7275
*

packages/utils/src/internals/robots.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,10 @@ export class RobotsTxtFile {
6565
): Promise<RobotsTxtFile> {
6666
const { proxyUrl, httpClient = new FetchHttpClient() } = options || {};
6767

68-
const response = await httpClient.sendRequest(new Request(url, { method: 'GET', signal: options?.signal }), {
68+
const response = await httpClient.sendRequest(new Request(url, { method: 'GET' }), {
6969
proxyUrl,
70-
timeout: options?.timeoutMillis,
70+
timeoutMillis: options?.timeoutMillis,
71+
signal: options?.signal,
7172
});
7273

7374
if (response.status < 200 || response.status >= 300) {

packages/utils/src/internals/sitemap.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ export async function* parseSitemap<T extends ParseSitemapOptions>(
269269
}),
270270
{
271271
proxyUrl,
272-
timeout,
272+
timeoutMillis: timeout,
273273
},
274274
);
275275
} catch (error: any) {
@@ -536,7 +536,11 @@ export async function* discoverValidSitemaps(
536536
return false;
537537
}
538538
try {
539-
const response = await httpClient.sendRequest(new Request(url, { method: 'HEAD' }), { proxyUrl });
539+
const response = await httpClient.sendRequest(new Request(url, { method: 'HEAD' }), {
540+
proxyUrl,
541+
timeoutMillis: requestTimeoutMillis,
542+
signal,
543+
});
540544
return response.status >= 200 && response.status < 400;
541545
} catch {
542546
return false;

packages/utils/test/mock-http-client.ts

Lines changed: 0 additions & 13 deletions
This file was deleted.

packages/utils/test/robots.test.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1+
import { FetchHttpClient } from '@crawlee/http-client';
12
import nock from 'nock';
23
import { beforeEach, describe, expect, it } from 'vitest';
34

45
import { RobotsTxtFile } from '../src/internals/robots.js';
5-
import { FetchHttpClient } from './mock-http-client.js';
66

77
const httpClient = new FetchHttpClient();
88

@@ -63,7 +63,7 @@ describe('RobotsTxtFile', () => {
6363

6464
it('respects user-set timeout', async () => {
6565
const start = +Date.now();
66-
const robots = RobotsTxtFile.find('http://not-exists.com/robots.txt', undefined, { timeoutMillis: 200 });
66+
const robots = RobotsTxtFile.find('http://not-exists.com/robots.txt', { timeoutMillis: 200 });
6767

6868
await expect(robots).rejects.toThrow(/timeout/i);
6969
const end = +Date.now();
@@ -77,7 +77,7 @@ describe('RobotsTxtFile', () => {
7777
setTimeout(() => controller.abort(), 200);
7878

7979
const start = +Date.now();
80-
const robots = RobotsTxtFile.find('http://not-exists.com/robots.txt', undefined, { signal: controller.signal });
80+
const robots = RobotsTxtFile.find('http://not-exists.com/robots.txt', { signal: controller.signal });
8181

8282
await expect(robots).rejects.toThrow(/aborted/i);
8383
const end = +Date.now();
@@ -90,7 +90,7 @@ describe('RobotsTxtFile', () => {
9090
const controller = new AbortController();
9191

9292
const start = +Date.now();
93-
const robots = RobotsTxtFile.find('http://not-exists.com/robots.txt', undefined, {
93+
const robots = RobotsTxtFile.find('http://not-exists.com/robots.txt', {
9494
signal: controller.signal,
9595
timeoutMillis: 200,
9696
});

packages/utils/test/sitemap.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1+
import { FetchHttpClient } from '@crawlee/http-client';
12
import nock from 'nock';
23
import { beforeEach, describe, expect, it, vi } from 'vitest';
34

45
import log from '@apify/log';
56

67
import type { SitemapUrl } from '../src/internals/sitemap.js';
78
import { discoverValidSitemaps, parseSitemap, Sitemap } from '../src/internals/sitemap.js';
8-
import { FetchHttpClient } from './mock-http-client.js';
99

1010
describe('Sitemap', () => {
1111
beforeEach(() => {

test/core/base-http-client.test.ts

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import http from 'node:http';
2+
import type { AddressInfo } from 'node:net';
3+
4+
import { FetchHttpClient } from '@crawlee/http-client';
5+
import { afterAll, beforeAll, describe, expect, test } from 'vitest';
6+
7+
let server: http.Server;
8+
let url: string;
9+
10+
beforeAll(async () => {
11+
server = http.createServer((_req, res) => {
12+
res.setHeader('content-type', 'text/plain');
13+
res.end('ok');
14+
});
15+
16+
await new Promise<void>((resolve) =>
17+
server.listen(() => {
18+
url = `http://127.0.0.1:${(server.address() as AddressInfo).port}`;
19+
resolve();
20+
}),
21+
);
22+
});
23+
24+
afterAll(async () => {
25+
await new Promise((resolve) => server.close(resolve));
26+
});
27+
28+
const httpClient = new FetchHttpClient();
29+
30+
describe('BaseHttpClient signal and timeoutMillis options', () => {
31+
test('sends a request without any signal or timeout', async () => {
32+
const response = await httpClient.sendRequest(new Request(url));
33+
expect(response.status).toBe(200);
34+
});
35+
36+
test('aborts when a pre-aborted signal is passed', async () => {
37+
const controller = new AbortController();
38+
controller.abort();
39+
40+
await expect(httpClient.sendRequest(new Request(url), { signal: controller.signal })).rejects.toThrow();
41+
});
42+
43+
test('aborts when the signal is aborted after the request starts', async () => {
44+
const controller = new AbortController();
45+
setTimeout(() => controller.abort(), 50);
46+
47+
const slowServer = http.createServer((_req, res) => {
48+
setTimeout(() => res.end('late'), 500);
49+
});
50+
51+
await new Promise<void>((r) => slowServer.listen(r));
52+
const slowUrl = `http://127.0.0.1:${(slowServer.address() as AddressInfo).port}`;
53+
54+
try {
55+
await expect(httpClient.sendRequest(new Request(slowUrl), { signal: controller.signal })).rejects.toThrow();
56+
} finally {
57+
await new Promise((r) => slowServer.close(r));
58+
}
59+
});
60+
61+
test('aborts when timeoutMillis elapses', async () => {
62+
const slowServer = http.createServer((_req, res) => {
63+
setTimeout(() => res.end('late'), 500);
64+
});
65+
66+
await new Promise<void>((r) => slowServer.listen(r));
67+
const slowUrl = `http://127.0.0.1:${(slowServer.address() as AddressInfo).port}`;
68+
69+
try {
70+
await expect(httpClient.sendRequest(new Request(slowUrl), { timeoutMillis: 50 })).rejects.toThrow();
71+
} finally {
72+
await new Promise((r) => slowServer.close(r));
73+
}
74+
});
75+
76+
test('aborts when both signal and timeoutMillis are provided and the signal fires first', async () => {
77+
const slowServer = http.createServer((_req, res) => {
78+
setTimeout(() => res.end('late'), 500);
79+
});
80+
81+
await new Promise<void>((r) => slowServer.listen(r));
82+
const slowUrl = `http://127.0.0.1:${(slowServer.address() as AddressInfo).port}`;
83+
84+
const controller = new AbortController();
85+
setTimeout(() => controller.abort(), 50);
86+
87+
try {
88+
await expect(
89+
httpClient.sendRequest(new Request(slowUrl), { signal: controller.signal, timeoutMillis: 5_000 }),
90+
).rejects.toThrow();
91+
} finally {
92+
await new Promise((r) => slowServer.close(r));
93+
}
94+
});
95+
});

0 commit comments

Comments
 (0)