Skip to content

Commit f539905

Browse files
committed
feat: use native Response API in BrowserCrawler
1 parent 414b3bb commit f539905

File tree

2 files changed

+20
-14
lines changed

2 files changed

+20
-14
lines changed

packages/core/src/crawlers/crawler_commons.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import type { BatchAddRequestsResult, Dictionary } from '@crawlee/types';
2-
import type { OptionsInit, Response as GotResponse } from 'got-scraping';
2+
import type { OptionsInit } from 'got-scraping';
33
import type { ReadonlyDeep } from 'type-fest';
44

55
import type { Configuration } from '../configuration.js';

packages/playwright-crawler/src/internals/adaptive-playwright-crawler.ts

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,21 @@ import { isDeepStrictEqual } from 'node:util';
33
import type { BrowserHook, LoadedContext, LoadedRequest, Request, RouterHandler } from '@crawlee/browser';
44
import { extractUrlsFromPage } from '@crawlee/browser';
55
import type {
6-
BaseHttpResponseData,
76
GetUserDataFromRequest,
87
RestrictedCrawlingContext,
98
RouterRoutes,
109
StatisticPersistedState,
1110
StatisticsOptions,
1211
StatisticState,
1312
} from '@crawlee/core';
14-
import { Configuration, RequestHandlerResult, Router, Statistics, withCheckedStorageAccess } from '@crawlee/core';
13+
import {
14+
Configuration,
15+
RequestHandlerResult,
16+
ResponseWithUrl,
17+
Router,
18+
Statistics,
19+
withCheckedStorageAccess,
20+
} from '@crawlee/core';
1521
import type { Awaitable, Dictionary } from '@crawlee/types';
1622
import { type CheerioRoot, extractUrlsFromCheerio } from '@crawlee/utils';
1723
import { type Cheerio, load } from 'cheerio';
@@ -95,7 +101,7 @@ export interface AdaptivePlaywrightCrawlerContext<UserData extends Dictionary =
95101
/**
96102
* The HTTP response, either from the HTTP client or from the initial request of Playwright's navigation.
97103
*/
98-
response: BaseHttpResponseData;
104+
response: ResponseWithUrl;
99105

100106
/**
101107
* Playwright Page object. If accessed in HTTP-only rendering, this will throw an error and make the AdaptivePlaywrightCrawlerContext retry the request in a browser.
@@ -430,20 +436,20 @@ export class AdaptivePlaywrightCrawler extends PlaywrightCrawler {
430436
);
431437
}
432438
},
433-
() =>
439+
async () =>
434440
this.adaptiveRequestHandler({
435441
id: crawlingContext.id,
436442
session: crawlingContext.session,
437443
proxyInfo: crawlingContext.proxyInfo,
438444
request: crawlingContext.request as LoadedRequest<Request>,
439-
response: {
440-
url: crawlingContext.response!.url(),
441-
statusCode: crawlingContext.response!.status(),
442-
headers: crawlingContext.response!.headers(),
443-
trailers: {},
444-
complete: true,
445-
redirectUrls: [],
446-
},
445+
response: new ResponseWithUrl(
446+
new Uint8Array((await crawlingContext.response?.body()) ?? []),
447+
{
448+
url: crawlingContext.response!.url(),
449+
status: crawlingContext.response!.status(),
450+
headers: crawlingContext.response!.headers(),
451+
},
452+
),
447453
log: crawlingContext.log,
448454
page: crawlingContext.page,
449455
querySelector: async (selector, timeoutMs = 5_000) => {
@@ -549,7 +555,7 @@ export class AdaptivePlaywrightCrawler extends PlaywrightCrawler {
549555
const response = await crawlingContext.sendRequest({});
550556
const loadedUrl = response.url;
551557
crawlingContext.request.loadedUrl = loadedUrl;
552-
const $ = load(response.body);
558+
const $ = load(await response.text());
553559

554560
await this.adaptiveRequestHandler({
555561
...hookContext,

0 commit comments

Comments
 (0)