1- import type { Readable } from 'node:stream' ;
1+ import { Readable } from 'node:stream' ;
22
33import type { AllowedHttpMethods } from '@crawlee/types' ;
44import { applySearchParams , type SearchParams } from '@crawlee/utils' ;
55
6- import type { FormDataLike } from './form-data-like.js' ;
7-
8- type Timeout =
9- | {
10- lookup : number ;
11- connect : number ;
12- secureConnect : number ;
13- socket : number ;
14- send : number ;
15- response : number ;
16- }
17- | { request : number } ;
18-
19- /**
20- * Maps permitted values of the `responseType` option on {@apilink HttpRequest} to the types that they produce.
21- */
22- export interface ResponseTypes {
23- 'json' : unknown ;
24- 'text' : string ;
25- 'buffer' : Buffer ;
26- }
27-
28- interface Progress {
29- percent : number ;
30- transferred : number ;
31- total ?: number ;
32- }
6+ import type { Session } from '../session_pool/session.js' ;
337
348// TODO BC with got - remove the options and callback parameters in 4.0
359interface ToughCookieJar {
@@ -53,28 +27,23 @@ interface PromiseCookieJar {
5327 setCookie : ( rawCookie : string , url : string ) => Promise < unknown > ;
5428}
5529
56- type SimpleHeaders = Record < string , string | string [ ] | undefined > ;
57-
5830/**
5931 * HTTP Request as accepted by {@apilink BaseHttpClient} methods.
6032 */
61- export interface HttpRequest < TResponseType extends keyof ResponseTypes = 'text' > {
62- [ k : string ] : unknown ; // TODO BC with got - remove in 4.0
63-
33+ export interface HttpRequest {
6434 url : string | URL ;
6535 method ?: AllowedHttpMethods ;
66- headers ?: SimpleHeaders ;
67- body ?: string | Buffer | Readable | Generator | AsyncGenerator | FormDataLike ;
36+ headers ?: Headers ;
37+ body ?: Readable ;
6838
6939 signal ?: AbortSignal ;
70- timeout ?: Partial < Timeout > ;
40+ timeout ?: number ;
7141
7242 cookieJar ?: ToughCookieJar | PromiseCookieJar ;
7343 followRedirect ?: boolean | ( ( response : any ) => boolean ) ; // TODO BC with got - specify type better in 4.0
7444 maxRedirects ?: number ;
7545
7646 encoding ?: BufferEncoding ;
77- responseType ?: TResponseType ;
7847 throwHttpErrors ?: boolean ;
7948
8049 // from got-scraping Context
@@ -91,8 +60,7 @@ export interface HttpRequest<TResponseType extends keyof ResponseTypes = 'text'>
9160/**
9261 * Additional options for HTTP requests that need to be handled separately before passing to {@apilink BaseHttpClient}.
9362 */
94- export interface HttpRequestOptions < TResponseType extends keyof ResponseTypes = 'text' >
95- extends HttpRequest < TResponseType > {
63+ export interface HttpRequestOptions extends HttpRequest {
9664 /** Search (query string) parameters to be appended to the request URL */
9765 searchParams ?: SearchParams ;
9866
@@ -107,28 +75,6 @@ export interface HttpRequestOptions<TResponseType extends keyof ResponseTypes =
10775 password ?: string ;
10876}
10977
110- /**
111- * HTTP response data, without a body, as returned by {@apilink BaseHttpClient} methods.
112- */
113- export interface BaseHttpResponseData {
114- redirectUrls : URL [ ] ;
115- url : string ;
116-
117- ip ?: string ;
118- statusCode : number ;
119- statusMessage ?: string ;
120-
121- headers : SimpleHeaders ;
122- trailers : SimpleHeaders ; // Populated after the whole message is processed
123-
124- complete : boolean ;
125- }
126-
127- interface HttpResponseWithoutBody < TResponseType extends keyof ResponseTypes = keyof ResponseTypes >
128- extends BaseHttpResponseData {
129- request : HttpRequest < TResponseType > ;
130- }
131-
13278export class ResponseWithUrl extends Response {
13379 override url : string ;
13480 constructor ( body : BodyInit | null , init : ResponseInit & { url ?: string } ) {
@@ -137,63 +83,52 @@ export class ResponseWithUrl extends Response {
13783 }
13884}
13985
140- /**
141- * HTTP response data as returned by the {@apilink BaseHttpClient.sendRequest} method.
142- */
143- export interface HttpResponse < TResponseType extends keyof ResponseTypes = keyof ResponseTypes >
144- extends HttpResponseWithoutBody < TResponseType > {
145- [ k : string ] : any ; // TODO BC with got - remove in 4.0
146-
147- body : ResponseTypes [ TResponseType ] ;
148- }
149-
150- /**
151- * HTTP response data as returned by the {@apilink BaseHttpClient.stream} method.
152- */
153- export interface StreamingHttpResponse extends HttpResponseWithoutBody {
154- stream : Readable ;
155- readonly downloadProgress : Progress ;
156- readonly uploadProgress : Progress ;
157- }
158-
15986/**
16087 * Type of a function called when an HTTP redirect takes place. It is allowed to mutate the `updatedRequest` argument.
16188 */
// Signature notes:
// - `redirectResponse` is a `Response`; presumably the response that announced the redirect — confirm against the caller.
// - `updatedRequest` has an optional `url` (`string | URL`) and a required `headers` member.
// - The handler returns `void`, so any changes are communicated by mutating `updatedRequest`.
// The `-`/`+` pair below changes the type of `headers` from `SimpleHeaders` to `Headers`.
16289export type RedirectHandler = (
16390 redirectResponse : Response ,
164- updatedRequest : { url ?: string | URL ; headers : SimpleHeaders } ,
91+ updatedRequest : { url ?: string | URL ; headers : Headers } ,
16592) => void ;
16693
/**
 * Optional second argument of `BaseHttpClient.sendRequest`. Every member is optional.
 */
94+ export interface SendRequestOptions {
/** A `Session` (imported from `../session_pool/session.js`). NOTE(review): presumably the session the request is sent under — confirm. */
95+ session ?: Session ;
/** Cookie jar for the request. Only the `ToughCookieJar` shape is accepted here, whereas `HttpRequest.cookieJar` also accepts `PromiseCookieJar`. */
96+ cookieJar ?: ToughCookieJar ;
/** Request timeout as a single number. NOTE(review): the unit is not stated in this file — presumably milliseconds; confirm. */
97+ timeout ?: number ;
98+ }
99+
/**
 * Optional second argument of `BaseHttpClient.stream`: all members of `SendRequestOptions` plus a redirect callback.
 */
100+ export interface StreamOptions extends SendRequestOptions {
/** Optional `RedirectHandler`. In the removed `stream` signature this was a positional second parameter; it now lives in the options object. */
101+ onRedirect ?: RedirectHandler ;
102+ }
103+
167104/**
168105 * Interface for user-defined HTTP clients to be used for plain HTTP crawling and for sending additional requests during a crawl.
169106 */
170107export interface BaseHttpClient {
171108 /**
172109 * Perform an HTTP Request and return the complete response.
173110 */
174- sendRequest < TResponseType extends keyof ResponseTypes = 'text' > (
175- request : HttpRequest < TResponseType > ,
176- ) : Promise < Response > ;
// New signature: no `TResponseType` type parameter; the request is a `Request` instead of an `HttpRequest`,
// and an optional `SendRequestOptions` object is accepted as the second argument. Still resolves to a `Response`.
111+ sendRequest ( request : Request , options ?: SendRequestOptions ) : Promise < Response > ;
177112
178113 /**
179114 * Perform an HTTP Request and return after the response headers are received. The body may be read from a stream contained in the response.
180115 */
181- stream ( request : HttpRequest , onRedirect ?: RedirectHandler ) : Promise < Response > ;
// New signature: the request is a `Request`, and the former positional `onRedirect` parameter is replaced by
// an optional `StreamOptions` object (which declares `onRedirect`). Still resolves to a `Response`.
116+ stream ( request : Request , options ?: StreamOptions ) : Promise < Response > ;
182117}
183118
184119/**
185120 * Converts {@apilink HttpRequestOptions} to an {@apilink HttpRequest}.
186121 */
187- export function processHttpRequestOptions < TResponseType extends keyof ResponseTypes = 'text' > ( {
122+ export function processHttpRequestOptions ( {
188123 searchParams,
189124 form,
190125 json,
191126 username,
192127 password,
193128 ...request
194- } : HttpRequestOptions < TResponseType > ) : HttpRequest < TResponseType > {
129+ } : HttpRequestOptions ) : HttpRequest {
195130 const url = new URL ( request . url ) ;
196- const headers = { ... request . headers } ;
131+ const headers = new Headers ( request . headers ) ;
197132
198133 applySearchParams ( url , searchParams ) ;
199134
@@ -203,27 +138,31 @@ export function processHttpRequestOptions<TResponseType extends keyof ResponseTy
203138
204139 const body = ( ( ) => {
205140 if ( form !== undefined ) {
206- return new URLSearchParams ( form ) . toString ( ) ;
141+ return Readable . from ( new URLSearchParams ( form ) . toString ( ) ) ;
207142 }
208143
209144 if ( json !== undefined ) {
210- return JSON . stringify ( json ) ;
145+ return Readable . from ( JSON . stringify ( json ) ) ;
146+ }
147+
148+ if ( request . body !== undefined ) {
149+ return Readable . from ( request . body ) ;
211150 }
212151
213- return request . body ;
152+ return undefined ;
214153 } ) ( ) ;
215154
216- if ( form !== undefined ) {
217- headers [ 'content-type' ] ??= 'application/x-www-form-urlencoded' ;
155+ if ( form !== undefined && ! headers . has ( 'content-type' ) ) {
156+ headers . set ( 'content-type' , 'application/x-www-form-urlencoded' ) ;
218157 }
219158
220- if ( json !== undefined ) {
221- headers [ 'content-type' ] ??= 'application/json' ;
159+ if ( json !== undefined && ! headers . has ( 'content-type' ) ) {
160+ headers . set ( 'content-type' , 'application/json' ) ;
222161 }
223162
224163 if ( username !== undefined || password !== undefined ) {
225164 const encodedAuth = Buffer . from ( `${ username ?? '' } :${ password ?? '' } ` ) . toString ( 'base64' ) ;
226- headers . authorization = `Basic ${ encodedAuth } ` ;
165+ headers . set ( ' authorization' , `Basic ${ encodedAuth } ` ) ;
227166 }
228167
229168 return { ...request , body, url, headers } ;
0 commit comments