@@ -6,13 +6,19 @@ x-crawl is a Nodejs multifunctional crawler library.
6
6
7
7
## Feature
8
8
9
- - Crawl HTML, JSON, file resources, etc. with simple configuration
10
- - Use puppeteer to crawl HTML, and use JSDOM library to parse HTML, or parse HTML by yourself
11
- - Support asynchronous/synchronous way to crawl data
12
- - Support Promise/Callback way to get the result
13
- - Polling function
14
- - Anthropomorphic request interval
15
- - Written in TypeScript, provides generics
9
+ - Crawl HTML, JSON, file resources, etc. with simple configuration.
10
+ - Crawls HTML with the built-in puppeteer and parses it with the JSDOM library.
11
+ - Supports asynchronous/synchronous ways to crawl data.
12
+ - Supports Promise/Callback ways to get the result.
13
+ - Polling function.
14
+ - Anthropomorphic request interval.
15
+ - Written in TypeScript, provides generics.
16
+
17
+ ## Benefits provided by using puppeteer
18
+
19
+ - Generate screenshots and PDFs of pages.
20
+ - Crawl a SPA (Single-Page Application) and generate pre-rendered content (i.e. "SSR" (Server-Side Rendering)).
21
+ - Automate form submission, UI testing, keyboard input, etc.
16
22
17
23
# Table of Contents
18
24
@@ -41,14 +47,15 @@ x-crawl is a Nodejs multifunctional crawler library.
41
47
* [ Method] ( #Method )
42
48
* [ RequestConfig] ( #RequestConfig )
43
49
* [ IntervalTime] ( #IntervalTime )
44
- * [ FetchBaseConifg] ( #FetchBaseConifg )
45
50
* [ XCrawlBaseConifg] ( #XCrawlBaseConifg )
51
+ * [ FetchBaseConifgV1] ( #FetchBaseConifgV1 )
52
+ * [ FetchBaseConifgV2] ( #FetchBaseConifgV2 )
46
53
* [ FetchHTMLConfig] ( #FetchHTMLConfig )
47
- * [ FetchDataConfig] ( #FetchDataConfig )
54
+ * [ FetchDataConfig] ( #FetchDataConfig )
48
55
* [ FetchFileConfig] ( #FetchFileConfig )
49
56
* [ StartPollingConfig] ( #StartPollingConfig )
50
- * [ FetchCommon ] ( #FetchCommon )
51
- * [ FetchCommonArr ] ( #FetchCommonArr )
57
+ * [ FetchResCommonV1 ] ( #FetchResCommonV1 )
58
+ * [ FetchResCommonArrV1 ] ( #FetchResCommonArrV1 )
52
59
* [ FileInfo] ( #FileInfo )
53
60
* [ FetchHTML] ( #FetchHTML )
54
61
- [ More] ( #More )
@@ -318,7 +325,6 @@ interface FetchBaseConifgV1 {
318
325
` ` ` ts
319
326
interface FetchBaseConifgV2 {
320
327
url: string
321
- header?: AnyObject
322
328
timeout?: number
323
329
proxy?: string
324
330
}
@@ -364,7 +370,7 @@ interface StartPollingConfig {
364
370
interface FetchCommon< T > {
365
371
id: number
366
372
statusCode: number | undefined
367
- headers: IncomingHttpHeaders // node : http type
373
+ headers: IncomingHttpHeaders // The type of IncomingHttpHeaders in the Node.js http module
368
374
data: T
369
375
}
370
376
` ` `
@@ -392,8 +398,7 @@ interface FileInfo {
392
398
interface FetchHTML {
393
399
httpResponse: HTTPResponse | null // The type of HTTPResponse in the puppeteer library
394
400
data: {
395
- page: Page
396
- content: string
401
+ page: Page // The type of Page in the puppeteer library
397
402
jsdom: JSDOM // The type of JSDOM in the jsdom library
398
403
}
399
404
}
0 commit comments