@@ -41,20 +41,20 @@ The crawlPage API internally uses the [puppeteer](https://github.com/puppeteer/p
   * [Multiple ways of writing requestConfig options](#Multiple-ways-of-writing-requestConfig-options)
   * [Multiple ways to get results](#Multiple-ways-to-get-results)
 - [API](#API)
-  * [x-crawl](#x-crawl-2)
-    + [Type](#Type-1)
+  * [xCrawl](#xCrawl)
+    + [Type](#Type)
     + [Example](#Example-1)
   * [crawlPage](#crawlPage)
-    + [Type](#Type-2)
+    + [Type](#Type-1)
     + [Example](#Example-2)
   * [crawlData](#crawlData)
-    + [Type](#Type-3)
+    + [Type](#Type-2)
     + [Example](#Example-3)
   * [crawlFile](#crawlFile)
-    + [Type](#Type-4)
+    + [Type](#Type-3)
     + [Example](#Example-4)
   * [crawlPolling](#crawlPolling)
-    + [Type](#Type-5)
+    + [Type](#Type-4)
     + [Example](#Example-5)
 - [Types](#Types)
   * [AnyObject](#AnyObject)
@@ -64,14 +64,14 @@ The crawlPage API internally uses the [puppeteer](https://github.com/puppeteer/p
   * [RequestConfig](#RequestConfig)
   * [IntervalTime](#IntervalTime)
   * [XCrawlBaseConfig](#XCrawlBaseConfig)
-  * [CrawlPageConfig](#CrawlPageConfig)
+  * [CrawlPageConfig](#CrawlPageConfig)
   * [CrawlBaseConfigV1](#CrawlBaseConfigV1)
   * [CrawlDataConfig](#CrawlDataConfig)
   * [CrawlFileConfig](#CrawlFileConfig)
   * [StartPollingConfig](#StartPollingConfig)
   * [CrawlResCommonV1](#CrawlResCommonV1)
   * [CrawlResCommonArrV1](#CrawlResCommonArrV1)
-  * [CrawlPage](#CrawlPage-2)
+  * [CrawlPage](#CrawlPage-1)
   * [FileInfo](#FileInfo)
 - [More](#More)
 
@@ -98,23 +98,25 @@ const myXCrawl = xCrawl({
 })
 
 // 3.Set the crawling task
-// Call the startPolling API to start the polling function, and the callback function will be called every other day
-myXCrawl.startPolling({ d: 1 }, (count, stopPolling) => {
-  myXCrawl.crawlPage('https://zh.airbnb.com/s/*/plus_homes').then((res) => {
-    const { jsdom } = res // By default, the JSDOM library is used to parse Page
-
-    // Get the cover image elements for Plus listings
-    const imgEls = jsdom.window.document
-      .querySelector('.a1stauiv')
-      ?.querySelectorAll('picture img')
-
-    // set request configuration
-    const requestConfig: string[] = []
-    imgEls?.forEach((item) => requestConfig.push(item.src))
-
-    // Call the crawlFile API to crawl pictures
-    myXCrawl.crawlFile({ requestConfig, fileConfig: { storeDir: './upload' } })
-  })
+/*
+  Call the startPolling API to start the polling function,
+  and the callback function will be called every other day
+*/
+myXCrawl.startPolling({ d: 1 }, async (count, stopPolling) => {
+  // Call crawlPage API to crawl Page
+  const { jsdom } = await myXCrawl.crawlPage('https://zh.airbnb.com/s/*/plus_homes')
+
+  // Get the cover image elements for Plus listings
+  const imgEls = jsdom.window.document
+    .querySelector('.a1stauiv')
+    ?.querySelectorAll('picture img')
+
+  // set request configuration
+  const requestConfig: string[] = []
+  imgEls?.forEach((item) => requestConfig.push(item.src))
+
+  // Call the crawlFile API to crawl pictures
+  myXCrawl.crawlFile({ requestConfig, fileConfig: { storeDir: './upload' } })
 })
 ```
 
@@ -136,7 +138,7 @@ running result:
 
 #### An example of a crawler application
 
-Create a new **application instance** via [xCrawl()](#x-crawl-2):
+Create a new **application instance** via [xCrawl()](#xCrawl):
 
 ```js
 import xCrawl from 'x-crawl'
@@ -321,13 +323,10 @@ const myXCrawl = xCrawl({
   intervalTime: { max: 3000, min: 1000 }
 })
 
-myXCrawl.startPolling({ h: 2, m: 30 }, (count, stopPolling) => {
+myXCrawl.startPolling({ h: 2, m: 30 }, async (count, stopPolling) => {
   // will be executed every two and a half hours
   // crawlPage/crawlData/crawlFile
-  myXCrawl.crawlPage('https://xxx.com').then(res => {
-    const { jsdom, browser, page } = res
-
-  })
+  const { jsdom, browser, page } = await myXCrawl.crawlPage('https://xxx.com')
 })
 ```
 
@@ -476,7 +475,7 @@ It can be selected according to the actual situation.
 
 ## API
 
-### x-crawl
+### xCrawl
 
 Create a crawler instance by calling xCrawl. The request queue is maintained by each instance method itself, not by the instance.
 
@@ -515,7 +514,7 @@ crawlPage is the method of the crawler instance, usually used to crawl page.
 #### Type
 
 - Look at the [CrawlPageConfig](#CrawlPageConfig) type
-- Look at the [CrawlPage](#CrawlPage-2) type
+- Look at the [CrawlPage](#CrawlPage-1) type
 
 ```ts
 function crawlPage: (